In [14]:
#from pycaret.classification import *
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn import preprocessing 

In [15]:
raw_data = pd.read_csv('./data/audio_data_genre.csv').drop(columns=['filename'])

In [16]:
print(raw_data['label'].unique())

['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop'
 'reggae' 'rock']


In [18]:
COLS_USED = raw_data.columns.to_list()
COLS_TRAIN = [col for col in COLS_USED if col != 'label']

In [21]:
data = {
    'label': ['blues', 'classical','country','disco','hiphop','jazz','metal','pop',
 'reggae','rock']
}
raw_data = pd.DataFrame(data)

print("Before transformation:")
print(raw_data.head())

label_encoder = preprocessing.LabelEncoder()
raw_data['label_encoded'] = label_encoder.fit_transform(raw_data['label'])

# Create a mapping from encoded labels to original labels
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

# After transformation
print("\nAfter transformation:")
print(raw_data.head())

# Print the mapping for better readability
print("\nLabel mapping:")
for original_label, encoded_label in label_mapping.items():
    print(f"{encoded_label}: {original_label}")

Before transformation:
       label
0      blues
1  classical
2    country
3      disco
4     hiphop

After transformation:
       label  label_encoded
0      blues              0
1  classical              1
2    country              2
3      disco              3
4     hiphop              4

Label mapping:
0: blues
1: classical
2: country
3: disco
4: hiphop
5: jazz
6: metal
7: pop
8: reggae
9: rock


In [10]:
print(raw_data['label'].unique())

[0 1 2 3 4 5 6 7 8 9]


In [6]:
raw_data = raw_data[COLS_USED]

# Split into training and test sets
df_x_train, df_x_test, df_y_train, df_y_test = train_test_split(
    raw_data.drop("label", axis=1),
    raw_data["label"],
    test_size=0.20,
    random_state=42,
)

# Create dataframe versions for tabular GAN
df_x_test, df_y_test = df_x_test.reset_index(drop=True), \
  df_y_test.reset_index(drop=True)
df_y_train = pd.DataFrame(df_y_train)
df_y_test = pd.DataFrame(df_y_test)

# Pandas to Numpy
x_train = df_x_train.values
x_test = df_x_test.values
y_train = df_y_train.values
y_test = df_y_test.values

In [7]:
model = Sequential()
# Hidden 1
model.add(Dense(50, input_dim=x_train.shape[1], activation='relu')) 
model.add(Dense(25, activation='relu')) # Hidden 2
model.add(Dense(12, activation='relu')) # Hidden 2
model.add(Dense(1)) # Output
model.compile(loss='mean_squared_error', optimizer='adam')

monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, 
        patience=5, verbose=1, mode='auto',
        restore_best_weights=True)
model.fit(x_train,y_train,validation_data=(x_test,y_test),
        callbacks=[monitor], verbose=2,epochs=1000)

Epoch 1/1000
25/25 - 1s - loss: 177404.7188 - val_loss: 6019.2393 - 633ms/epoch - 25ms/step
Epoch 2/1000
25/25 - 0s - loss: 9936.8203 - val_loss: 5509.3457 - 59ms/epoch - 2ms/step
Epoch 3/1000
25/25 - 0s - loss: 3939.6267 - val_loss: 2931.1685 - 54ms/epoch - 2ms/step
Epoch 4/1000
25/25 - 0s - loss: 2626.1289 - val_loss: 1634.4751 - 56ms/epoch - 2ms/step
Epoch 5/1000
25/25 - 0s - loss: 1779.0421 - val_loss: 1259.9781 - 66ms/epoch - 3ms/step
Epoch 6/1000
25/25 - 0s - loss: 1354.0967 - val_loss: 1047.7742 - 61ms/epoch - 2ms/step
Epoch 7/1000
25/25 - 0s - loss: 1131.7386 - val_loss: 850.9998 - 59ms/epoch - 2ms/step
Epoch 8/1000
25/25 - 0s - loss: 999.4145 - val_loss: 907.0165 - 60ms/epoch - 2ms/step
Epoch 9/1000
25/25 - 0s - loss: 954.6367 - val_loss: 691.6975 - 66ms/epoch - 3ms/step
Epoch 10/1000
25/25 - 0s - loss: 682.8369 - val_loss: 504.9271 - 64ms/epoch - 3ms/step
Epoch 11/1000
25/25 - 0s - loss: 554.3415 - val_loss: 430.5194 - 66ms/epoch - 3ms/step
Epoch 12/1000
25/25 - 0s - loss: 51

<keras.callbacks.History at 0x29147e87520>

In [8]:
pred = model.predict(x_test)
score = np.sqrt(metrics.mean_squared_error(pred,y_test))
print("Final score (RMSE): {}".format(score))

Final score (RMSE): 6.008439590991187


In [10]:
from tabgan.sampler import GANGenerator

gen_x, gen_y = GANGenerator(gen_x_times=1.1, cat_cols=None,
           bot_filter_quantile=0.001, top_filter_quantile=0.999,
              is_post_process=True,
           adversarial_model_params={
               "metrics": "rmse", "max_depth": 2, "max_bin": 100, 
               "learning_rate": 0.02, "random_state":
                42, "n_estimators": 500,
           }, pregeneration_frac=2, only_generated_data=False
          ).generate_data_pipe(df_x_train, df_y_train,
          df_x_test, deep_copy=True, only_adversarial=False, 
          use_adversarial=True)



Fitting CTGAN transformers for each column:   0%|          | 0/67 [00:00<?, ?it/s]

Training CTGAN, epochs::   0%|          | 0/50 [00:00<?, ?it/s]

[1]	training's rmse: 0.498956	valid_1's rmse: 0.500517
[2]	training's rmse: 0.497952	valid_1's rmse: 0.501039
[3]	training's rmse: 0.496986	valid_1's rmse: 0.501566
[4]	training's rmse: 0.496056	valid_1's rmse: 0.502097
[5]	training's rmse: 0.495161	valid_1's rmse: 0.502632
[6]	training's rmse: 0.494301	valid_1's rmse: 0.503169
[7]	training's rmse: 0.493472	valid_1's rmse: 0.503769
[8]	training's rmse: 0.492675	valid_1's rmse: 0.50431
[9]	training's rmse: 0.491908	valid_1's rmse: 0.504911
[10]	training's rmse: 0.49117	valid_1's rmse: 0.505454
[11]	training's rmse: 0.490455	valid_1's rmse: 0.505679
[12]	training's rmse: 0.489757	valid_1's rmse: 0.506066
[13]	training's rmse: 0.488937	valid_1's rmse: 0.505941
[14]	training's rmse: 0.488268	valid_1's rmse: 0.506175
[15]	training's rmse: 0.487487	valid_1's rmse: 0.506068
[16]	training's rmse: 0.486938	valid_1's rmse: 0.506049
[17]	training's rmse: 0.486187	valid_1's rmse: 0.505958
[18]	training's rmse: 0.485711	valid_1's rmse: 0.506049
[19

In [11]:
pred = model.predict(gen_x.values)
score = np.sqrt(metrics.mean_squared_error(pred,gen_y.values))
print("Final score (RMSE): {}".format(score))

Final score (RMSE): 5.527769255483263


In [17]:
data_augmented = pd.concat([raw_data, gen_x])

In [21]:
data_augmented

Unnamed: 0,label,zero_crossing_rate,zero_crossings,spectrogram,mel_spectrogram,harmonics,perceptual_shock_wave,spectral_centroids,spectral_centroids_delta,spectral_centroids_accelerate,...,mfcc_accelerate_9,mfcc10,mfcc_delta_10,mfcc_accelerate_10,mfcc11,mfcc_delta_11,mfcc_accelerate_11,mfcc12,mfcc_delta_12,mfcc_accelerate_12
0,0,0.083045,55031.0,-43.481335,-59.466248,-0.000049,-0.000011,1784.122641,-0.490523,-0.061881,...,4.187489e-03,-8.326061,-0.002930,0.004329,8.802088,0.002346,-0.004537,-3.669941,0.000569,-0.004123
1,0,0.056040,37139.0,-39.419030,-57.107990,0.000141,-0.000180,1530.261767,0.645964,-0.085588,...,-8.625205e-03,-5.558824,-0.006495,0.000426,5.377876,-0.004285,-0.006501,-2.234492,0.008674,0.000504
2,0,0.076291,50563.0,-66.045040,-68.650800,-0.000002,-0.000020,1552.832481,0.340027,0.025039,...,-2.788004e-07,-13.125314,-0.006027,-0.005649,5.791246,-0.002034,-0.002225,-8.901966,0.005649,-0.000031
3,0,0.033309,22077.0,-70.386650,-79.072080,0.000004,-0.000018,1070.153418,0.251255,0.079369,...,-8.826724e-04,-3.200026,-0.010480,-0.003420,6.078082,-0.009716,0.001235,-2.478445,-0.002598,-0.000736
4,0,0.101461,67225.0,-55.221130,-79.911285,-0.000018,-0.000010,1835.128513,-0.172070,-0.245268,...,-3.368575e-03,-13.084959,0.002908,0.001244,-2.810499,0.003760,0.001076,-6.934471,-0.018504,-0.000860
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
437,1,0.075210,49844.0,-50.497253,-68.641000,-0.000027,-0.000098,1431.080147,-0.535642,0.096958,...,7.641606e-03,-9.563163,0.013230,0.004394,2.676858,0.006703,0.004897,-4.788712,0.018052,0.004444
438,0,0.058197,38548.0,-54.587210,-70.144350,-0.000002,-0.000106,1612.645637,0.279759,-0.088875,...,-8.075831e-03,-5.692567,0.002910,0.003638,1.383352,0.005887,0.001791,-5.828815,0.014399,-0.005769
439,5,0.063690,42205.0,-45.285650,-69.482460,0.000021,0.000122,1650.958287,0.047238,-0.535926,...,-7.450810e-03,-13.885361,0.002353,0.000531,8.284788,0.013918,-0.004170,-7.303147,0.017929,0.004137
440,2,0.060075,39795.0,-56.903572,-71.427216,0.000006,0.000113,1511.682407,-0.125670,0.167050,...,2.984683e-03,-6.500653,0.010500,-0.004183,2.901394,0.017913,0.002217,-11.492002,0.020197,0.003312


In [22]:
data_augmented.to_csv('./data/audio_data_genre_augmented.csv', index=False)