Ensemble with multiple mixed inputs

In [30]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras import layers, models, Input
import tensorflow as tf

In [31]:
##setting the directory path for the spectograms
import os
images = os.listdir('Data/images_original')

In [32]:
##checking the path is set properly
images

['.DS_Store',
 'blues',
 'classical',
 'country',
 'disco',
 'hiphop',
 'jazz',
 'metal',
 'pop',
 'reggae',
 'rock']

In [33]:
##getting the  folder names inside the images folder
genre_folders = [f for f in os.listdir('Data/images_original') if not f.startswith('.')]

##creating  a datarfame and setting it to the folder folder path
images_df = pd.DataFrame({
    'Names': genre_folders,
    'Path': [os.path.join('Data/images_original', genre) for genre in genre_folders]
})



In [34]:
##checking the new dataframe works
images_df

Unnamed: 0,Names,Path
0,blues,Data/images_original\blues
1,classical,Data/images_original\classical
2,country,Data/images_original\country
3,disco,Data/images_original\disco
4,hiphop,Data/images_original\hiphop
5,jazz,Data/images_original\jazz
6,metal,Data/images_original\metal
7,pop,Data/images_original\pop
8,reggae,Data/images_original\reggae
9,rock,Data/images_original\rock


In [35]:
##setting the path for the csv file
csv_path = 'Data/features_30_sec.csv'


In [36]:
##checking the path is set properly
csv_path

'Data/features_30_sec.csv'

In [37]:
##setting the size of the picture 
image_size = (128, 128)

In [38]:
##loading the csv file
df = pd.read_csv(csv_path)

In [39]:
##dropping the length column on the csv file
df = df.drop(columns=["length"]) 

In [40]:
##extracting the genre 
df['label'] = df['filename'].apply(lambda x: x.split('.')[0]) 

In [41]:
##dropping the  columns we dont need 
columns_to_drop = ['filename', 'length', 'label']
X_tab = df.drop(columns=[col for col in columns_to_drop if col in df.columns])

##scaling the remaining columns using standardscaler
scaler = StandardScaler()
X_tab_scaled = scaler.fit_transform(X_tab)


In [42]:
##converting  the labels in the datafram to numbers 
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df["label"])

##converting the numbers to one-hotencoding 
y_categorical = to_categorical(y)

##checking how mani classes we have so like our genres
num_genres = y_categorical.shape[1]


In [43]:
##loadingthe images on a list of filenames and then we return them as a numoy array
def load_spectrogram_images(filenames, base_dir, target_size):
    data = []
    for name in filenames:
        ##extracting the genre and the image so like  blues000000.png wesplitinto blue sand the number
        genre = name.split('.')[0]          
        img_name = name.split('.')[1] + '.png' 

        ##bthe full path to the spectogram image 
        img_path = os.path.join(base_dir, genre, img_name)
        
        ##if the file existsweload it conver it into an array and scale the pixel values
        if os.path.exists(img_path):
            img = load_img(img_path, target_size=target_size)
            img = img_to_array(img) / 255.0
            data.append(img)
        else:
            ##if  the file doesn´t exist i throw a warning and i add an array with 0s to store the place like to keep running  so we make like a fake image prop with the same shape
            print(f"Warning: {img_path} not found. Skipping.")
            data.append(np.zeros((*target_size, 3))) 
            
    ##then i just get the array back from the listof images
    return np.array(data)

In [44]:
##setting the image directory for the model later
image_dir = 'Data/images_original'

In [45]:
##loading the spectogram images and resizing them
X_img = load_spectrogram_images(df['filename'].values, image_dir, image_size)

In [46]:
##splitting into train and testing set
X_img_train, X_img_test, X_tab_train, X_tab_test, y_train, y_test = train_test_split(
    X_img, X_tab_scaled, y_categorical, test_size=0.2, random_state=42
)

In [47]:
##creating the cnn model
image_input = Input(shape=(image_size[0], image_size[1], 3))
##layer 1
convolayer1 = layers.Conv2D(32, (3,3), activation='relu')(image_input)
convolayer1 = layers.MaxPooling2D((2,2)) (convolayer1)

##adding a second layer
convolayer2 = layers.Conv2D(32, (3,3), activation='relu')(convolayer1)
convolayer2 = layers.MaxPooling2D((2,2)) (convolayer2)
convolayer2 = layers.Flatten()(convolayer2)
convolayer2 = layers.Dense(64, activation='relu')(convolayer2)

image_branch = models.Model(inputs=image_input, outputs=convolayer2)



In [48]:
##defining the input layer for the table daya using the number of columns 
table_input = Input(shape=(X_tab_scaled.shape[1],))

##passing the table data through 2 connected dense layers
dense1 = layers.Dense(64, activation='relu')(table_input)
dense2 = layers.Dense(32, activation='relu')(dense1)

#creating a model object called table branch after running it through the layers
table_branch = models.Model(inputs=table_input, outputs=dense2)


In [49]:
##combining the csv and the images
combined = layers.concatenate([image_branch.output, table_branch.output])
densecomb = layers.Dense(64, activation='relu')(combined)
densecomb = layers.Dropout(0.5)(densecomb)
densecomb = layers.Dense(num_genres, activation='softmax')(densecomb)

In [50]:
##building the combined model from the 2 branches above with an optimizer adam
model = models.Model(inputs=[image_branch.input, table_branch.input], outputs=densecomb)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [51]:
##training the model
model.fit([X_img_train, X_tab_train], y_train,
           validation_data=([X_img_test, X_tab_test], y_test),
           epochs=25,
           batch_size=32)

Epoch 1/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 130ms/step - accuracy: 0.1481 - loss: 2.3342 - val_accuracy: 0.3700 - val_loss: 1.9013
Epoch 2/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 111ms/step - accuracy: 0.3053 - loss: 1.8940 - val_accuracy: 0.4800 - val_loss: 1.6329
Epoch 3/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 104ms/step - accuracy: 0.4168 - loss: 1.6423 - val_accuracy: 0.5600 - val_loss: 1.4035
Epoch 4/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 100ms/step - accuracy: 0.5115 - loss: 1.4128 - val_accuracy: 0.6200 - val_loss: 1.1907
Epoch 5/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 99ms/step - accuracy: 0.5920 - loss: 1.1269 - val_accuracy: 0.6650 - val_loss: 1.0253
Epoch 6/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 99ms/step - accuracy: 0.6696 - loss: 1.0064 - val_accuracy: 0.7050 - val_loss: 0.8965
Epoch 7/25
[1m25/25[0m [32m

<keras.src.callbacks.history.History at 0x223ff555970>

In [52]:
##printing out the accuracy 
loss, accuracy = model.evaluate([X_img_test, X_tab_test], y_test)
print(f"accuracy: {accuracy * 100:.2f}%")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.9272 - loss: 0.4276
accuracy: 93.00%
