In [None]:

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.utils import set_random_seed, to_categorical

In [None]:
 data = pd.read_csv('genres_v2.csv')  # Replace with the correct path

# Drop unnecessary columns
columns_to_drop = ['id', 'uri', 'track_href', 'analysis_url', 'Unnamed: 0', 'title', 'song_name']
data_cleaned = data.drop(columns=columns_to_drop, errors='ignore')

# Drop rows with missing values
data_cleaned.dropna(inplace=True)

# Select numerical features and target
numerical_features = ['danceability', 'energy', 'key', 'loudness', 'speechiness',
                      'acousticness', 'instrumentalness', 'liveness', 'valence',
                      'tempo', 'duration_ms', 'time_signature']
target_column = 'genre'

# Extract features (X) and target (y)
X = data_cleaned[numerical_features]
y = data_cleaned[target_column]

# Normalize features using Min-Max scaling
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Encode target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# One-hot encode target labels for deep learning
y_onehot = to_categorical(y_encoded)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_onehot, test_size=0.1, random_state=42)
3


  data = pd.read_csv('genres_v2.csv')  # Replace with the correct path


3

In [None]:
# Random Forest baseline.
# The targets are one-hot encoded, but scikit-learn's classifier and metric
# functions take integer class ids, so collapse them back once and reuse them.
y_train_labels = np.argmax(y_train, axis=1)
y_test_labels = np.argmax(y_test, axis=1)

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train_labels)

# Predict on the test set
y_pred_rf = rf.predict(X_test)

# Evaluate: plain accuracy plus support-weighted precision / recall / F1.
accuracy_rf = accuracy_score(y_test_labels, y_pred_rf)
precision_rf, recall_rf, f1_rf = (
    scorer(y_test_labels, y_pred_rf, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

print(f"Random Forest - Accuracy: {accuracy_rf}, Precision: {precision_rf}, Recall: {recall_rf}, F1: {f1_rf}")

Random Forest - Accuracy: 0.6707634129047506, Precision: 0.6693808927022121, Recall: 0.6707634129047506, F1: 0.665851053057167


In [None]:
# Build a Neural Network
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(y_train.shape[1], activation='softmax')  # Output layer
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=64, validation_split=0.1, verbose=1)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Neural Network - Test Accuracy: {test_accuracy}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.2441 - loss: 2.2423 - val_accuracy: 0.4730 - val_loss: 1.5551
Epoch 2/50
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.4337 - loss: 1.6332 - val_accuracy: 0.5305 - val_loss: 1.3882
Epoch 3/50
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4843 - loss: 1.4924 - val_accuracy: 0.5565 - val_loss: 1.3041
Epoch 4/50
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5123 - loss: 1.4207 - val_accuracy: 0.5885 - val_loss: 1.2372
Epoch 5/50
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5406 - loss: 1.3402 - val_accuracy: 0.5990 - val_loss: 1.1822
Epoch 6/50
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5490 - loss: 1.3031 - val_accuracy: 0.6061 - val_loss: 1.1415
Epoch 7/50
[1m536/536[0m 

In [None]:
# Neural Network Predictions
y_pred_nn = np.argmax(model.predict(X_test), axis=1)

# Random Forest Results
print(f"Random Forest Metrics:\n"
      f"Accuracy: {accuracy_rf}, Precision: {precision_rf}, Recall: {recall_rf}, F1 Score: {f1_rf}")

# Neural Network Classification Report
from sklearn.metrics import classification_report
print("Neural Network Classification Report:")
print(classification_report(np.argmax(y_test, axis=1), y_pred_nn))


[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Random Forest Metrics:
Accuracy: 0.6707634129047506, Precision: 0.6693808927022121, Recall: 0.6707634129047506, F1 Score: 0.665851053057167
Neural Network Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.38      0.48       484
           1       0.60      0.63      0.62       177
           2       0.59      0.36      0.44       312
           3       0.50      0.02      0.03        60
           4       0.70      0.25      0.37       165
           5       0.41      0.38      0.39       196
           6       0.60      0.29      0.39       178
           7       0.43      0.78      0.56       592
           8       0.93      0.97      0.95       298
           9       0.81      0.92      0.86       308
          10       0.93      0.87      0.90       295
          11       0.78      0.91      0.84       273
          12       0.85      0.75      0.80 

In [None]:
# Persist everything needed to make predictions later.
# Save Random Forest model
joblib.dump(rf, 'random_forest_genre.pkl')

# Save the fitted preprocessing objects as well: the models expect inputs
# scaled by `scaler`, and their integer outputs only map back to genre names
# through `label_encoder`, so the model files alone are not usable without them.
joblib.dump(scaler, 'scaler_genre.pkl')
joblib.dump(label_encoder, 'label_encoder_genre.pkl')

# Save Neural Network model.
# NOTE(review): '.h5' is the legacy HDF5 format; the filename is left as-is
# because other cells in this notebook load and save this exact path.
model.save('neural_network_genre.h5')



In [None]:
# Load Random Forest model
# (joblib files are pickles: only load files you created yourself.)
rf = joblib.load('random_forest_genre.pkl')

# Predict on new data (ensure it's preprocessed like X_test).
# The sample is built as a one-row DataFrame whose columns are
# `numerical_features`, so every value is explicitly tied to its feature
# instead of relying on positional order, and the scaler receives the same
# column names it was fitted with.
new_data = pd.DataFrame(
    [[0.7, 0.8, 5, -5, 0.2, 0.6, 0.0, 0.1, 0.9, 120, 200000, 4]],
    columns=numerical_features,
)
new_data_scaled = scaler.transform(new_data)
predicted_genre_rf = label_encoder.inverse_transform(rf.predict(new_data_scaled))
print(f"Predicted Genre (Random Forest): {predicted_genre_rf}")

# Load Neural Network model
model = load_model('neural_network_genre.h5')

# Predict with Neural Network: argmax over the softmax outputs gives the
# class id, which the label encoder maps back to a genre name.
predicted_genre_nn = label_encoder.inverse_transform(np.argmax(model.predict(new_data_scaled), axis=1))
print(f"Predicted Genre (Neural Network): {predicted_genre_nn}")



Predicted Genre (Random Forest): ['Underground Rap']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
Predicted Genre (Neural Network): ['Underground Rap']


In [None]:
# Write the neural network to 'neural_network_genre.h5' again. When the cells
# are run top to bottom, `model` at this point is the object returned by
# load_model() on this same path, so the call rewrites the file it came from.
# NOTE(review): this looks redundant with the earlier save cell — confirm
# whether it is still needed.
model.save('neural_network_genre.h5')


