In [1]:
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib

# # Load the preprocessed data from CSV
# df = pd.read_csv("new_preprocessed_data.csv")

# # Assuming 'X' is your feature set and 'y' is your target variable with genres encoded 0-9
# X = df.drop('music_genre', axis=1)  # Drop the target column to isolate features
# y = df['music_genre']  # Target column with genres encoded from 0 to 9

# # Split the dataset into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = pd.read_csv('./data/X_train.csv')
X_test = pd.read_csv('./data/X_test.csv')

y_train = pd.read_csv('./data/y_train.csv').values.ravel()
y_test = pd.read_csv('./data/y_test.csv').values.ravel()




In [2]:
from sklearn.model_selection import GridSearchCV 
  
# defining parameter range 
param_grid = {'C': [0.1, 1, 10],  
              'gamma': [0.1, 1, 10], 
              'kernel': ['rbf']}  
  
grid = GridSearchCV(svm.SVC(), param_grid, refit = True, verbose = 3, n_jobs=-1) 
  
# fitting the model for grid search 
grid.fit(X_train, y_train)


# print best parameter after tuning 
print(grid.best_params_) 
  
# print how our model looks after hyper-parameter tuning 
print(grid.best_estimator_) 

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.656 total time= 1.4min
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.659 total time= 1.3min
[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.652 total time= 1.4min
[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.664 total time= 1.2min
[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.647 total time= 1.2min
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.101 total time= 1.4min
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.101 total time= 1.3min
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.101 total time= 1.5min
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.101 total time= 1.5min
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.101 total time= 1.5min
[CV 1/5] END .......C=0.1, gamma=10, kernel=rbf;, score=0.101 total time= 1.4min
[CV 2/5] END .......C=0.1, gamma=10, kernel=rbf;,

In [3]:
joblib.dump(grid.best_estimator_, './models/best_rbf_svm.pkl')

['./models/best_rbf_svm.pkl']

In [4]:
grid_predictions = grid.predict(X_test) 
  
# print classification report 
print(classification_report(y_test, grid_predictions)) 

              precision    recall  f1-score   support

           0       0.68      0.75      0.71       856
           1       0.96      0.94      0.95       919
           2       0.83      0.87      0.85       873
           3       0.86      0.95      0.90       881
           4       0.85      0.86      0.86       921
           5       0.91      0.90      0.90       931
           6       0.54      0.63      0.58       905
           7       0.88      0.82      0.85       888
           8       0.44      0.30      0.36       914
           9       0.61      0.61      0.61       916

    accuracy                           0.76      9004
   macro avg       0.76      0.76      0.76      9004
weighted avg       0.76      0.76      0.76      9004

