In [35]:
from utils import *
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from matplotlib import pyplot as plt 
from sklearn.svm import SVC


In [36]:
df = pd.read_csv("dataset/body_level_classification_train.csv")
df_original= cleaning_data(df).copy()
all_class_train,all_class_test=data_spliting(df_original,1/3)
## shuffling 
all_class_train = all_class_train.sample(frac=1.0, random_state=42)
all_class_test = all_class_test.sample(frac=1.0, random_state=42)
X_train=all_class_train.drop(["Body_Level"],axis=1).to_numpy()
Y_train=all_class_train["Body_Level"].to_numpy()
X_test =all_class_test.drop(["Body_Level"],axis=1).to_numpy()
Y_test =all_class_test["Body_Level"].to_numpy()


X_train, scaler= data_scaling(X_train)
X_val, X_test, Y_val, Y_test = train_test_split(X_test
                                                     , Y_test, test_size=0.5, random_state=42)
X_val = scaler.transform(X_val)
X_test=scaler.transform(X_test)

In [37]:
class_weights = all_class_train["Body_Level"].value_counts().to_dict()
total_num = sum(class_weights.values())
class_weights= { i : (total_num/(4* j )) for i ,j in zip(class_weights.keys(),class_weights.values())}
class_weights

{3: 0.5429515418502202,
 2: 0.9095940959409594,
 1: 1.8395522388059702,
 0: 1.9409448818897639}

In [38]:
SVM_Model = SVC(random_state=42,gamma='auto')
SVM_Model.fit(np.concatenate([X_train,X_val], axis=0)
                   ,np.concatenate([Y_train,Y_val],axis=0))

SVC(gamma='auto', random_state=42)

In [39]:
SVM_Model_accuracy , repo = test_model(SVM_Model,X_train,Y_train,axis=False)

In [40]:
SVM_Model_accuracy , repo = test_model(SVM_Model,X_train,Y_train,axis=False)
print(SVM_Model_accuracy)
print(repo)

0.9594320486815415
              precision    recall  f1-score   support

           0       0.95      0.98      0.96       127
           1       0.90      0.84      0.86       134
           2       0.93      0.96      0.95       271
           3       1.00      0.99      0.99       454

    accuracy                           0.96       986
   macro avg       0.94      0.94      0.94       986
weighted avg       0.96      0.96      0.96       986



In [41]:
SVM_Model_accuracy , repo = test_model(SVM_Model,X_test,Y_test,axis=False)

In [42]:
print(SVM_Model_accuracy)
print(repo)

0.8821138211382114
              precision    recall  f1-score   support

           0       0.86      0.86      0.86        28
           1       0.62      0.78      0.69        36
           2       0.94      0.84      0.89        80
           3       0.96      0.96      0.96       102

    accuracy                           0.88       246
   macro avg       0.85      0.86      0.85       246
weighted avg       0.89      0.88      0.89       246



## SVM weights 

In [43]:
SVM_Model_weights = SVC(random_state=42,gamma='auto', class_weight=class_weights)
SVM_Model_weights.fit(np.concatenate([X_train,X_val], axis=0)
                   ,np.concatenate([Y_train,Y_val],axis=0))

SVC(class_weight={0: 1.9409448818897639, 1: 1.8395522388059702,
                  2: 0.9095940959409594, 3: 0.5429515418502202},
    gamma='auto', random_state=42)

In [44]:
SVM_Model_weights_accuracy , repo = test_model(SVM_Model_weights,X_train,Y_train,axis=False)

In [45]:
print(SVM_Model_accuracy)
print(repo)

0.8821138211382114
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       127
           1       0.85      0.92      0.88       134
           2       0.93      0.94      0.94       271
           3       0.99      0.96      0.98       454

    accuracy                           0.96       986
   macro avg       0.94      0.96      0.95       986
weighted avg       0.96      0.96      0.96       986



In [46]:
SVM_Model_weights_accuracy , repo = test_model(SVM_Model_weights,X_test,Y_test,axis=False)

In [47]:
print(SVM_Model_accuracy)
print(repo)

0.8821138211382114
              precision    recall  f1-score   support

           0       0.89      0.86      0.87        28
           1       0.63      0.86      0.73        36
           2       0.94      0.84      0.89        80
           3       0.98      0.95      0.97       102

    accuracy                           0.89       246
   macro avg       0.86      0.88      0.86       246
weighted avg       0.91      0.89      0.89       246



## SVM  oversampling


In [48]:
oversampled= over_sampling(all_class_train,sampling_ratio=0.5)
class_weights = oversampled["Body_Level"].value_counts().to_dict()
total_num = sum(class_weights.values())
class_weights= { i : (total_num/(4* j )) for i ,j in zip(class_weights.keys(),class_weights.values())}
class_weights

X_train=oversampled.drop(["Body_Level"],axis=1).to_numpy()
Y_train=oversampled["Body_Level"].to_numpy()
X_test =all_class_test.drop(["Body_Level"],axis=1).to_numpy()
Y_test =all_class_test["Body_Level"].to_numpy()
X_train, scaler= data_scaling(X_train)
X_val, X_test, Y_val, Y_test = train_test_split(X_test
                                                     , Y_test, test_size=0.5, random_state=42)
X_val = scaler.transform(X_val)
X_test=scaler.transform(X_test)

In [49]:
SVM_Model = SVC(random_state=42,gamma='auto')
SVM_Model.fit(np.concatenate([X_train,X_val], axis=0)
                   ,np.concatenate([Y_train,Y_val],axis=0))

SVC(gamma='auto', random_state=42)

In [50]:
SVM_Model_accuracy , repo = test_model(SVM_Model,X_train,Y_train,axis=False)
print(SVM_Model_accuracy)
print(repo)

0.9741837509491268
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       257
           1       0.94      0.95      0.95       262
           2       0.96      0.96      0.96       344
           3       0.99      0.99      0.99       454

    accuracy                           0.97      1317
   macro avg       0.97      0.97      0.97      1317
weighted avg       0.97      0.97      0.97      1317



In [60]:
SVM_Model_accuracy , repo = test_model(SVM_Model,X_test,Y_test,axis=False)

In [61]:
print(SVM_Model_accuracy)
print(repo)

0.8861788617886179
              precision    recall  f1-score   support

           0       0.88      0.82      0.85        28
           1       0.62      0.78      0.69        36
           2       0.93      0.86      0.90        80
           3       0.97      0.96      0.97       102

    accuracy                           0.89       246
   macro avg       0.85      0.86      0.85       246
weighted avg       0.90      0.89      0.89       246



In [53]:
SVM_Model_weights = SVC(random_state=42,gamma='auto', class_weight=class_weights)
SVM_Model_weights.fit(np.concatenate([X_train,X_val], axis=0)
                   ,np.concatenate([Y_train,Y_val],axis=0))

SVC(class_weight={0: 1.2811284046692606, 1: 1.256679389312977,
                  2: 0.9571220930232558, 3: 0.7252202643171806},
    gamma='auto', random_state=42)

In [54]:
SVM_Model_weights_accuracy , repo = test_model(SVM_Model_weights,X_train,Y_train,axis=False)

In [55]:
print(SVM_Model_accuracy)
print(repo)

0.8861788617886179
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       257
           1       0.94      0.96      0.95       262
           2       0.95      0.96      0.96       344
           3       1.00      0.98      0.99       454

    accuracy                           0.97      1317
   macro avg       0.97      0.97      0.97      1317
weighted avg       0.97      0.97      0.97      1317



In [57]:
SVM_Model_weights_accuracy , repo = test_model(SVM_Model_weights,X_test,Y_test,axis=False)

In [58]:
SVM_Model_accuracy , repo = test_model(SVM_Model,X_test,Y_test,axis=False)

In [59]:
print(SVM_Model_weights_accuracy )
print(repo)

0.8983739837398373
              precision    recall  f1-score   support

           0       0.88      0.82      0.85        28
           1       0.62      0.78      0.69        36
           2       0.93      0.86      0.90        80
           3       0.97      0.96      0.97       102

    accuracy                           0.89       246
   macro avg       0.85      0.86      0.85       246
weighted avg       0.90      0.89      0.89       246



In [65]:
from sklearn import svm

parameters = {'C': [0.1, 1, 10, 100], 'gamma': [0.1, 0.01, 0.001, 0.0001], 'kernel': ['linear', 'rbf', 'sigmoid']}

# Create the SVM classifier
svm_classifier = svm.SVC()

# Perform grid search to find the best hyperparameters
grid_search = GridSearchCV(svm_classifier, parameters, cv=5,verbose = 1 )
grid_search.fit(np.concatenate([X_train,X_val], axis=0)
                   ,np.concatenate([Y_train,Y_val],axis=0))

# Print the best hyperparameters and the corresponding mean cross-validated score
print("Best Hyperparameters: ", grid_search.best_params_)
print("Best Score: ", grid_search.best_score_)

# Evaluate the model with the best hyperparameters on the test set
best_classifier = grid_search.best_estimator_
accuracy = best_classifier.score(X_test, Y_test)
print("Test Accuracy: ", accuracy)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
Best Hyperparameters:  {'C': 100, 'gamma': 0.1, 'kernel': 'linear'}
Best Score:  0.9839927910215451
Test Accuracy:  0.983739837398374


In [68]:
cv_results = grid_search.cv_results_
for i, j  in zip(cv_results['mean_test_score'], cv_results['params']):
    print(i,j)

0.9250901122306873 {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear'}
0.7963647906938641 {'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'}
0.6107663635618907 {'C': 0.1, 'gamma': 0.1, 'kernel': 'sigmoid'}
0.9250901122306873 {'C': 0.1, 'gamma': 0.01, 'kernel': 'linear'}
0.6203837961825183 {'C': 0.1, 'gamma': 0.01, 'kernel': 'rbf'}
0.5691427050053248 {'C': 0.1, 'gamma': 0.01, 'kernel': 'sigmoid'}
0.9250901122306873 {'C': 0.1, 'gamma': 0.001, 'kernel': 'linear'}
0.37003768329646924 {'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'}
0.37003768329646924 {'C': 0.1, 'gamma': 0.001, 'kernel': 'sigmoid'}
0.9250901122306873 {'C': 0.1, 'gamma': 0.0001, 'kernel': 'linear'}
0.37003768329646924 {'C': 0.1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.37003768329646924 {'C': 0.1, 'gamma': 0.0001, 'kernel': 'sigmoid'}
0.9641394281969362 {'C': 1, 'gamma': 0.1, 'kernel': 'linear'}
0.9269763250593922 {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}
0.5570185139673958 {'C': 1, 'gamma': 0.1, 'kernel': 'sigmoid'}
0.9641394281969362 {'C': 1, 'gamma

In [69]:
with open("SVM_Best_Modle","wb") as fb: 
            pickle.dump(grid_search.best_estimator_,fb)