<a href="https://colab.research.google.com/github/RJRuhan/notebook/blob/main/SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%cd /content/drive/MyDrive/Thesis/codes
%ls

/content/drive/MyDrive/Thesis/codes
extract_feature.py  IO_helpers.py      plot.py        [0m[01;34m__pycache__[0m/  verisense_step_count.py
features.csv        [01;34mmodified_dataset[0m/  preprocess.py  trainData.py


In [11]:
import os
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix, classification_report, f1_score, precision_recall_fscore_support
from sklearn import preprocessing
from sklearn import svm
import matplotlib.pylab as plt
from plot import *
from extract_feature import *
from trainData import *

%matplotlib

Using matplotlib backend: agg


In [4]:
X_train_sets, Y_train_sets, X_test_sets, Y_test_sets, X_validation_set, Y_validation_set, X_train, Y_train = load_dataset_feature_gender('features.csv', n_folds=7)
print(len(X_train_sets))
print(X_train_sets[0].shape)
print(Y_train_sets[0].shape)
print(X_test_sets[0].shape)
print(Y_test_sets[0].shape)
print(X_validation_set.shape)
print(Y_validation_set.shape)
print(X_train.shape)
print(Y_train.shape)

7
(266, 409)
(266,)
(46, 409)
(46,)
(134, 409)
(134,)
(312, 409)
(312,)


In [5]:
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_validation_set = scaler.transform(X_validation_set)

num_set = len(X_train_sets)

for i in range(num_set):
  X_train_sets[i] = scaler.transform(X_train_sets[i])
  X_test_sets[i] = scaler.transform(X_test_sets[i])





In [14]:
def kernel_opt():
    label_names = ['male','female']
    kernels = [ 'linear', 'poly', 'rbf', 'sigmoid' ]
    num_set = len(X_train_sets)

    for kern in kernels:

      precisions = []
      recalls = []
      f1_scores = []

      print('Kernel: ', kern)

      for i in range(num_set):

        clf_r_l = svm.SVC(kernel=kern)
        clf_r_l.fit(X_train_sets[i], Y_train_sets[i])
        y_pred_te = clf_r_l.predict(X_test_sets[i])

        # print('Training set report')
        # print(classification_report(Y_train_sets[i], y_pred_tr, target_names=label_names))
        # print('Test set report')
        # print(classification_report(Y_test_sets[i], y_pred_te, target_names=label_names))

        precision, recall, f1_score, _ = precision_recall_fscore_support(Y_test_sets[i], y_pred_te, average='macro')

        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(f1_score)

      print('Kernel: ', kern)
      print('Average precision: ', np.mean(precisions))
      print('Average recall: ', np.mean(recalls))
      print('Average f1 score: ', np.mean(f1_scores))

kernel_opt()


Kernel:  linear
Kernel:  linear
Average precision:  0.6731810922456175
Average recall:  0.6701016374929419
Average f1 score:  0.668665804277336
Kernel:  poly
Kernel:  poly
Average precision:  0.677563045609012
Average recall:  0.6177300959909655
Average f1 score:  0.5810000898586912
Kernel:  rbf
Kernel:  rbf
Average precision:  0.7507227461908783
Average recall:  0.7394127611518916
Average f1 score:  0.7375424266267858
Kernel:  sigmoid
Kernel:  sigmoid
Average precision:  0.7601074294683318
Average recall:  0.7526821005081875
Average f1 score:  0.7507910529187125


In [18]:
def coeff_opt(param_range_C,kern):
    num_set = len(X_train_sets)
    print("\nkernel : ", kern)
    for i,C in enumerate(param_range_C):
      precisions = []
      recalls = []
      f1_scores = []

      for i in range(num_set):
          clf_r_l = svm.SVC(kernel=kern, C=C)
          clf_r_l.fit(X_train_sets[i], Y_train_sets[i])
          y_pred_te = clf_r_l.predict(X_test_sets[i])

          precision, recall, f1_score, _ = precision_recall_fscore_support(Y_test_sets[i], y_pred_te, average='macro')

          precisions.append(precision)
          recalls.append(recall)
          f1_scores.append(f1_score)

      print('\nCoeff: ', C)
      print('Average precision: ', np.mean(precisions))
      print('Average recall: ', np.mean(recalls))
      print('Average f1 score: ', np.mean(f1_scores))

    return

param_range = np.array([0.01, 0.1, 1, 10, 100, 1000])
coeff_opt(param_range,'rbf')
coeff_opt(param_range,'sigmoid')



kernel :  rbf

Coeff:  0.01
Average precision:  0.7067281862552937
Average recall:  0.682523997741389
Average f1 score:  0.674167446230171

Coeff:  0.1
Average precision:  0.7067281862552937
Average recall:  0.682523997741389
Average f1 score:  0.674167446230171

Coeff:  1.0
Average precision:  0.7507227461908783
Average recall:  0.7394127611518916
Average f1 score:  0.7375424266267858

Coeff:  10.0
Average precision:  0.7476148463337904
Average recall:  0.7367306606437042
Average f1 score:  0.7347793631919554

Coeff:  100.0
Average precision:  0.7476148463337904
Average recall:  0.7367306606437042
Average f1 score:  0.7347793631919554

Coeff:  1000.0
Average precision:  0.7476148463337904
Average recall:  0.7367306606437042
Average f1 score:  0.7347793631919554

kernel :  sigmoid

Coeff:  0.01
Average precision:  0.6872829179518697
Average recall:  0.6722190852625636
Average f1 score:  0.6683274393072958

Coeff:  0.1
Average precision:  0.7259855566593879
Average recall:  0.714003387

In [22]:
def gamma_opt(param_range_gamma,kern):
    num_set = len(X_train_sets)
    print("\nkernel : ", kern)
    for i,G in enumerate(param_range_gamma):
      precisions = []
      recalls = []
      f1_scores = []

      for i in range(num_set):
          clf_r_l = svm.SVC(kernel=kern, gamma=G, C=1)
          clf_r_l.fit(X_train_sets[i], Y_train_sets[i])
          y_pred_te = clf_r_l.predict(X_test_sets[i])

          precision, recall, f1_score, _ = precision_recall_fscore_support(Y_test_sets[i], y_pred_te, average='macro')

          precisions.append(precision)
          recalls.append(recall)
          f1_scores.append(f1_score)

      print('\nGamma : ', G)
      print('\nAverage precision: ', np.mean(precisions))
      print('Average recall: ', np.mean(recalls))
      print('Average f1 score: ', np.mean(f1_scores))

    return

param_range = np.array(['scale','auto'])
gamma_opt(param_range,'rbf')
gamma_opt(param_range,'sigmoid')



kernel :  rbf

Gamma :  scale

Average precision:  0.7507227461908783
Average recall:  0.7394127611518916
Average f1 score:  0.7375424266267858

Gamma :  auto

Average precision:  0.7507227461908783
Average recall:  0.7394127611518916
Average f1 score:  0.7375424266267858

kernel :  sigmoid

Gamma :  scale

Average precision:  0.7601074294683318
Average recall:  0.7526821005081875
Average f1 score:  0.7507910529187125

Gamma :  auto

Average precision:  0.7608049745104337
Average recall:  0.7528232636928289
Average f1 score:  0.7507857276640975


In [27]:
label_names = ['male','female']


print('kernel : rbf\n')

machine = svm.SVC(kernel='rbf',C=1)
machine.fit(X_train, Y_train)

y_pred_cov = machine.predict(X_validation_set)
print('\nValidation set report')
print(classification_report(Y_validation_set, y_pred_cov, target_names=label_names))

print('kernel : sigmoid\n')
machine = svm.SVC(kernel='sigmoid',C=1)
machine.fit(X_train, Y_train)

y_pred_cov = machine.predict(X_validation_set)
print('\nValidation set report')
print(classification_report(Y_validation_set, y_pred_cov, target_names=label_names))

print('kernel : linear\n')
machine = svm.SVC(kernel='linear',C=1)
machine.fit(X_train, Y_train)

y_pred_cov = machine.predict(X_validation_set)
print('\nValidation set report')
print(classification_report(Y_validation_set, y_pred_cov, target_names=label_names))


print('kernel : poly\n')
machine = svm.SVC(kernel='poly',C=1)
machine.fit(X_train, Y_train)

y_pred_cov = machine.predict(X_validation_set)
print('\nValidation set report')
print(classification_report(Y_validation_set, y_pred_cov, target_names=label_names))


kernel : rbf


Validation set report
              precision    recall  f1-score   support

        male       0.69      0.66      0.67        67
      female       0.67      0.70      0.69        67

    accuracy                           0.68       134
   macro avg       0.68      0.68      0.68       134
weighted avg       0.68      0.68      0.68       134

kernel : sigmoid


Validation set report
              precision    recall  f1-score   support

        male       0.70      0.55      0.62        67
      female       0.63      0.76      0.69        67

    accuracy                           0.66       134
   macro avg       0.66      0.66      0.65       134
weighted avg       0.66      0.66      0.65       134

kernel : linear


Validation set report
              precision    recall  f1-score   support

        male       0.68      0.67      0.68        67
      female       0.68      0.69      0.68        67

    accuracy                           0.68       134
   macro a

In [None]:
# def run_svm(machine):

#     num_set = len(X_train_sets)

#     precisions = []
#     recalls = []
#     f1_scores = []

#     for i in range(num_set):

#       machine.fit(X_train_sets[i], Y_train_sets[i])
#       y_pred_te = machine.predict(X_test_sets[i])

#       precision, recall, f1_score, _ = precision_recall_fscore_support(Y_test_sets[i], y_pred_te, average='macro')

#       precisions.append(precision)
#       recalls.append(recall)
#       f1_scores.append(f1_score)

#       print('\nAverage precision: ', np.mean(precisions))
#       print('Average recall: ', np.mean(recalls))
#       print('Average f1 score: ', np.mean(f1_scores))

#     label_names = ['male','female']
#     y_pred_cov = machine.predict(X_validation_set)
#     print('\nValidation set report')
#     print(classification_report(Y_validation_set, y_pred_cov, target_names=label_names))

# print('kernel : rbf\n')
# machine = svm.SVC(kernel='rbf',C=1)
# run_svm(machine)

# print('kernel : sigmoid\n')
# machine = svm.SVC(kernel='sigmoid',C=1)
# run_svm(machine)


In [23]:
# def hyperparameters_opt(X_train, y_train, X_test, y_test,
#                             param_range_C, param_range_gamma,kern):
#     accuracy_matrix = np.zeros((param_range_C.shape[0],param_range_gamma.shape[0]))
#     for i,C in enumerate(param_range_C):
#         for j,gamma in enumerate(param_range_gamma):
#             clf_r_l = svm.SVC(kernel=kern, C=C, gamma=gamma)
#             clf_r_l.fit(X_train, y_train)
#             y_pred_tr = clf_r_l.predict(X_train_sets[i])
#             y_pred_te = clf_r_l.predict(X_test_sets[i])

#             accuracy_matrix[i,j] = f1_score(y_test ,y_pred_te, average='macro')

#     C_ind, gamma_ind = np.unravel_index(np.argmax(accuracy_matrix), accuracy_matrix.shape)
#     # print(accuracy_matrix)

#     return param_range_C[C_ind], param_range_gamma[gamma_ind]


# num_set = len(X_train_sets)

# for i in range(num_set):
#   # C and gamma test range
#   param_range = np.array([0.01, 0.1, 1, 10, 100, 1000])
#   # Optimizing C and gamma parameters to achieve the best f1 score
#   C, gamma = hyperparameters_opt(X_train_sets[i], Y_train_sets[i], X_test_sets[i], Y_test_sets[i], param_range, param_range, kernel)

#   print("Hyperparameters optimization results:")
#   print("C: {}".format(C))
#   print("gamma: {}".format(gamma))
#   print("\n")


