In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from tabulate import tabulate
import math

def main():
  column_names = ["word_freq_make", "word_freq_address" ,"word_freq_all","word_freq_3d","word_freq_our","word_freq_over" ,"word_freq_remove", "word_freq_internet",
                  "word_freq_order" ,"word_freq_mail" ,"word_freq_receive" ,"word_freq_will","word_freq_people","word_freq_report","word_freq_addresses","word_freq_free",
                  "word_freq_business","word_freq_email","word_freq_you","word_freq_credit","word_freq_your","word_freq_font","word_freq_000","word_freq_money","word_freq_hp",
                  "word_freq_hpl","word_freq_george","word_freq_650","word_freq_lab","word_freq_labs","word_freq_telnet","word_freq_857","word_freq_data","word_freq_415","word_freq_85",
                  "word_freq_technology","word_freq_1999","word_freq_parts","word_freq_pm","word_freq_direct" ,"word_freq_cs","word_freq_meeting" ,"word_freq_original" ,"word_freq_project",
                  "word_freq_re","word_freq_edu","word_freq_table","word_freq_conference","char_freq_;","char_freq_(" ,"char_freq_[ " ,"char_freq_!", "char_freq_$", "char_freq_#" ,
                  "capital_run_length_average","capital_run_length_longest","capital_run_length_total","spam"]
                  
  df = pd.read_csv("/content/sample_data/spambase.data",sep=",", names=column_names)
  y = df['spam']
  X = df.drop('spam', axis=1)
 
  scaler = MinMaxScaler()
  X = scaler.fit_transform(X)

  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
 
  print("#############################################################")
  print("                        SUPPORT VECTOR MACHINE               ")
  print("#############################################################")

  print("---------------------------------")
  print("         a) Linear Kernel        ")
  print("---------------------------------")

  linear_C = []
  linear_train = []
  linear_test = []
  linear_bunch = []

  for i in range(10):
        # print(i)
        clf_linear = SVC(kernel='linear', C=(1 / 10000) * np.power(10, i))
        clf_linear.fit(X_train, y_train)
        y_pred_linear_train = clf_linear.predict(X_train)
        y_pred_linear_test = clf_linear.predict(X_test)
        linear_C.append((1 / 10000) * np.power(10, i))
        linear_train.append(accuracy_score(y_train, y_pred_linear_train))
        linear_test.append(accuracy_score(y_test, y_pred_linear_test))
        linear_bunch.append([str(i+1), str(linear_C[i]), str(round(linear_train[i]*100,2)), str(round(linear_test[i]*100, 2))])
  print(tabulate(linear_bunch, headers=["Index", "C", "Train_Accuracy", "Test_Accuracy"], tablefmt='orgtbl'))

  maxpos_linear = linear_test.index(max(linear_test))
  print("\n++++ For SVM with a linear Kernel,")
  print("=> Best Test Accuracy is obtained for C = " + str(linear_C[maxpos_linear]))
  print("=> For this C, Train Accuracy = " + str(round(linear_train[maxpos_linear]*100,2)))
  print("=> For this C, Test Accuracy  = " + str(round(linear_test[maxpos_linear]*100,2)) + "\n")

  print("---------------------------------")
  print("         b) Quadratic Kernel     ")
  print("---------------------------------")

  quadratic_C = []
  quadratic_train = []
  quadratic_test = []
  quadratic_bunch=[]

  for i in range(10):
        # print(i)
        clf_quadratic = SVC(kernel='poly', C=(1 / 10000) * np.power(10, i), degree = 2)
        clf_quadratic.fit(X_train, y_train)
        y_pred_quadratic_train = clf_quadratic.predict(X_train)
        y_pred_quadratic_test = clf_quadratic.predict(X_test)
        quadratic_C.append((1 / 10000) * np.power(10, i))
        quadratic_train.append(accuracy_score(y_train, y_pred_quadratic_train))
        quadratic_test.append(accuracy_score(y_test, y_pred_quadratic_test))
        quadratic_bunch.append([str(i+1), str(quadratic_C[i]), str(round(quadratic_train[i]*100,2)), str(round(quadratic_test[i]*100, 2))])
  print(tabulate(quadratic_bunch, headers=["Index", "C", "Train_Accuracy", "Test_Accuracy"], tablefmt='orgtbl'))

  maxpos_quadratic = quadratic_test.index(max(quadratic_test))
  print("\n++++ For SVM with a quadratic Kernel,")
  print("=> Best Test Accuracy is obtained for C = " + str(quadratic_C[maxpos_quadratic]))
  print("=> For this C, Train Accuracy " + str(round(quadratic_train[maxpos_quadratic]*100,2)))
  print("=> For this C, Test Accuracy " + str(round(quadratic_test[maxpos_quadratic]*100,2)) + "\n")

  print("---------------------------------")
  print("       c) Radial Basis Kernel    ")
  print("---------------------------------")

  rad_bas_C = []
  rad_bas_train = []
  rad_bas_test = []
  rad_bas_bunch = []
  for i in range(10):
        clf_rad_bas = SVC(C=(1 / 100) * np.power(10, i))
        clf_rad_bas.fit(X_train, y_train)
        y_pred_rad_bas_train = clf_rad_bas.predict(X_train)
        y_pred_rad_bas_test = clf_rad_bas.predict(X_test)
        rad_bas_C.append((1 / 100) * np.power(10, i))
        rad_bas_train.append(accuracy_score(y_train, y_pred_rad_bas_train))
        rad_bas_test.append(accuracy_score(y_test, y_pred_rad_bas_test))
        rad_bas_bunch.append([str(i+1), str(rad_bas_C[i]), str(round(rad_bas_train[i]*100,2)), str(round(rad_bas_test[i]*100, 2))])
  print(tabulate(rad_bas_bunch, headers=["Index", "C", "Train_Accuracy", "Test_Accuracy"], tablefmt='orgtbl'))

  maxpos_rad_bas = rad_bas_test.index(max(rad_bas_test))
  print("\n++++ For SVM with a Radial Basis Kernel,")
  print("=> Best Test Accuracy is obtained, for C = " + str(rad_bas_C[maxpos_rad_bas]))
  print("=> For this C, Train data accuracy " + str(round(rad_bas_train[maxpos_rad_bas]*100,2)))
  print("=> For this C, Test data accuracy " + str(round(rad_bas_test[maxpos_rad_bas]*100,2)) + "\n")
  print()
  print()
  print()

if __name__ == "__main__":
    main()

#############################################################
                        SUPPORT VECTOR MACHINE               
#############################################################
---------------------------------
         a) Linear Kernel        
---------------------------------
|   Index |           C |   Train_Accuracy |   Test_Accuracy |
|---------+-------------+------------------+-----------------|
|       1 |      0.0001 |            61.14 |           58.41 |
|       2 |      0.001  |            61.14 |           58.41 |
|       3 |      0.01   |            61.3  |           57.87 |
|       4 |      0.1    |            84.76 |           84.04 |
|       5 |      1      |            90.14 |           89.36 |
|       6 |     10      |            92.85 |           91.53 |
|       7 |    100      |            93.59 |           91.53 |
|       8 |   1000      |            93.83 |           91.31 |
|       9 |  10000      |            93.51 |           91.21 |
|      10 | 100000 