In [1]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.utils import shuffle

# Load the raw dataset from the working directory.
data = pd.read_csv('Dataset.csv')
# Shuffle the rows. The previous version evaluated the shuffled frame but never
# assigned it, so `data` silently stayed in file order. A dedicated seeded
# generator keeps the row order identical across kernel restarts without
# touching NumPy's global random state.
data = data.iloc[np.random.RandomState(42).permutation(len(data))]
# Drop the shuffled original index so rows are labelled 0..n-1 again.
data = data.reset_index(drop=True)
def clean_dataset(df):
    """Return a float64 copy of ``df`` without rows holding NaN or +/-inf.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to clean. It is left unmodified; the cleaned rows are returned
        as a new frame.

    Returns
    -------
    pd.DataFrame
        The rows of ``df`` that contain no NaN, ``inf`` or ``-inf`` in any
        column, cast to ``np.float64``. Surviving rows keep their original
        index labels.

    Raises
    ------
    AssertionError
        If ``df`` is not a ``pd.DataFrame``.
    """
    assert isinstance(df, pd.DataFrame), "df needs to be a pd.DataFrame"
    # Non-inplace dropna: the caller's frame must not change as a side effect.
    without_nan = df.dropna()
    # `axis` has to be passed by keyword: pandas 2.0 made the arguments of
    # DataFrame.any keyword-only, so the positional form `.any(1)` raises.
    has_bad_value = without_nan.isin([np.nan, np.inf, -np.inf]).any(axis=1)
    return without_nan[~has_bad_value].astype(np.float64)
# Rebind `data` to the frame returned by clean_dataset (defined just above):
# rows with NaN / +-inf removed and every column cast to float64.
data = clean_dataset(data)


In [2]:
from sklearn import svm
from sklearn.metrics import classification_report

# The last column of the cleaned frame is used as the class label; every
# other column is a feature.
target = data.columns[-1]
feature = data.drop(target, axis = 1)
# Select the label column directly as a 1-D Series. The previous
# `data.drop(feature, axis=1)` passed a whole DataFrame as the labels to drop
# and produced a one-column 2-D frame, whereas scikit-learn estimators expect
# `y` with shape (n_samples,).
label = data[target]
# Hold out 20% of the rows for the final report; the fixed seed makes the
# split (and therefore the printed reports) repeatable between runs.
feature_train, feature_test, label_train, label_test = train_test_split(
    feature, label, test_size=0.2, random_state=42)

In [3]:
from sklearn.model_selection import KFold,cross_validate
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
# Metrics requested from cross_validate; get_avg_eva reads the result back
# under the key 'test_accuracy'.
scoring =['accuracy']

# Shuffled 10-fold splitter shared by every get_avg_eva call. With
# shuffle=True and no random_state, each split() call draws a fresh
# permutation, so the classifiers being compared would each be scored on
# different folds; an integer seed yields the same folds on every call.
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
def get_avg_eva(clf):
    """Cross-validate ``clf`` and print its mean score time and mean accuracy.

    The evaluation uses the module-level ``feature`` / ``label`` data, the
    module-level ``kfold`` splitter and the module-level ``scoring`` list.

    Parameters
    ----------
    clf
        Estimator handed to ``cross_validate`` as ``estimator``.

    Returns
    -------
    The object returned by ``cross_validate``; this function reads its
    ``'score_time'`` and ``'test_accuracy'`` entries.
    """
    cv_results = cross_validate(clf, X=feature, y=label, scoring=scoring, cv=kfold)

    # Average each per-fold array down to one number before printing.
    mean_score_time = np.mean(cv_results['score_time'])
    mean_accuracy = np.mean(cv_results['test_accuracy'])

    print('score_time' + str(mean_score_time))
    print('accuracy:' + str(mean_accuracy))
    return cv_results

In [4]:
from sklearn.model_selection import cross_val_score
import joblib
# Baseline: SVC with its default settings (labelled "RBF" in the printout).
print("RBF: ")
clf = svm.SVC()

# Cross-validated score on the full feature/label data.
result_RBF = get_avg_eva(clf)

# Fit on the training split, then report on the held-out split.
pred = clf.fit(feature_train, label_train).predict(feature_test)
print(classification_report(label_test, pred))

# Persist the fitted model; as the cell's last expression, the list returned
# by joblib.dump is what the notebook displays.
joblib.dump(clf, 'clf.joblib')

RBF: 
score_time8.468435597419738
accuracy:0.9107008520277924
              precision    recall  f1-score   support

         0.0       0.90      0.98      0.94      1736
         1.0       0.93      0.69      0.79       647

    accuracy                           0.90      2383
   macro avg       0.91      0.84      0.87      2383
weighted avg       0.90      0.90      0.90      2383



['clf.joblib']

In [5]:
import math
from sklearn.metrics.pairwise import euclidean_distances
def log_kernel(x,y):
    """Kernel callable for ``svm.SVC``: ``-log(d + 1)`` of pairwise distances.

    Parameters
    ----------
    x, y
        Sample collections forwarded unchanged to ``euclidean_distances``.

    Returns
    -------
    The matrix returned by ``euclidean_distances(x, y)`` with ``-log(d + 1)``
    applied element-wise.
    """
    return -np.log(euclidean_distances(x, y) + 1)

# SVC driven by the custom log kernel defined above.
print("log: ")
clf_log = svm.SVC(kernel=log_kernel)

# Cross-validated score on the full feature/label data.
result_log = get_avg_eva(clf_log)

# Fit on the training split, then report on the held-out split.
pred_log = clf_log.fit(feature_train, label_train).predict(feature_test)
print(classification_report(label_test, pred_log))

# NOTE(review): the saved estimator holds a reference to log_kernel;
# presumably the function must be defined under the same name wherever the
# file is loaded back - confirm before relying on the dump.
joblib.dump(clf_log, 'clf_log.joblib')

log: 
score_time1.0311553478240967
accuracy:0.9529997069745686
              precision    recall  f1-score   support

         0.0       0.95      0.98      0.97      1736
         1.0       0.95      0.87      0.90       647

    accuracy                           0.95      2383
   macro avg       0.95      0.92      0.94      2383
weighted avg       0.95      0.95      0.95      2383



['clf_log.joblib']

In [6]:
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
# Linear SVM behind a StandardScaler step, bundled as one pipeline so the
# same scaling step is part of every fit and predict call.
print("linear: ")
clf_linear = make_pipeline(StandardScaler(), LinearSVC())

# Cross-validated score on the full feature/label data.
result_linear = get_avg_eva(clf_linear)

# Fit on the training split, then report on the held-out split.
pred_linear = clf_linear.fit(feature_train, label_train).predict(feature_test)
print(classification_report(label_test, pred_linear))

linear: 
score_time0.07166616916656494
accuracy:0.8383567471923092
              precision    recall  f1-score   support

         0.0       0.89      0.89      0.89      1736
         1.0       0.71      0.71      0.71       647

    accuracy                           0.84      2383
   macro avg       0.80      0.80      0.80      2383
weighted avg       0.84      0.84      0.84      2383



In [7]:
import joblib
import math
from sklearn.metrics.pairwise import euclidean_distances
def log_kernel(x,y):
    """Kernel callable for ``svm.SVC``: negated log of (pairwise distance + 1).

    NOTE(review): this repeats the ``log_kernel`` definition from cell In [5]
    with the same computation; the later definition shadows the earlier one.

    Parameters
    ----------
    x, y
        Sample collections forwarded unchanged to ``euclidean_distances``.

    Returns
    -------
    ``-log(d + 1)`` applied element-wise to ``euclidean_distances(x, y)``.
    """
    distances = euclidean_distances(x, y)
    shifted = distances + 1  # shift so a zero distance maps to log(1) = 0
    return -np.log(shifted)

# Re-train the log-kernel SVC on the training split (same steps as cell
# In [5], minus the cross-validation and the dump to disk).
print("log: ")
clf_log = svm.SVC(kernel=log_kernel)

# Fit on the training split, then report on the held-out split.
pred_log = clf_log.fit(feature_train, label_train).predict(feature_test)
print(classification_report(label_test, pred_log))



log: 
              precision    recall  f1-score   support

         0.0       0.95      0.98      0.97      1736
         1.0       0.95      0.87      0.90       647

    accuracy                           0.95      2383
   macro avg       0.95      0.92      0.94      2383
weighted avg       0.95      0.95      0.95      2383



In [8]:
# Persist the linear pipeline (fitted in cell In [6]) to 'clf_linear.joblib'
# in the working directory; the list returned by joblib.dump is the cell output.
joblib.dump(clf_linear,'clf_linear.joblib')

['clf_linear.joblib']