In [1]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, log_loss
from sklearn import model_selection

In [2]:
# Load the pre-encoded feature tables (mean encoding and binary encoding,
# train and test variants) produced by the Feature_Encoding step.
(
    df_train_mean_encoding,
    df_test_mean_encoding,
    df_train_binary_encoding,
    df_test_binary_encoding,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../Feature_Encoding/data/train_mean_encoding.csv',
        '../Feature_Encoding/data/test_mean_encoding.csv',
        '../Feature_Encoding/data/train_binary_encoding.csv',
        '../Feature_Encoding/data/test_binary_encoding.csv',
    )
)

# Cleaned training table from the Feature_Engineering step; its `Target`
# column is used as the label vector further down.
train = pd.read_csv('../Feature_Engineering/data/other-cleaned_train.csv')

In [3]:
def cross_val(model, x_train, y_train):
    """Print the mean 5-fold cross-validation score of ``model``.

    Parameters
    ----------
    model : estimator
        Estimator handed to ``model_selection.cross_val_score``.
    x_train : array-like
        Feature matrix used for cross-validation.
    y_train : array-like
        Labels matching ``x_train``.

    Returns
    -------
    None
        The mean of the per-fold scores is printed, not returned.
    """
    fold_scores = model_selection.cross_val_score(
        estimator=model,
        X=x_train,
        y=y_train,
        cv=5,
    )
    mean_score = fold_scores.mean()
    print(mean_score)

In [4]:
def test_model(model, x_test, y_test):
    """Print accuracy (in percent) and log-loss of ``model`` on evaluation data.

    Column 1 of ``model.predict_proba(x_test)`` is taken as the predicted
    score. Log-loss is computed on those scores directly; accuracy is
    computed on the scores after rounding them.

    Parameters
    ----------
    model : estimator
        Fitted estimator that must expose ``predict_proba``.
    x_test : array-like
        Features to score.
    y_test : array-like
        True labels for ``x_test`` (presumably binary 0/1, since rounded
        scores are compared against them -- verify against the data).

    Returns
    -------
    None
        The metrics are printed, not returned.
    """
    second_class_proba = model.predict_proba(x_test)[:, 1]

    loss = log_loss(y_test, second_class_proba)
    accuracy_percent = accuracy_score(y_test, second_class_proba.round()) * 100.0

    print("Accuracy: %.2f%%, Logloss: %.2f" % (accuracy_percent, loss))

In [5]:
def svc(x_train, y_train, x_test, y_test, random_state=42):
  """Tune an SVC with a randomized hyper-parameter search and print its scores.

  A ``RandomizedSearchCV`` over ``C``, ``gamma`` and ``kernel`` is fitted on
  the training data. The best parameters, the accuracy of the best estimator
  on ``(x_test, y_test)`` and accuracy / precision / recall / F1 on the
  training data are printed.

  Parameters
  ----------
  x_train, y_train : array-like
      Data the search is fitted on.
  x_test, y_test : array-like
      Held-out data used for the printed ``svc:`` score.
  random_state : int or None, default 42
      Seed passed to both the SVC and the randomized search so that repeated
      runs sample the same candidates. Pass ``None`` for unseeded behaviour.

  Returns
  -------
  RandomizedSearchCV
      The fitted search object (refitted on all of ``x_train`` with the best
      parameters).
  """
  # probability=True is required for predict_proba, which test_model() calls
  # on the object returned here; a default SVC() does not provide it.
  # It makes fitting slower because SVC runs an internal cross-validation.
  classifier = SVC(probability=True, random_state=random_state)

  parameters = {
      'C': [0.1, 1, 10, 100, 1000],
      'gamma': [1, 0.1, 0.01, 0.0001],
      'kernel': ['linear', 'sigmoid', 'poly', 'rbf'],
  }
  grid_s_p = model_selection.RandomizedSearchCV(
      classifier, parameters, refit=True, random_state=random_state)

  # fit() returns the search object itself, so `model` and `grid_s_p` are the
  # same fitted instance.
  model = grid_s_p.fit(x_train, y_train)
  svc_best = model.best_estimator_

  print(model.best_params_)

  print('svc: {}'.format(svc_best.score(x_test, y_test)))

  # NOTE(review): the metrics below are computed on the TRAINING data, so they
  # are optimistic compared with the held-out score printed above.
  preds = model.predict(x_train)
  print("Accuracy score: ", accuracy_score(y_train, preds))
  print("Precision score: ", precision_score(y_train, preds))
  print("Recall score: ", recall_score(y_train, preds))
  print("f1 score: ", f1_score(y_train, preds))

  return model

In [6]:
# Label vector; assumes the rows of `train` are aligned with the rows of both
# encoded training frames -- TODO confirm against the encoding step.
y = train.Target

# One fixed seed for both splits: results are reproducible across runs, and
# because the split indices depend only on the row count, `y` and the seed,
# both encodings are partitioned into the same train/validation rows.
SPLIT_SEED = 42

(x_train_mean_encoding, x_validation_mean_encoding,
 y_train_mean_encoding, y_validation_mean_encoding) = model_selection.train_test_split(
    df_train_mean_encoding, y, test_size=0.3, stratify=y, random_state=SPLIT_SEED)

(x_train_binary_encoding, x_validation_binary_encoding,
 y_train_binary_encoding, y_validation_binary_encoding) = model_selection.train_test_split(
    df_train_binary_encoding, y, test_size=0.3, stratify=y, random_state=SPLIT_SEED)

## Mean Encoding

In [None]:
# Tune and fit the SVC on the mean-encoded training split; the mean-encoded
# validation split is used for the held-out score printed by svc().
svc_mean = svc(
    x_train=x_train_mean_encoding,
    y_train=y_train_mean_encoding,
    x_test=x_validation_mean_encoding,
    y_test=y_validation_mean_encoding,
)

In [None]:
# Evaluate the mean-encoding model on the mean-encoded validation split: it
# was fitted on mean-encoded features, so binary-encoded inputs do not match.
test_model(svc_mean, x_validation_mean_encoding, y_validation_mean_encoding)

In [None]:
# Cross-validate on the mean-encoded training split so this score describes
# the mean encoding rather than repeating the binary-encoding run.
cross_val(svc_mean, x_train_mean_encoding, y_train_mean_encoding)

## Binary Encoding

In [None]:
# Tune and fit the SVC on the binary-encoded training split; the binary-encoded
# validation split is used for the held-out score printed by svc().
svc_binary = svc(
    x_train=x_train_binary_encoding,
    y_train=y_train_binary_encoding,
    x_test=x_validation_binary_encoding,
    y_test=y_validation_binary_encoding,
)

In [None]:
# Accuracy and log-loss of the binary-encoding model on its validation split.
test_model(
    model=svc_binary,
    x_test=x_validation_binary_encoding,
    y_test=y_validation_binary_encoding,
)

In [None]:
# Mean 5-fold cross-validation score on the binary-encoded training split.
cross_val(
    model=svc_binary,
    x_train=x_train_binary_encoding,
    y_train=y_train_binary_encoding,
)