In [1]:
import time
import warnings
from pathlib import Path

warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns; sns.set_style("dark")
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.model_selection import cross_val_score, cross_val_predict, train_test_split, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
%matplotlib inline

In [2]:
# Load the training and test CSVs from the sibling data directory.
train_csv = "../data/train_small.csv"
test_csv = "../data/test_fe.csv"
train = pd.read_csv(train_csv)
test = pd.read_csv(test_csv)

FileNotFoundError: [Errno 2] No such file or directory: '../data/train_small.csv'

In [None]:
# Column names of the loaded training frame.
train.columns

In [None]:
# Preview the first rows.
train.head()

In [None]:
# Separate the features (every column except "target") from the label column.
x_train_use, y_train_use = train.drop("target", axis = 1), train["target"]

In [None]:
# NOTE(review): de-duplication is disabled. Enabling it as written would drop and
# re-index rows of x_train_use only, leaving it misaligned with y_train_use.
#x_train_use = x_train_use.drop_duplicates().reset_index(drop=True)

In [None]:
train.shape

In [None]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the split reproducible.
x_train, x_val, y_train, y_val = train_test_split(x_train_use, y_train_use, train_size=0.8, random_state = 42)

In [None]:
x_train.shape, y_train.shape

### Support Vector Classifiers

In [None]:
# Baseline: linear-kernel SVM fitted on the training split.
svc = SVC(kernel="linear")
# Pass the labels as the 1-D Series returned by train_test_split. scikit-learn
# expects y of shape (n_samples,); an (n, 1) column vector is only accepted
# because it is ravelled internally with a DataConversionWarning.
svc.fit(x_train, y_train)

In [None]:
# Predict on the same rows the linear SVC was fitted on, so the score below is a training score.
y_pred = svc.predict(x_train)

In [None]:
# F1 on the training split.
f1_score(y_train, y_pred)

In [None]:
# Grid-search the polynomial degree of an SVC. The estimator sits in a
# single-step Pipeline, so the tuned parameter is addressed as "svc__degree".
svc_pipeline = Pipeline([("svc", SVC(kernel = "poly"))])
# Start at degree 1: with degree 0 the polynomial kernel is constant (every
# kernel value equals 1), so that candidate cannot separate the classes.
param_grid = [{"svc__degree": np.arange(1, 10)}]
# Select on F1, the metric this notebook reports and cross-validates with,
# instead of the estimator's default accuracy score.
grid_search = GridSearchCV(svc_pipeline, param_grid, cv = 5, scoring = "f1")

In [None]:
# Run the search on the training split. The labels stay a 1-D Series:
# reshaping them to (n, 1) only makes scikit-learn ravel them again with a
# DataConversionWarning.
grid_search.fit(x_train, y_train)

In [None]:
# Parameter combination chosen by the grid search.
grid_search.best_params_

In [None]:
# Estimator (the pipeline) configured with the selected parameters.
model = grid_search.best_estimator_

In [None]:
# NOTE(review): GridSearchCV was built without refit=..., and its documented
# default (refit=True) already fits best_estimator_ on the data passed to
# grid_search.fit, so this second fit on x_train looks redundant; confirm.
model.fit(x_train, y_train)

In [None]:
# Predictions on the training split (overwrites the y_pred produced by the linear SVC).
y_pred = model.predict(x_train)

In [None]:
f1_score(y_train, y_pred)

In [None]:
# Per-class precision / recall / F1 on the training split.
print(classification_report(y_train, y_pred))

In [None]:
# Class counts of the label column in the full training frame.
train["target"].value_counts()

In [None]:
# Switch the seaborn style for the plots below. seaborn is already imported as
# `sns` in the notebook's import cell, so it is not imported again here.
sns.set_style("darkgrid")

In [None]:
def plot_confusion_matrix(title, conf_matrix, output_dir="../conf"):
    """Draw a 2x2 confusion matrix as an annotated heatmap, save it as PNG and show it.

    Parameters
    ----------
    title : str
        Figure title; also used as the file stem of the saved image.
    conf_matrix : array-like of shape (2, 2)
        Cell counts to plot. The tick labels assume rows are the actual class
        and columns the predicted class, in the order 0, 1.
    output_dir : str or path-like, default "../conf"
        Directory the image is written to as ``<title>.png``. It is created
        (including parents) when it does not exist yet.
    """
    target_dir = Path(output_dir)
    # savefig does not create missing folders, so make sure the target exists.
    target_dir.mkdir(parents=True, exist_ok=True)

    # Scope the theme and the enlarged fonts to this figure instead of calling
    # sns.set(), which would change the global style for every later plot.
    # Saving and showing happen inside the context so that text created
    # lazily at draw time still picks up the scaled font sizes.
    with sns.axes_style("darkgrid"), sns.plotting_context("notebook", font_scale=1.2):
        fig, ax = plt.subplots(figsize=(8, 6))
        sns.heatmap(conf_matrix, annot=True, fmt='g', cmap='Blues', cbar=False,
                    xticklabels=['Predicted 0', 'Predicted 1'],
                    yticklabels=['Actual 0', 'Actual 1'],
                    ax=ax)
        ax.set_xlabel('Predicted Labels')
        ax.set_ylabel('True Labels')
        ax.set_title(title)
        fig.savefig(target_dir / (title + ".png"))
        plt.show()

In [None]:
# Confusion matrix of the tuned model on the training split.
conf = confusion_matrix(y_train, y_pred)
# fmt="g" prints the cell counts as plain integers; seaborn's default ".2g"
# would render larger counts in scientific notation (e.g. 1.2e+03).
# The trailing semicolon suppresses the Axes repr in the cell output.
sns.heatmap(conf, annot=True, fmt="g", cmap= "Blues");

In [None]:
# Score the tuned model on the held-out validation split.
y_pred_val = model.predict(x_val)

In [None]:
f1_score(y_val, y_pred_val)

In [None]:
print(classification_report(y_val, y_pred_val))

#### Extracting results

In [None]:
# Refit the selected pipeline on the full training frame and time the fit.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be distorted by system clock adjustments the way a
# difference of time.time() readings can.
start = time.perf_counter()
model.fit(x_train_use, y_train_use)
elapsed = time.perf_counter() - start

In [None]:
# Predictions on the same rows the model was last fitted on; metrics derived from them are training scores.
pred_use = model.predict(x_train_use)

In [None]:
# Cross-validated F1 on the full training frame (cv is left at scikit-learn's default).
cross_val_score(model, x_train_use, y_train_use, scoring="f1")

In [None]:
print(classification_report(y_train_use, pred_use))

In [None]:
# Training-set metrics kept for the results table.
f1_train = f1_score(y_train_use, pred_use)
prec_train = precision_score(y_train_use, pred_use)
recall_train = recall_score(y_train_use, pred_use)

In [None]:
test

In [None]:
# Same feature/label separation as for the training frame.
x_test, y_test = test.drop("target", axis =1), test["target"]

In [None]:
pred_test = model.predict(x_test)

In [None]:
print(classification_report(y_test, pred_test))

In [None]:
# Test-set metrics kept for the results table.
f1_test = f1_score(y_test, pred_test)
prec_test = precision_score(y_test, pred_test)
recall_test = recall_score(y_test, pred_test)

In [None]:
# Test-set confusion matrix, plotted by the next cell.
conf = confusion_matrix(y_test, pred_test)

In [None]:
# Draws the heatmap titled "svc"; the helper also saves it as svc.png.
plot_confusion_matrix("svc",conf)

In [None]:
# Gather the SVC metrics and the fit time into a single-row summary frame.
# Key order fixes the column order of the frame.
summary_row = {
    "f1_test": f1_test,
    "f1_train": f1_train,
    "prec_train": prec_train,
    "prec_test": prec_test,
    "recall_train": recall_train,
    "recall_test": recall_test,
    "model": "SVC",
    "elapsed": elapsed,
}
df = pd.DataFrame([summary_row])

In [None]:
# Display the one-row results summary.
df

In [None]:
# Persist the summary. The folder is created first because to_csv raises
# OSError when the target directory does not exist.
results_path = Path("../results/regular/svc.csv")
results_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(results_path, index=False)