# Imported Libraries

In [7]:
from sklearn.preprocessing import OrdinalEncoder
from sklearn import svm
import pandas as pd
import numpy as np
import re
import csv

# Fetching and Preprocessing Training Data

In [3]:
# Load the raw training table and make every column that is used later
# free of missing values.
trainingData = pd.read_csv("../Data/train.csv")

# Text columns: str() is applied to every entry, so a missing (NaN) entry
# becomes the literal string "nan".
for textColumn in ("Cabin", "Embarked"):
    trainingData[textColumn] = trainingData[textColumn].apply(str)

# Ticket is parsed as a number; entries that cannot be parsed are coerced
# to NaN and picked up by the fill step below.
trainingData["Ticket"] = pd.to_numeric(trainingData["Ticket"], errors="coerce")

# Numeric columns: missing values are replaced by the sentinel -10.
for numericColumn in ("Age", "Ticket", "Fare"):
    trainingData[numericColumn] = trainingData[numericColumn].fillna(-10)

def nameClass(row):
    """Return the honorific found in a passenger's name.

    Args:
        row: Any mapping-like object whose "Name" entry is a string
            (a DataFrame row when used with ``apply(..., axis=1)``).

    Returns:
        "Mr.", "Mrs." or "Miss." for the first of those titles (checked in
        that order) that occurs anywhere in the name, otherwise "No Title".
    """
    name = row["Name"]
    # The patterns are raw strings so that the backslash reaches the regex
    # engine as an escaped literal dot instead of being an invalid
    # string-literal escape (which Python warns about).
    titlePatterns = (
        (r"Mr\.", "Mr."),
        (r"Mrs\.", "Mrs."),
        (r"Miss\.", "Miss."),
    )
    for pattern, title in titlePatterns:
        if re.search(pattern, name):
            return title
    return "No Title"
        
# Derive a Title column from each passenger's name, then discard the raw name.
trainingData["Title"] = trainingData.apply(nameClass, axis=1)
trainingData = trainingData.drop(columns=["Name"])

features = [
    "Pclass", "Sex", "SibSp", "Parch", "Cabin",
    "Embarked", "Age", "Ticket", "Fare", "Title",
]

# Labels are extracted before trainingData is replaced by the encoder output;
# class 0 is relabelled as -1, every other label is kept as is.
results = trainingData["Survived"].to_numpy()
results = np.where(results == 0, -1, results)

# Ordinal-encode all selected feature columns. From here on trainingData
# holds the encoder's output instead of the original DataFrame.
encoder = OrdinalEncoder().fit(trainingData[features])
trainingData = encoder.transform(trainingData[features])

# Cross-Validation (CV) Sets

In [4]:
# Build NUM_SETS cross-validation splits from contiguous slices of the data.
# Entry i of TrainingSets / TestSets is a two-item list [samples, labels].
# The held-out slice for split 0 is the last one; later splits move towards
# the start of the data.
TrainingSets, TestSets = [], []
NUM_SETS = 5

sampleCount = len(trainingData)
for fold in range(NUM_SETS):
    lower = (NUM_SETS - fold - 1) * sampleCount // NUM_SETS
    upper = (NUM_SETS - fold) * sampleCount // NUM_SETS

    # Training part: everything outside [lower, upper), stitched together.
    TrainingSets.append([
        np.concatenate((trainingData[:lower], trainingData[upper:]), axis=0),
        np.concatenate((results[:lower], results[upper:]), axis=0),
    ])
    # Held-out part: the slice [lower, upper) itself.
    TestSets.append([trainingData[lower:upper], results[lower:upper]])

# Hyper-parameters

In [12]:
# Values tried for the SVC regularization parameter C.
regularization = [0.5, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0]
# Kernels tried for the SVC. "precomputed" is deliberately left out: with that
# kernel fit() expects a square (n_samples, n_samples) kernel matrix, while the
# cross-validation sets hold the encoded feature matrix, so fitting would raise.
kernels = ["linear", "poly", "rbf", "sigmoid"]

# CV-Testing

In [None]:
# Fit and score an SVC for every (C, kernel) pair on every cross-validation
# split. NOTE: this rebinds `results` from the label array to a dict that maps
# (C, kernel) -> list of per-split scores; re-run the earlier cells before
# rebuilding the CV sets.
results = {}

for R in regularization:
    for K in kernels:
        scores = []
        for fold in range(NUM_SETS):
            trainSamples, trainLabels = TrainingSets[fold]
            testSamples, testLabels = TestSets[fold]
            classifier = svm.SVC(C=R, kernel=K)
            classifier.fit(trainSamples, trainLabels)
            scores.append(classifier.score(testSamples, testLabels))
        results[(R, K)] = scores

results



In [11]:
# Spot check: fit a linear-kernel SVC (C = 1.0) on the first split and show
# its predictions for that split's held-out samples.
firstTrainSamples, firstTrainLabels = TrainingSets[0]
clf = svm.SVC(C=1.0, kernel="linear")
clf.fit(firstTrainSamples, firstTrainLabels)
clf.predict(TestSets[0][0])

array([-1, -1, -1, -1,  1,  1, -1, -1,  1, -1, -1, -1, -1, -1,  1,  1, -1,
        1,  1, -1, -1, -1, -1, -1,  1, -1, -1, -1, -1, -1,  1, -1, -1, -1,
       -1,  1, -1, -1,  1, -1, -1, -1,  1, -1, -1, -1, -1,  1, -1, -1, -1,
        1, -1,  1, -1,  1, -1, -1, -1, -1,  1, -1,  1, -1, -1,  1, -1,  1,
        1,  1, -1, -1, -1, -1,  1, -1, -1, -1, -1, -1,  1, -1, -1, -1,  1,
        1, -1,  1, -1,  1, -1, -1, -1, -1, -1,  1, -1,  1, -1, -1, -1,  1,
       -1, -1,  1, -1, -1, -1,  1, -1, -1,  1, -1, -1, -1, -1, -1,  1,  1,
       -1, -1, -1, -1,  1, -1, -1, -1, -1, -1, -1,  1, -1, -1, -1, -1, -1,
       -1,  1, -1, -1,  1,  1,  1,  1,  1, -1,  1, -1, -1, -1,  1,  1, -1,
        1,  1, -1, -1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1,  1,  1, -1,
        1, -1, -1,  1, -1,  1,  1, -1, -1], dtype=int64)