## Start the Docker container

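### Run the following command:
```bash
docker run -it --rm -p 8888:8888 -v /Users/sylvain/Data_Science/Kaggle/competition_titanic/datasets:/home/jovyan/datasets jupyter-server
```
The path before `:/home/jovyan/datasets` is the host folder mounted into the container; replace it with the location of your own `datasets` folder.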

In [44]:
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import matplotlib as plt

### Load the datasets

In [45]:
# Load the cleaned Titanic CSVs; the first CSV column is used as the row index.
df_train = pd.read_csv("datasets/train_clean.csv", index_col=0)  # labelled rows (contain "Survived")
df_test = pd.read_csv("datasets/test_clean.csv", index_col=0)  # rows to predict for the submission

### Create the train and test dataframes

In [46]:
# Separate the "Survived" label from the predictors.
# `drop` returns a new frame, so `df_train` itself is left untouched.
target = df_train["Survived"].copy()
features = df_train.drop(columns="Survived")

# Hold out 25% of the labelled rows for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.25,
    random_state=42,
)

In [47]:
# Preview the first five rows of the training frame (5 is the pandas default for head()).
df_train.head(n=5)

Unnamed: 0_level_0,Survived,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S,siblings,couple,firstClass,secondClass,thirdClass,kid,young_adult,mature_adult,senior,low_fare,medium_fare,high_fare
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1,0,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,0,0
2,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,1,0
3,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0
4,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0
5,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0


### Create and fit the model

In [48]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Candidate SVC settings; the search tries every combination (3 * 3 * 2 * 2 = 36).
# NOTE(review): per the scikit-learn SVC docs, `gamma` is not used by the linear
# kernel and `decision_function_shape` is ignored for binary targets, so several of
# these candidates are presumably equivalent fits - confirm and consider trimming.
hyperParameter = [
    {
        "C": [0.3, 1, 3],
        "kernel": ["linear", "rbf", "poly"],
        "gamma": ["scale", "auto"],
        "decision_function_shape": ["ovo", "ovr"],
    }
]

svmEstimator = SVC()

# 8-fold cross-validated search, scored on accuracy.
svm_cv = GridSearchCV(
    estimator=svmEstimator,
    param_grid=hyperParameter,
    scoring="accuracy",
    cv=8,
    verbose=0,
)

# Run the search on the training split only; the held-out split stays unseen.
svm_cv.fit(X=x_train, y=y_train)

# Last expression of the cell: display the winning parameter combination.
svm_cv.best_params_


{'C': 0.3,
 'decision_function_shape': 'ovo',
 'gamma': 'scale',
 'kernel': 'poly'}

In [49]:
# Build the classifier from the grid-search winner instead of hand-copying the
# values, so this cell cannot drift out of sync with the search cell.
# For the recorded search result this is C=0.3, kernel="poly", gamma="scale",
# decision_function_shape="ovo".
# A fresh SVC is created (rather than reusing the search's own fitted estimator)
# so that refitting `model` later does not modify `svm_cv`.
model = SVC(**svm_cv.best_params_)

# Fit on the training split only; the held-out split is kept for scoring.
model.fit(X=x_train, y=y_train)

# Predicted labels for the held-out rows.
ypred = model.predict(X=x_test)

# Last expression of the cell: accuracy on the held-out split.
model.score(X=x_test, y=y_test)

0.820627802690583

### Make a prediction for the competition

In [50]:
# Refit on every labelled row before predicting the competition set.
# Note: this refits `model` in place, replacing the fit made on the training split.
model.fit(X=features, y=target)

# Predict first, then pair each prediction with its row label from df_test's index.
survived_pred = model.predict(X=df_test)
prediction = pd.DataFrame(
    data={
        "PassengerId": df_test.index,
        "Survived": survived_pred,
    }
)

# Write the submission file; index=False leaves out the frame's own row index.
prediction.to_csv(path_or_buf="datasets/prediction_SVM.csv", index=False)