In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
data=pd.read_csv('train.csv')

In [3]:
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


# Data Preprocessing

In [4]:
data.drop(['PassengerId', 'Name', 'SibSp', 'Parch', 'Ticket', 'Cabin', 'Embarked'], axis=1, inplace=True)

In [5]:
data.head()

Unnamed: 0,Survived,Pclass,Sex,Age,Fare
0,0,3,male,22.0,7.25
1,1,1,female,38.0,71.2833
2,1,3,female,26.0,7.925
3,1,1,female,35.0,53.1
4,0,3,male,35.0,8.05


In [6]:
data.isnull().sum()

Survived      0
Pclass        0
Sex           0
Age         177
Fare          0
dtype: int64

In [9]:
data['Age'].fillna(data['Age'].mean(), inplace=True)

In [10]:
data.isnull().sum()

Survived    0
Pclass      0
Sex         0
Age         0
Fare        0
dtype: int64

In [12]:
sex_dummies=pd.get_dummies(data['Sex'], drop_first=True)

In [13]:
data.head()

Unnamed: 0,Survived,Pclass,Sex,Age,Fare
0,0,3,male,22.0,7.25
1,1,1,female,38.0,71.2833
2,1,3,female,26.0,7.925
3,1,1,female,35.0,53.1
4,0,3,male,35.0,8.05


In [14]:
data=pd.concat([data,sex_dummies], axis=1)

In [15]:
data.head()

Unnamed: 0,Survived,Pclass,Sex,Age,Fare,male
0,0,3,male,22.0,7.25,1
1,1,1,female,38.0,71.2833,0
2,1,3,female,26.0,7.925,0
3,1,1,female,35.0,53.1,0
4,0,3,male,35.0,8.05,1


In [16]:
data.drop(['Sex'], axis=1, inplace=True)

In [17]:
data.head()

Unnamed: 0,Survived,Pclass,Age,Fare,male
0,0,3,22.0,7.25,1
1,1,1,38.0,71.2833,0
2,1,3,26.0,7.925,0
3,1,1,35.0,53.1,0
4,0,3,35.0,8.05,1


In [22]:
from sklearn.preprocessing import StandardScaler
sts=StandardScaler()

In [23]:
feature_scale = ['Age','Fare']
data[feature_scale] = sts.fit_transform(data[feature_scale])

In [24]:
data.head()

Unnamed: 0,Survived,Pclass,Age,Fare,male
0,0,3,-0.592481,-0.502445,1
1,1,1,0.638789,0.786845,0
2,1,3,-0.284663,-0.488854,0
3,1,1,0.407926,0.42073,0
4,0,3,0.407926,-0.486337,1


In [25]:
x=data.drop(['Survived'],axis=1)
y=data['Survived']

In [26]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

In [27]:
model_param = {
    'DecisionTreeClassifier':{
        'model':DecisionTreeClassifier(),
        'param':{
            'criterion': ['gini','entropy']
        }
    },
        'KNeighborsClassifier':{
        'model':KNeighborsClassifier(),
        'param':{
            'n_neighbors': [5,10,15,20,25]
        }
    },
        'SVC':{
        'model':SVC(),
        'param':{
            'kernel':['rbf','linear','sigmoid'],
            'C': [0.1, 1, 10, 100]
         
        }
    }
}

In [29]:
scores =[]
for model_name, mp in model_param.items():
    model_selection = GridSearchCV(estimator=mp['model'],param_grid=mp['param'],cv=5,return_train_score=False)
    model_selection.fit(x,y)
    scores.append({
        'model': model_name,
        'best_score': model_selection.best_score_,
        'best_params': model_selection.best_params_
    })

In [30]:
data_model_score = pd.DataFrame(scores,columns=['model','best_score','best_params'])
data_model_score

Unnamed: 0,model,best_score,best_params
0,DecisionTreeClassifier,0.775601,{'criterion': 'gini'}
1,KNeighborsClassifier,0.802492,{'n_neighbors': 5}
2,SVC,0.811481,"{'C': 100, 'kernel': 'rbf'}"


In [31]:
model_svc = SVC( C= 100,kernel='rbf')

In [32]:
model_svc.fit(x, y)

SVC(C=100)

In [33]:
data2=pd.read_csv('test.csv')

In [44]:
data3=pd.read_csv('test.csv')

In [34]:
data2.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [35]:
data2.drop(['PassengerId', 'Name', 'SibSp', 'Parch', 'Ticket', 'Cabin', 'Embarked'], axis=1, inplace=True)

In [36]:
data2.isnull().sum()

Pclass     0
Sex        0
Age       86
Fare       1
dtype: int64

In [37]:
data2['Age'].fillna(data2['Age'].mean(),inplace=True)
data2['Fare'].fillna(data2['Fare'].mean(),inplace=True)

In [38]:
data2.head()

Unnamed: 0,Pclass,Sex,Age,Fare
0,3,male,34.5,7.8292
1,3,female,47.0,7.0
2,2,male,62.0,9.6875
3,3,male,27.0,8.6625
4,3,female,22.0,12.2875


In [40]:
sex_dummies2=pd.get_dummies(data2['Sex'],drop_first=True)
data2= pd.concat([data2,sex_dummies2],axis=1)
data2.drop(['Sex'], axis=1, inplace=True )


In [41]:
data2[feature_scale] = sts.fit_transform(data2[feature_scale])

In [42]:
data2.head()

Unnamed: 0,Pclass,Age,Fare,male
0,3,0.334993,-0.498407,1
1,3,1.32553,-0.513274,0
2,2,2.514175,-0.465088,1
3,3,-0.25933,-0.483466,1
4,3,-0.655545,-0.418471,0


In [43]:
y_predicted = model_svc.predict(data2)

In [45]:
submission = pd.DataFrame({
        "PassengerId": data3['PassengerId'],
        "Survived": y_predicted
    })

In [46]:
submission.to_csv('titanic_submission.csv', index=False)