### Importing Basic Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Reading Training Data and Cleaning it

In [None]:
# Load the training set from train.csv (expected in the working directory)
# and preview its first five rows.
train_data = pd.read_csv(filepath_or_buffer='train.csv')
train_data.head(n=5)

In [None]:
# Discard the columns that are not used as model inputs; what remains is
# Survived plus every column not listed here.
train_data = train_data.drop(
    columns=[
        'PassengerId',
        'Name',
        'SibSp',
        'Parch',
        'Ticket',
        'Cabin',
        'Embarked',
    ]
)
train_data.head(n=5)

In [None]:
# Print each remaining column's dtype and non-null count.
train_data.info()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
train_data.describe()

In [None]:
# Number of missing values per column, to see which columns need imputation.
train_data.isnull().sum()

In [None]:
# One-hot encode the Sex column and append the indicator columns
# (one per distinct Sex value) to the right of the training frame.
gender = pd.get_dummies(train_data.Sex)
train_data = pd.concat([train_data, gender], axis='columns')

In [None]:
# Preview the frame with the newly appended indicator columns.
train_data.head()

In [None]:
# Remove the original Sex text column and the 'female' indicator column,
# leaving 'male' as the only sex-derived feature.
train_data = train_data.drop(columns=['Sex', 'female'])
train_data.head(n=5)

In [None]:
# Impute missing ages with the mean age rounded to the nearest whole number.
mean_age = round(train_data['Age'].mean())
# Assign the filled column back rather than calling fillna(inplace=True) on
# the selected column: that chained inplace form raises a FutureWarning in
# pandas 2.x and leaves train_data unchanged under Copy-on-Write, which
# would let NaNs reach the model.
train_data['Age'] = train_data['Age'].fillna(mean_age)

In [None]:
# Display the cleaned training frame.
train_data

In [None]:
# Re-count missing values per column to check the result of the Age imputation.
train_data.isnull().sum()

### Using Machine Learning to Predict the Test Data (the test data is cleaned with the same methods)

In [None]:
# Split the cleaned frame into NumPy arrays: X holds every column except
# Survived, y holds the Survived labels.
X = train_data.drop(columns='Survived').to_numpy()
y = train_data.Survived.to_numpy()

In [None]:
from sklearn.model_selection import GridSearchCV
# SVC must be imported here: this cell uses it, and the notebook's only other
# import of it lives in a later cell, so running top-to-bottom raised NameError.
from sklearn.svm import SVC

# Hyper-parameter grid: RBF kernel with two gamma values and four C values
# (8 combinations in total).
tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]}]

# Exhaustive search over the grid, scored with 5-fold cross-validation on the
# training arrays.
clf = GridSearchCV(SVC(), tuned_parameters, cv=5)
clf.fit(X, y)

In [None]:
# Mean cross-validated score of the best parameter combination found by the search.
clf.best_score_

In [None]:
# Parameter combination that achieved the best cross-validated score.
clf.best_params_

In [None]:
from sklearn.svm import SVC

# Final classifier: an RBF-kernel SVM with fixed hyper-parameters, trained on
# the full training arrays.
# NOTE(review): C and gamma look like they were copied from clf.best_params_
# — confirm they still match the grid-search result.
model = SVC(C=1000, gamma=0.001, kernel='rbf')
model.fit(X, y)

In [None]:
# Load the test set from test.csv (expected in the working directory)
# and preview its first five rows.
test_data = pd.read_csv(filepath_or_buffer='test.csv')
test_data.head(n=5)

In [None]:
# Keep the passenger ids as a NumPy array before the column is dropped;
# they are needed again when the submission frame is built.
passenger = test_data.PassengerId.to_numpy()

In [None]:
# Drop the same unused columns that were removed from the training frame so
# both frames end up with matching feature columns.
test_data = test_data.drop(
    columns=[
        'PassengerId',
        'Name',
        'SibSp',
        'Parch',
        'Ticket',
        'Cabin',
        'Embarked',
    ]
)
test_data.head(n=5)

In [None]:
# Number of missing values per column in the test frame.
test_data.isnull().sum()

In [None]:
# Fill missing Age and Fare values with the respective test-set column means.
# The result is assigned back instead of using fillna(inplace=True) on the
# selected column: that chained inplace form raises a FutureWarning in
# pandas 2.x and leaves test_data unchanged under Copy-on-Write.
# NOTE(review): the training frame was imputed with the *rounded training*
# mean age, whereas this uses the unrounded test mean — consider reusing the
# training statistic for consistency.
test_data['Age'] = test_data['Age'].fillna(test_data['Age'].mean())
test_data['Fare'] = test_data['Fare'].fillna(test_data['Fare'].mean())


In [None]:
# Re-count missing values per column to check the result of the imputation.
test_data.isnull().sum()


In [None]:
# Encode Sex the same way as for the training frame: append the one-hot
# indicator columns, then drop the original text column and the 'female'
# indicator so only 'male' remains.
gender = pd.get_dummies(test_data.Sex)
test_data = pd.concat([test_data, gender], axis='columns')
test_data = test_data.drop(columns=['Sex', 'female'])
test_data.head(n=5)

In [None]:
# Predict survival for every test row. The model was fitted on a plain NumPy
# array (X = ....values), so pass the test features the same way; handing it
# a DataFrame makes scikit-learn warn that X has feature names the estimator
# was fitted without.
predictions = model.predict(test_data.values)

### Creating an external csv file for storing predictions

In [None]:
# Pair each saved passenger id with its predicted Survived label and display
# the resulting two-column frame.
submission = pd.DataFrame(dict(PassengerId=passenger, Survived=predictions))
submission

In [None]:
# Write the predictions to submission.csv without the DataFrame index column.
submission.to_csv(path_or_buf='submission.csv', index=False)

### Visualizing the Training Data

In [None]:
# Apply matplotlib's built-in 'bmh' style sheet to the plots that follow.
plt.style.use('bmh') 

In [None]:
# Subset of the training rows whose Survived label is 0.
not_survived = train_data.loc[train_data['Survived'].eq(0)]

In [None]:
# Age distribution of the passengers who did not survive. The original cell
# plotted every passenger, which does not support a statement about
# non-survivors specifically.
# NOTE: missing ages were imputed with the rounded mean age earlier, so the
# bin containing that value is inflated.
sns.histplot(not_survived['Age'])
plt.show()


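## From this plot we can say that passengers in their 30s were the largest group among those who did not survive (note that missing ages were filled with the rounded mean age, which inflates the bin around that value)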

In [None]:
# Kernel density estimate of the Pclass values across all training rows.
sns.kdeplot(x=train_data['Pclass'])
plt.show()

## Here we can see that most passengers were in Pclass 3

In [None]:
# Histogram of the 'male' indicator among the passengers who did not survive,
# i.e. how many of them were male versus not male.
sns.histplot(x=not_survived['male'])
plt.show()

## More males died than females