# Importing Libraries

In [444]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Training Set

# Reading Dataset

In [445]:
dataset = pd.read_csv("C:/Drive D/ArhatPersonal/ML/Kaggle/titanic/train.csv")
X_train = dataset.iloc[:, [2,4,5,6,7,9]].values
y_train = dataset.iloc[:, 1].values

In [446]:
print(X_train)

[[3 'male' 22.0 1 0 7.25]
 [1 'female' 38.0 1 0 71.2833]
 [3 'female' 26.0 0 0 7.925]
 ...
 [3 'female' nan 1 2 23.45]
 [1 'male' 26.0 0 0 30.0]
 [3 'male' 32.0 0 0 7.75]]


# Taking care of missing data

In [447]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer.fit(X_train[:, [2]])
X_train[:, [2]] = imputer.transform(X_train[:, [2]])

In [448]:
print(X_train)

[[3 'male' 22.0 1 0 7.25]
 [1 'female' 38.0 1 0 71.2833]
 [3 'female' 26.0 0 0 7.925]
 ...
 [3 'female' 29.69911764705882 1 2 23.45]
 [1 'male' 26.0 0 0 30.0]
 [3 'male' 32.0 0 0 7.75]]


# Encoding categorical data

In [449]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X_train[:, 1] = le.fit_transform(X_train[:, 1])


In [450]:
print(X_train)

[[3 1 22.0 1 0 7.25]
 [1 0 38.0 1 0 71.2833]
 [3 0 26.0 0 0 7.925]
 ...
 [3 0 29.69911764705882 1 2 23.45]
 [1 1 26.0 0 0 30.0]
 [3 1 32.0 0 0 7.75]]


# Feature Scaling

In [451]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train[:, [2,3,4,5]] = sc.fit_transform(X_train[:, [2,3,4,5]])

In [452]:
print(X_train)

[[3 1 -0.5924805998028931 0.4327933656785018 -0.4736736092984604
  -0.5024451714361923]
 [1 0 0.6387890120425208 0.4327933656785018 -0.4736736092984604
  0.7868452935884461]
 [3 0 -0.2846631968415396 -0.47454519624983954 -0.4736736092984604
  -0.4888542575852486]
 ...
 [3 0 0.0 0.4327933656785018 2.0089333664952354 -0.17626323901354432]
 [1 1 -0.2846631968415396 -0.47454519624983954 -0.4736736092984604
  -0.04438103794142432]
 [3 1 0.17706290760049057 -0.47454519624983954 -0.4736736092984604
  -0.49237782784290063]]


# Training the model

In [453]:
from sklearn.svm import SVC
classifier = SVC(kernel='rbf', random_state=0)
classifier.fit(X_train, y_train)

# Confusion Matrix and Accuracy

In [454]:
from sklearn.metrics import confusion_matrix
y_pred_train = classifier.predict(X_train)
cm = confusion_matrix(y_train, y_pred_train)
print(cm)

from sklearn.metrics import accuracy_score
print(accuracy_score(y_train, y_pred_train))

[[492  57]
 [ 94 248]]
0.8305274971941639


# Test Set

# Importing Test Set

In [455]:
Test_dataset = pd.read_csv("C:/Drive D/ArhatPersonal/ML/Kaggle/titanic/test.csv")
X_test = Test_dataset.iloc[:, [1,3,4,5,6,8]].values

In [456]:
print(X_test)

[[3 'male' 34.5 0 0 7.8292]
 [3 'female' 47.0 1 0 7.0]
 [2 'male' 62.0 0 0 9.6875]
 ...
 [3 'male' 38.5 0 0 7.25]
 [3 'male' nan 0 0 8.05]
 [3 'male' nan 1 1 22.3583]]


# Taking Care of Missing Data

In [457]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer.fit(X_test[:, [2]])
X_test[:, [2]] = imputer.transform(X_test[:, [2]])

In [458]:
print(X_test)

[[3 'male' 34.5 0 0 7.8292]
 [3 'female' 47.0 1 0 7.0]
 [2 'male' 62.0 0 0 9.6875]
 ...
 [3 'male' 38.5 0 0 7.25]
 [3 'male' 30.272590361445783 0 0 8.05]
 [3 'male' 30.272590361445783 1 1 22.3583]]


# Encoding categorical data

In [459]:
X_test[:, 1] = le.transform(X_test[:, 1])

In [460]:
print(X_test)

[[3 1 34.5 0 0 7.8292]
 [3 0 47.0 1 0 7.0]
 [2 1 62.0 0 0 9.6875]
 ...
 [3 1 38.5 0 0 7.25]
 [3 1 30.272590361445783 0 0 8.05]
 [3 1 30.272590361445783 1 1 22.3583]]


# Feature Scaling

In [461]:
X_test[:, [2,3,4,5]] = sc.transform(X_test[:, [2,3,4,5]])

In [462]:
print(X_test)

[[3 1 0.3694487844513365 -0.47454519624983954 -0.4736736092984604
  -0.49078316061772326]
 [3 0 1.3313781687055661 0.4327933656785018 -0.4736736092984604
  -0.5074788432328381]
 [2 1 2.4856934298106417 -0.47454519624983954 -0.4736736092984604
  -0.4533668714188957]
 ...
 [3 1 0.67726618741269 -0.47454519624983954 -0.4736736092984604
  -0.5024451714361923]
 [3 1 0.044131220402948215 -0.47454519624983954 -0.4736736092984604
  -0.4863374216869257]
 [3 1 0.044131220402948215 0.4327933656785018 0.7676298785983874
  -0.19824427701513722]]


# Predicting Test Set results

In [463]:
y_pred = classifier.predict(X_test)

# Creating Submission File

In [464]:
submission = pd.DataFrame({'PassengerId':Test_dataset['PassengerId'],'Survived':y_pred})
submission.to_csv('submission.csv',index=False)