In [105]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import GridSearchCV,StratifiedShuffleSplit
from sklearn.linear_model import LogisticRegression
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder,PolynomialFeatures
from sklearn.impute import SimpleImputer
from sklearn.metrics import confusion_matrix,accuracy_score
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
%matplotlib inline

In [106]:
data = pd.read_csv('train.csv')

In [107]:
sss = StratifiedShuffleSplit(n_splits=1,test_size=0.2,random_state=0)

In [108]:
for train_index, test_index in sss.split(data, data['Survived']):
    train_data = data.loc[train_index]
    test_data = data.loc[test_index]

In [109]:
imputer = SimpleImputer(strategy='mean')
train_data['Age'] = imputer.fit_transform(train_data[['Age']])
test_data['Age'] = imputer.transform(test_data[['Age']])
train_data.drop(['Name','Cabin','Ticket','PassengerId'],axis=1,inplace=True)
test_data.drop(['Name','Cabin','Ticket','PassengerId'],axis=1,inplace=True)

In [110]:

train_data = pd.get_dummies(data=train_data,columns=['Embarked','Sex']).astype(float)
test_data = pd.get_dummies(data=test_data,columns=['Embarked','Sex']).astype(float)

In [111]:
sc = StandardScaler()

X_train = train_data.drop(['Survived'],axis=1)
y_train = train_data['Survived']

X_test = test_data.drop(['Survived'],axis=1)
y_val = test_data['Survived']


X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [112]:
rf = SVC(random_state=0)
param_grid = {
    'C': [0.1, 1, 10],               # Smaller range of C values
    'gamma': ['scale', 0.01, 0.1],   # Fewer gamma values
    'kernel': ['linear', 'rbf']      # Focus on linear and RBF kernels only
}

gs = GridSearchCV(estimator=rf,param_grid=param_grid,cv=3,verbose=4,scoring='accuracy')

In [113]:
gs.fit(X_train,y_train)

Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV 1/3] END .C=0.1, gamma=scale, kernel=linear;, score=0.828 total time=   0.0s
[CV 2/3] END .C=0.1, gamma=scale, kernel=linear;, score=0.785 total time=   0.0s
[CV 3/3] END .C=0.1, gamma=scale, kernel=linear;, score=0.764 total time=   0.0s
[CV 1/3] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.845 total time=   0.0s
[CV 2/3] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.789 total time=   0.0s
[CV 3/3] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.789 total time=   0.0s
[CV 1/3] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.828 total time=   0.0s
[CV 2/3] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.785 total time=   0.0s
[CV 3/3] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.764 total time=   0.0s
[CV 1/3] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.840 total time=   0.0s
[CV 2/3] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.781 total time=   0.0s
[CV 3/3] END .....C=0.1, gamma=0.01, kernel=rbf;

In [114]:
rf = gs.best_estimator_

In [115]:
new_data = pd.read_csv('test.csv')

In [116]:
new_data.drop(['Name','Cabin','Ticket','PassengerId'],axis=1,inplace=True)


In [117]:
new_data['Age'] = imputer.transform(new_data[['Age']])
new_data['Fare'].fillna(new_data['Fare'].mean(),inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  new_data['Fare'].fillna(new_data['Fare'].mean(),inplace=True)


In [118]:
new_data = pd.get_dummies(data=new_data,columns=['Embarked','Sex']).astype(float)

In [119]:
new_data = sc.transform(new_data)

In [120]:
y_pred = rf.predict(new_data)

In [121]:
result = pd.DataFrame(
    {
        "PassengerId":list(range(892,892+len(y_pred))),
        "Survived" : y_pred
    }
    )
result.to_csv("submission.csv",index=False)

In [122]:
y_pred

array([0., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 0.,
       0., 1., 1., 0., 1., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0.,
       0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 1., 0.,
       0., 1., 1., 0., 1., 0., 1., 0., 0., 1., 0., 1., 1., 0., 0., 0., 0.,
       0., 1., 1., 1., 1., 1., 0., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 1., 0.,
       1., 1., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 1., 1., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 1., 1.,
       0., 0., 0., 0., 0., 1., 1., 0., 1., 1., 0., 0., 1., 0., 1., 0., 1.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 1., 1., 1., 0., 1.,
       0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 1.,
       0., 1., 0., 1., 1.