In [47]:
import zipfile
import pandas as pd

# Unpack every member of the Titanic archive into the current working directory.
with zipfile.ZipFile("/content/titanic.zip", mode='r') as archive:
    archive.extractall(path=".")

In [48]:
# Load the train and test CSV files into DataFrames.
train_data=pd.read_csv("/content/titanic/titanic/train.csv")
test_data=pd.read_csv("/content/titanic/titanic/test.csv")

In [49]:
# Preview the first rows of the training frame.
train_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [50]:
# Preview the first rows of the test frame (it has no "Survived" column).
test_data.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [51]:
# Remove the "Cabin" column from both frames.
# errors="ignore" keeps this cell re-runnable: the frames are reassigned in
# place, so on a second execution "Cabin" is already gone and a plain drop
# would raise KeyError.
train_data=train_data.drop(columns="Cabin",errors="ignore")
test_data=test_data.drop(columns="Cabin",errors="ignore")

In [52]:
# Per-column count of missing values: training frame first, then (after a
# blank separator line) the test frame.
print(train_data.isna().sum(),"\n")
print(test_data.isna().sum())

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Embarked         2
dtype: int64 

PassengerId     0
Pclass          0
Name            0
Sex             0
Age            86
SibSp           0
Parch           0
Ticket          0
Fare            1
Embarked        0
dtype: int64


In [53]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, StandardScaler

def preprocessor(data, num_features=None, cat_features=None):
    """Build an unfitted ColumnTransformer for the model's feature columns.

    Numerical columns are median-imputed and then standardized. Categorical
    columns are imputed with their most frequent value and then one-hot
    encoded into a dense array; categories not seen during fitting are
    ignored by the encoder instead of raising.

    Parameters
    ----------
    data : DataFrame-like
        Frame the transformer is intended for. Only its column labels are
        inspected, to fail early when a requested feature is absent. Objects
        without a ``columns`` attribute skip this check.
    num_features : sequence of str, optional
        Numerical columns. Defaults to ``["Age", "SibSp", "Parch", "Fare"]``.
    cat_features : sequence of str, optional
        Categorical columns. Defaults to ``["Pclass", "Sex", "Embarked"]``.

    Returns
    -------
    ColumnTransformer
        Unfitted transformer. Fit it on the training frame only and reuse the
        fitted object (``transform``) for any other frame.

    Raises
    ------
    ValueError
        If ``data`` exposes ``columns`` and a requested feature is missing.
    """
    # Features (defaults match the columns used throughout this notebook)
    num_fea = ["Age", "SibSp", "Parch", "Fare"] if num_features is None else list(num_features)
    cat_fea = ["Pclass", "Sex", "Embarked"] if cat_features is None else list(cat_features)

    # Fail early with a clear message instead of a later error at fit time.
    available = getattr(data, "columns", None)
    if available is not None:
        missing = [col for col in num_fea + cat_fea if col not in available]
        if missing:
            raise ValueError(f"data is missing required feature columns: {missing}")

    # Numerical pipeline
    num_pip = Pipeline([
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler())
    ])

    # Categorical pipeline
    cat_pip = Pipeline([
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encoder", OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
    ])

    # Column transformer: each sub-pipeline only sees its own column group
    preprocess_pipeline = ColumnTransformer([
        ("num", num_pip, num_fea),
        ("cat", cat_pip, cat_fea)
    ])

    return preprocess_pipeline


In [54]:
# Fit the preprocessing on the training frame only, then apply that SAME
# fitted transformer to the test frame. Re-fitting a fresh pipeline on the
# test set would impute/scale it with different statistics (and potentially a
# different one-hot column layout) than the features the models are trained
# on, making the test predictions inconsistent with training.
pipeline=preprocessor(train_data)
X_train=pipeline.fit_transform(train_data)
X_test=pipeline.transform(test_data)

In [56]:
# Display the transformed training feature matrix.
X_train

array([[-0.56573582,  0.43279337, -0.47367361, ...,  0.        ,
         0.        ,  1.        ],
       [ 0.6638609 ,  0.43279337, -0.47367361, ...,  1.        ,
         0.        ,  0.        ],
       [-0.25833664, -0.4745452 , -0.47367361, ...,  0.        ,
         0.        ,  1.        ],
       ...,
       [-0.10463705,  0.43279337,  2.00893337, ...,  0.        ,
         0.        ,  1.        ],
       [-0.25833664, -0.4745452 , -0.47367361, ...,  1.        ,
         0.        ,  0.        ],
       [ 0.20276213, -0.4745452 , -0.47367361, ...,  0.        ,
         1.        ,  0.        ]])

In [57]:
# Target vector: the "Survived" column of the training frame.
y_train=train_data["Survived"]

In [58]:
from sklearn.ensemble import RandomForestClassifier
# Random forest with 100 estimators and a fixed random_state, fitted on the
# full preprocessed training set.
rf_clf=RandomForestClassifier(n_estimators=100,random_state=42)
rf_clf.fit(X_train,y_train)

In [59]:
# Predict labels for the preprocessed test features with the fitted random forest.
y_pred=rf_clf.predict(X_test)

In [63]:
from sklearn.model_selection import cross_val_score
# 10-fold cross-validation of the random forest on the training set; the cell
# displays the mean score across folds.
# NOTE(review): X_train was produced by fit_transform on the whole training
# frame, so imputation/scaling statistics include each fold's held-out rows.
# Consider cross-validating a Pipeline(preprocessing + model) on the raw frame.
rf_scores=cross_val_score(rf_clf,X_train,y_train,cv=10)
rf_scores.mean()

np.float64(0.8137578027465668)

In [65]:
from sklearn.svm import SVC
# Support vector classifier with gamma="auto", fitted on the full preprocessed
# training set.
svc=SVC(gamma="auto")
svc.fit(X_train,y_train)

In [66]:
# 10-fold cross-validation of the SVC on the training set; the cell displays
# the mean score across folds.
# NOTE(review): X_train was preprocessed using the whole training frame before
# splitting, so fold statistics are not fully independent — confirm whether a
# Pipeline-based cross-validation on the raw frame is wanted here.
svc_scores=cross_val_score(svc,X_train,y_train,cv=10)
svc_scores.mean()

np.float64(0.8249313358302123)