# Titanic dataset

In [1]:
import seaborn as sns

# Fetch the Titanic passenger table through seaborn's dataset loader
titanic_dataset = sns.load_dataset("titanic")

# Print the leading five records as a plain-text preview
print(titanic_dataset.head(n=5))


   survived  pclass     sex   age  sibsp  parch     fare embarked  class  \
0         0       3    male  22.0      1      0   7.2500        S  Third   
1         1       1  female  38.0      1      0  71.2833        C  First   
2         1       3  female  26.0      0      0   7.9250        S  Third   
3         1       1  female  35.0      1      0  53.1000        S  First   
4         0       3    male  35.0      0      0   8.0500        S  Third   

     who  adult_male deck  embark_town alive  alone  
0    man        True  NaN  Southampton    no  False  
1  woman       False    C    Cherbourg   yes  False  
2  woman       False  NaN  Southampton   yes   True  
3  woman       False    C  Southampton   yes  False  
4    man        True  NaN  Southampton    no   True  


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Start again from the raw dataset; the name is re-bound to the filtered
# frame on the next statement.
titanic_dataset = sns.load_dataset('titanic')

# Keep only passengers with a known age and embarkation port.
# NOTE(review): 'embarked' is not used as a feature below, so filtering on it
# discards rows the model could otherwise use -- confirm this is intended.
titanic_dataset = titanic_dataset.dropna(subset=['age', 'embarked'])

# Base features taken straight from the dataset.
X = titanic_dataset[['sex', 'age', 'fare','adult_male']]

# One-hot encode the passenger class. dtype=int pins the indicator columns to
# 0/1 integers; without it pandas >= 2.0 produces booleans instead.
df_encoded = pd.get_dummies(titanic_dataset['pclass'], prefix='pclass', dtype=int)

# Feature matrix = base features + class indicators; target = survival flag.
X = pd.concat([X, df_encoded], axis=1)
y = titanic_dataset['survived']

# 'sex' gets its own encoder so the label -> integer mapping stays available
# in sex_encoder.classes_ after the next column is encoded.
sex_encoder = LabelEncoder()
X['sex'] = sex_encoder.fit_transform(X['sex'])

# `le` is fitted on 'adult_male' only.
le = LabelEncoder()
X['adult_male'] = le.fit_transform(X['adult_male'])

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [3]:
# Show the first five rows of the encoded feature matrix
X.head(n=5)

Unnamed: 0,sex,age,fare,adult_male,pclass_1,pclass_2,pclass_3
0,1,22.0,7.25,1,0,0,1
1,0,38.0,71.2833,0,1,0,0
2,0,26.0,7.925,0,0,0,1
3,0,35.0,53.1,0,1,0,0
4,1,35.0,8.05,1,0,0,1


In [4]:
import pickle
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC



# Candidate estimators, keyed by the name used for reporting and for the
# exported file. random_state seeds the shuffling scikit-learn performs for
# SVC's probability estimates, so the exported model is repeatable.
models = {
    'SVM': SVC(probability=True, random_state=42),
}

# Hyper-parameter grid for each candidate, keyed like `models`. 'probability'
# is fixed in the constructor above, so it is not part of the search.
param_grids = {
    'SVM': {'C': [1, 10, 100], 'gamma': ['scale', 'auto']},
}

# Test-set accuracy of the best estimator found for each model name.
accuracies = {}

for model_name, model in models.items():
    # A model without a registered grid is searched over an empty grid.
    param_grid = param_grids.get(model_name, {})

    # 5-fold cross-validated search on the training split only.
    grid_search = GridSearchCV(model, param_grid, cv=5)
    grid_search.fit(X_train, y_train)

    # Score the winning configuration on the held-out test split.
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    accuracies[model_name] = accuracy

    # Export the best model using pickle.
    # NOTE: only unpickle files from a trusted source -- loading a pickle can
    # execute arbitrary code.
    filename = f"{model_name}_model.pkl"
    with open(filename, 'wb') as file:
        pickle.dump(best_model, file)

# Print the accuracies
for model_name, accuracy in accuracies.items():
    print(f"{model_name}: {accuracy:.2f}")

SVM: 0.77
