In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC

from scipy.stats import uniform, randint
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    classification_report,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score
)


In [11]:
train = pd.read_csv(r"D:\code\ML\Datasets\titanic_train.csv")

y = train["Survived"]
X = train.drop("Survived", axis=1)

In [12]:

# Feature Engineering
X["Deck"] = X["Cabin"].str[0]

X.loc[X["Deck"].isnull() & (X["Pclass"] == 1), "Deck"] = "B"
X.loc[X["Deck"].isnull() & (X["Pclass"] == 2), "Deck"] = "D"
X.loc[X["Deck"].isnull() & (X["Pclass"] == 3), "Deck"] = "F"

X["Embarked"] = X["Embarked"].fillna(X["Embarked"].mode()[0])

X["FamilySize"] = X["SibSp"] + X["Parch"] + 1
X["IsAlone"] = (X["FamilySize"] == 1).astype(int)

X["Title"] = X["Name"].str.extract(r' ([A-Za-z]+)\.', expand=False)
X["Title"] = X["Title"].replace(['Mlle', 'Ms'], 'Miss')
X["Title"] = X["Title"].replace('Mme', 'Mrs')
X["Title"] = X["Title"].replace(['Capt', 'Col', 'Major', 'Dr', 'Rev'], 'Officer')
X["Title"] = X["Title"].replace(
    ['Don', 'Dona', 'Lady', 'Countess', 'Jonkheer', 'Sir'], 'Noble'
)

X = X.drop(["PassengerId", "Name", "Ticket", "Cabin"], axis=1)

In [13]:

# Preprocessing
cat_features = X.select_dtypes(include=["object"]).columns
num_features = X.select_dtypes(include=["int64", "float64"]).columns

num_pipeline = make_pipeline(
    SimpleImputer(strategy="mean"),
    StandardScaler()
)

preprocessor = make_column_transformer(
    (num_pipeline, num_features),
    (OneHotEncoder(handle_unknown="ignore"), cat_features)
)

X_processed = preprocessor.fit_transform(X)


In [14]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_processed, y, test_size=0.2, random_state=42
)

In [15]:
# Models
lr = LogisticRegression(max_iter=1000, random_state=42)
rf = RandomForestClassifier(random_state=42)
svc = SVC(probability=True, random_state=42)

voting_clf = VotingClassifier(
    estimators=[
        ('lr', lr),
        ('rf', rf),
        ('svc', svc)
    ],
    voting='soft'
)

In [16]:
# Hyperparameter Tuning
param_distributions = {
    'lr__C': uniform(0.01, 10),
    'rf__n_estimators': randint(50, 200),
    'rf__max_depth': [None, 5, 10, 15],
    'svc__C': uniform(0.1, 5)
}

In [17]:
random_search = RandomizedSearchCV(
    estimator=voting_clf,
    param_distributions=param_distributions,
    n_iter=30,
    cv=5,
    scoring="accuracy",
    n_jobs=-1,
    random_state=42
)

In [18]:
random_search.fit(X_train, y_train)

best_model = random_search.best_estimator_

In [22]:
# Evaluation on Validation Set
y_pred = best_model.predict(X_test)
y_proba = best_model.predict_proba(X_test)[:, 1]

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print("ROC-AUC:", roc_auc_score(y_test, y_proba))

print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Cross-validation Score
cv_scores = cross_val_score(
    best_model, X_train, y_train, cv=5, scoring="accuracy"
)

print("Cross-validation Accuracy:", cv_scores.mean())

Accuracy: 0.8212290502793296
Precision: 0.8
Recall: 0.7567567567567568
F1 Score: 0.7777777777777778
ROC-AUC: 0.8897039897039898

Confusion Matrix:
 [[91 14]
 [18 56]]

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.87      0.85       105
           1       0.80      0.76      0.78        74

    accuracy                           0.82       179
   macro avg       0.82      0.81      0.81       179
weighted avg       0.82      0.82      0.82       179

Cross-validation Accuracy: 0.8384516891559144


In [4]:
import nbformat
from nbconvert import PythonExporter

# path to your notebook
notebook_path = r"D:\code\ML\titanic_prediction_1.ipynb"

# load notebook
with open(notebook_path, "r", encoding="utf-8") as f:
    nb = nbformat.read(f, as_version=4)

# convert to python
exporter = PythonExporter()
python_code, _ = exporter.from_notebook_node(nb)

# save .py file
output_path = r"D:\code\ML\titanic_model.py"
with open(output_path, "w", encoding="utf-8") as f:
    f.write(python_code)

print("Notebook successfully converted to Python script!")


FileNotFoundError: [Errno 2] No such file or directory: 'D:\\code\\ML\\titanic_prediction_1.ipynb'