### Setup

In [1]:
# Loading the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [2]:
# Global plot styling: apply seaborn's "whitegrid" theme to every figure drawn below.
# set_theme() is the preferred spelling; sns.set() is only kept as an alias for it.
sns.set_theme(style="whitegrid")

### Helper Functions

In [3]:
def load_data(url):
    """Read a CSV source into a pandas DataFrame.

    Parameters
    ----------
    url : str or file-like
        Location of the CSV; forwarded unchanged to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table, using ``pd.read_csv`` defaults.
    """
    frame = pd.read_csv(url)
    return frame

def preprocess_data(df):
    """Turn the raw passenger table into a numeric, model-ready feature table.

    The input frame is left untouched: all work happens on a copy, so the
    cell that calls this function can be re-run safely and the raw frame
    keeps showing what was actually loaded.

    Steps, in order:
      1. Fill missing ``Age`` with the column median.
      2. Fill missing ``Embarked`` with the most frequent value.
      3. Drop ``Cabin``.
      4. Encode ``Sex`` as integer codes in sorted label order
         (for the Titanic data: ``female`` -> 0, ``male`` -> 1).
      5. One-hot encode ``Embarked``.
      6. Add ``FamilySize`` = ``SibSp`` + ``Parch``.
      7. Drop the identifier-like columns ``Name``, ``Ticket``, ``PassengerId``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table. Must contain the columns ``Age``, ``Embarked``, ``Cabin``,
        ``Sex``, ``SibSp``, ``Parch``, ``Name``, ``Ticket`` and ``PassengerId``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the transformations above applied.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    IndexError
        If ``Embarked`` has no non-missing value (no mode to fill with).
    """
    # Work on a copy: the column assignments below would otherwise write
    # straight into the caller's DataFrame.
    df = df.copy()

    df['Age'] = df['Age'].fillna(df['Age'].median())
    df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])
    df = df.drop('Cabin', axis=1)

    # Sorted integer codes for the feature column. Same codes LabelEncoder
    # produces for non-null labels, without using a target encoder on a
    # feature; a missing value, if any, is coded as -1.
    sex_codes, _ = pd.factorize(df['Sex'], sort=True)
    df['Sex'] = sex_codes

    df = pd.get_dummies(df, columns=['Embarked'])
    df['FamilySize'] = df['SibSp'] + df['Parch']
    df = df.drop(['Name', 'Ticket', 'PassengerId'], axis=1)
    return df

def split_data(df, target='Survived', test_size=0.3, random_state=42):
    """Separate features from the label column and make a seeded train/test split.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the feature columns plus the ``target`` column.
    target : str, default 'Survived'
        Name of the label column; every other column is used as a feature.
    test_size : float or int, default 0.3
        Forwarded to ``train_test_split``.
    random_state : int or None, default 42
        Forwarded to ``train_test_split``; a fixed value keeps the split
        identical across notebook re-runs.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.

    Raises
    ------
    KeyError
        If ``target`` is not a column of ``df``.
    """
    X = df.drop(target, axis=1)
    y = df[target]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

def scale_data(X_train, X_test):
    """Standardise both feature sets using statistics learned from the training set.

    The scaler is fitted on ``X_train`` only and then applied to ``X_test``,
    so no information from the test set enters the fitted parameters.

    Parameters
    ----------
    X_train, X_test : array-like
        Training and test feature matrices.

    Returns
    -------
    tuple
        ``(X_train_scaled, X_test_scaled)`` as returned by the scaler.
    """
    standardizer = StandardScaler()
    train_scaled = standardizer.fit_transform(X_train)
    return train_scaled, standardizer.transform(X_test)

def train_model(X_train, y_train, n_estimators=100, random_state=42):
    """Fit a random-forest classifier on the training data.

    Parameters
    ----------
    X_train : array-like
        Training feature matrix.
    y_train : array-like
        Training labels.
    n_estimators : int, default 100
        Number of trees, forwarded to ``RandomForestClassifier``.
    random_state : int or None, default 42
        Seed forwarded to ``RandomForestClassifier`` so repeated runs build
        the same forest.

    Returns
    -------
    RandomForestClassifier
        The fitted model.
    """
    model = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
    model.fit(X_train, y_train)
    return model

def evaluate_model(model, X_test, y_test):
    """Print test-set metrics for a fitted classifier and plot its confusion matrix.

    Prints the accuracy, the classification report and the confusion matrix,
    then shows the confusion matrix as an annotated heatmap.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``.
    X_test : array-like
        Test feature matrix, prepared the same way as the training features.
    y_test : array-like
        True labels for ``X_test``.

    Returns
    -------
    None
        Output is printed and plotted only.
    """
    y_pred = model.predict(X_test)
    # Computed once and reused for both the printed and the plotted matrix.
    cm = confusion_matrix(y_test, y_pred)

    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("\nClassification Report:\n", classification_report(y_test, y_pred))
    print("\nConfusion Matrix:\n", cm)

    # Explicit Axes instead of the implicit pyplot "current figure" state.
    _, ax = plt.subplots(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    ax.set_title('Confusion Matrix')
    plt.show()

### Loading the data

In [None]:
# Titanic passenger CSV hosted in the datasciencedojo/datasets GitHub repository
# (fetched over the network each time this cell runs).
url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
raw_titanic = load_data(url)
# Quick structural check of the loaded frame: column names, dtypes, non-null counts.
raw_titanic.info()

### Feature Engineering

In [None]:
# Build the model-ready table from the raw frame (imputation, encoding,
# FamilySize, removal of identifier-like columns).
titanic_df = preprocess_data(raw_titanic)
# Preview the first rows to sanity-check the engineered columns.
titanic_df.head()

### Splitting and scaling the dataset

In [None]:
# Hold out part of the rows for testing; split_data separates the 'Survived'
# label from the features and uses a fixed seed, so the split is reproducible.
X_train, X_test, y_train, y_test = split_data(titanic_df)

# Standardise the features: the scaler is fitted on the training rows only and
# then applied to the test rows.
X_train_scaled, X_test_scaled = scale_data(X_train, X_test)

### Training the model

In [None]:
# Fit the random-forest classifier on the scaled training features.
model = train_model(X_train_scaled, y_train)

### Evaluating the model

In [None]:
# Score the fitted model on the held-out, scaled test features: prints accuracy,
# the classification report and the confusion matrix, then plots the matrix.
evaluate_model(model, X_test_scaled, y_test)