In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [None]:
#Importing Dataset
df = pd.read_csv('Titanic.csv')

In [None]:
# Cleaning Data
# dropping Irrelevant or high missing columns
df = df.drop(columns=["PassengerId", "Name", "Ticket", "Cabin"])


In [None]:
#Handling missing values
df["Age"].fillna(df["Age"].median(), inplace=True)
df["Embarked"].fillna(df["Embarked"].mode()[0], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Age"].fillna(df["Age"].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Embarked"].fillna(df["Embarked"].mode()[0], inplace=True)


In [6]:
# Encoding categorical variables
# Label encode 'Sex' and 'Embarked' columns
le_sex = LabelEncoder()
le_embarked = LabelEncoder()
df["Sex"] = le_sex.fit_transform(df["Sex"])           # male=1, female=0
df["Embarked"] = le_embarked.fit_transform(df["Embarked"])  # S=2, C=0, Q=1 (depends on fit order)

In [9]:
#Splitting dataset into features and target
X = df.drop("Survived", axis=1).values   # Features
y = df["Survived"].values                # Target


In [10]:
# Splitting into Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [11]:
#Feature Scaling
# Scale features for uniformity, although Random Forest does not strictly require it
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [12]:
#Training the Random Forest Classifier
classifier = RandomForestClassifier(
    n_estimators=100,         # number of trees
    criterion='entropy',      # split measure ('gini' also valid)
    random_state=42           # for reproducibility
)
classifier.fit(X_train, y_train)

In [14]:
 #Predicting the Test set results
y_pred = classifier.predict(X_test)

In [15]:
#Making the Confusion Matrix and evaluating
cm = confusion_matrix(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)
print("Confusion Matrix:\n", cm)
print("Accuracy:", acc)
print("Classification Report:\n", classification_report(y_test, y_pred))

Confusion Matrix:
 [[91 14]
 [18 56]]
Accuracy: 0.8212290502793296
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.87      0.85       105
           1       0.80      0.76      0.78        74

    accuracy                           0.82       179
   macro avg       0.82      0.81      0.81       179
weighted avg       0.82      0.82      0.82       179



In [16]:
#Predicting a new result (Example passenger)
# Format: [Pclass, Sex, Age, SibSp, Parch, Fare, Embarked]
# Example: 2nd class, female (0), age 25, no siblings or parents, fare 30, embarked at 'S'
new_passenger = np.array([[2, 0, 25, 0, 0, 30.0, le_embarked.transform(['S'])[0]]])
new_passenger_scaled = sc.transform(new_passenger)
new_prediction = classifier.predict(new_passenger_scaled)
print("Prediction for new passenger (1 = Survived, 0 = Did not survive):", new_prediction[0])

Prediction for new passenger (1 = Survived, 0 = Did not survive): 1
