In [1]:
# Import all the essentials
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Machine learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [2]:
# Load the Titanic dataset
import os

# NOTE: machine-specific absolute path; adjust it when running the notebook elsewhere.
file_path = r"C:\Users\DELL\Desktop\ML\Titanic-Dataset.csv"

# Fail fast: if the CSV is missing, `df` would never be created and every
# later cell would die with a NameError that hides the real cause.
if not os.path.exists(file_path):
    raise FileNotFoundError(f"Error: File not found at {file_path}")

df = pd.read_csv(file_path)
print("File loaded successfully!")

File loaded successfully!


In [3]:
# Preview the first five rows of the Titanic dataset
df.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [14]:
# Report how many values are missing in each column
print(df.isnull().sum())

# Impute the gaps: median for Age, most frequent value for Embarked
age_median = df['Age'].median()
df['Age'] = df['Age'].fillna(age_median)

embarked_mode = df['Embarked'].mode()[0]
df['Embarked'] = df['Embarked'].fillna(embarked_mode)

# Drop Cabin, Ticket, Name and PassengerId. Each column is removed only if
# it is still present, so re-running this cell does not raise a KeyError.
for col in ('Cabin', 'Ticket', 'Name', 'PassengerId'):
    if col in df.columns:
        df = df.drop(columns=col)



Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64


In [19]:
# Encode the categorical 'Sex' and 'Embarked' columns as integers.
# Each column gets its OWN LabelEncoder: re-fitting a single shared encoder
# overwrites the classes learned for the first column, so it could no longer
# transform / inverse_transform 'Sex' afterwards.
encoders = {}
for column in ('Sex', 'Embarked'):
    column_encoder = LabelEncoder()
    df[column] = column_encoder.fit_transform(df[column])
    encoders[column] = column_encoder

# Kept for backward compatibility: as before, `label_encoder` ends up being
# the encoder that was fitted on 'Embarked'.
label_encoder = encoders['Embarked']

# Resulting codes (LabelEncoder numbers the sorted class labels):
#   Sex:      0 = Female, 1 = Male
#   Embarked: 0 = Cherbourg (C), 1 = Queenstown (Q), 2 = Southampton (S)

# Double check
df.head()


Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,1,22.0,1,0,7.25,2
1,1,1,0,38.0,1,0,71.2833,0
2,1,3,0,26.0,0,0,7.925,2
3,1,1,0,35.0,1,0,53.1,2
4,0,3,1,35.0,0,0,8.05,2


In [20]:
# 1. Import libraries
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Single seed shared by the split and the forest so that the metrics below
# (and the model that gets saved later) are reproducible from run to run.
RANDOM_STATE = 42

# 2. Separate input (X) and output (y)
X = df.drop('Survived', axis=1)  # features
y = df['Survived']               # target

# 3. Split into training and testing data (20% held out for testing)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# 4. Create a Random Forest model (simple but powerful).
#    Without random_state the bootstrap samples and feature sub-sampling
#    differ on every run, so accuracy and predictions would drift.
model = RandomForestClassifier(random_state=RANDOM_STATE)

# 5. Train the model
model.fit(X_train, y_train)

# 6. Predict on test data
y_pred = model.predict(X_test)

# 7. Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.8212290502793296

Confusion Matrix:
 [[91 14]
 [18 56]]

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.87      0.85       105
           1       0.80      0.76      0.78        74

    accuracy                           0.82       179
   macro avg       0.82      0.81      0.81       179
weighted avg       0.82      0.82      0.82       179



In [25]:
import joblib

# Persist the trained model to disk
model_file = 'titanic_model.pkl'
joblib.dump(model, model_file)
print("Model saved successfully! ðŸŽ¯")

# Read it back, rebinding `model` to the deserialized copy
model = joblib.load(model_file)
print("Model loaded successfully! ðŸš€")



Model saved successfully! ðŸŽ¯
Model loaded successfully! ðŸš€


In [26]:
import numpy as np

# Column order of the features the model was trained on
feature_names = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']

# Inputs: one row per passenger, values listed in `feature_names` order
test_passengers = np.array([
    [1, 0, 25, 0, 0, 80, 0],
    [3, 1, 30, 0, 0, 7.25, 2],
    [2, 0, 22, 1, 0, 15, 2],
    [3, 1, 45, 0, 0, 8.05, 2],
    [1, 0, 60, 1, 1, 120, 0]
])

# Predict. The model was fitted on a DataFrame, so the rows are labelled with
# the training column names; passing the bare ndarray makes scikit-learn warn
# about missing feature names and silently relies on positional order.
predictions = model.predict(pd.DataFrame(test_passengers, columns=feature_names))

# Display
for i, prediction in enumerate(predictions):
    status = "Survived ðŸ›Ÿ" if prediction == 1 else "Did NOT survive ðŸš¢ðŸ’¥"
    print(f"Passenger {i+1}: {status}")


Passenger 1: Survived ðŸ›Ÿ
Passenger 2: Did NOT survive ðŸš¢ðŸ’¥
Passenger 3: Survived ðŸ›Ÿ
Passenger 4: Did NOT survive ðŸš¢ðŸ’¥
Passenger 5: Survived ðŸ›Ÿ




In [27]:
import numpy as np

# Column order of the features the model was trained on
feature_names = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']

# New passengers: one row each, values listed in `feature_names` order
test_passengers_2 = np.array([
    [1, 0, 19, 0, 0, 150, 0],  # Passenger 1
    [3, 1, 40, 1, 5, 15, 2],   # Passenger 2
    [2, 0, 28, 0, 0, 30, 1],   # Passenger 3
    [1, 1, 50, 1, 0, 80, 0],   # Passenger 4
    [3, 1, 24, 0, 0, 7.25, 2]  # Passenger 5
])

# Predict. Label the rows with the training column names so scikit-learn can
# validate them instead of warning about an unnamed ndarray.
predictions_2 = model.predict(pd.DataFrame(test_passengers_2, columns=feature_names))

# Print results
for idx, pred in enumerate(predictions_2, 1):
    result = "Survived ðŸ›Ÿ" if pred == 1 else "Did NOT survive ðŸš¢ðŸ’¥"
    print(f"Passenger {idx}: {result}")


Passenger 1: Survived ðŸ›Ÿ
Passenger 2: Did NOT survive ðŸš¢ðŸ’¥
Passenger 3: Survived ðŸ›Ÿ
Passenger 4: Did NOT survive ðŸš¢ðŸ’¥
Passenger 5: Did NOT survive ðŸš¢ðŸ’¥




In [28]:
import numpy as np

# Column order of the features the model was trained on
feature_names = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']

# New passenger test data: one row each, values listed in `feature_names` order
new_passengers = np.array([
    [1, 0, 19, 0, 0, 150, 0],
    [3, 1, 40, 1, 5, 15, 2],
    [2, 0, 28, 0, 0, 30, 1],
    [1, 1, 50, 1, 0, 80, 0],
    [3, 1, 24, 0, 0, 7.25, 2]
])

# Predict. Label the rows with the training column names so scikit-learn can
# validate them instead of warning about an unnamed ndarray.
predictions = model.predict(pd.DataFrame(new_passengers, columns=feature_names))

# Show Results
for idx, prediction in enumerate(predictions, start=1):
    status = "Survived ðŸ›Ÿ" if prediction == 1 else "Did NOT survive ðŸš¢ðŸ’¥"
    print(f"Passenger {idx}: {status}")


Passenger 1: Survived ðŸ›Ÿ
Passenger 2: Did NOT survive ðŸš¢ðŸ’¥
Passenger 3: Survived ðŸ›Ÿ
Passenger 4: Did NOT survive ðŸš¢ðŸ’¥
Passenger 5: Did NOT survive ðŸš¢ðŸ’¥




In [32]:
# Five hand-made passengers chosen so that all are likely NOT to survive:
# 3rd-class adult males travelling alone on cheap tickets from 'S'.
new_passengers = pd.DataFrame({
    'Pclass': [3] * 5,              # 3rd class
    'Sex': [1] * 5,                 # Male
    'Age': [40, 22, 35, 28, 50],    # Adult males
    'SibSp': [0] * 5,               # Traveling alone
    'Parch': [0] * 5,               # No parents/children
    'Fare': [7, 8, 6, 9, 5],        # Cheap tickets
    'Embarked': [2] * 5,            # Embarked from 'S'
})

# Run the loaded model and show its raw 0/1 output first
predictions = model.predict(new_passengers)
print("Model Raw Predictions:", predictions)

# Then one readable line per passenger (numbered from 1)
for idx, pred in enumerate(predictions, start=1):
    outcome = "Survived ðŸ›Ÿ" if pred == 1 else "Did NOT survive ðŸš¢ðŸ’¥"
    print(f"Passenger {idx}: {outcome}")



Model Raw Predictions: [0 0 0 0 0]
Passenger 1: Did NOT survive ðŸš¢ðŸ’¥
Passenger 2: Did NOT survive ðŸš¢ðŸ’¥
Passenger 3: Did NOT survive ðŸš¢ðŸ’¥
Passenger 4: Did NOT survive ðŸš¢ðŸ’¥
Passenger 5: Did NOT survive ðŸš¢ðŸ’¥


In [34]:
import pandas as pd

# Hand-made passengers: two 1st-class women, then three 3rd-class men
new_passengers = pd.DataFrame({
    'Pclass': [1, 1, 3, 3, 3],      # 1st class (more chance), 3rd class (less chance)
    'Sex': [0, 0, 1, 1, 1],         # 0 = Female (high survival), 1 = Male (low survival)
    'Age': [25, 30, 40, 35, 50],    # Reasonable ages
    'SibSp': [0, 1, 0, 0, 0],       # Family members
    'Parch': [0] * 5,               # Parents/Children aboard
    'Fare': [100, 80, 7, 6, 8],     # Big fares for 1st class, low fares for 3rd class
    'Embarked': [0, 0, 2, 2, 2],    # Embarked points (doesn't matter much here)
})

# Ask the model for a 0/1 survival label per row
predictions = model.predict(new_passengers)

# Report each passenger on its own line, numbered from 1
for i, pred in enumerate(predictions):
    outcome = "Survived ðŸ›Ÿ" if pred == 1 else "Did NOT survive ðŸš¢ðŸ’¥"
    print(f"Passenger {i+1}: {outcome}")


Passenger 1: Survived ðŸ›Ÿ
Passenger 2: Survived ðŸ›Ÿ
Passenger 3: Did NOT survive ðŸš¢ðŸ’¥
Passenger 4: Did NOT survive ðŸš¢ðŸ’¥
Passenger 5: Did NOT survive ðŸš¢ðŸ’¥


In [35]:
import joblib

# Write the trained model to 'titanic_model.pkl' in the working directory
joblib.dump(value=model, filename='titanic_model.pkl')
print("âœ… Model saved as titanic_model.pkl")


âœ… Model saved as titanic_model.pkl


In [36]:
# Persist the encoder. NOTE: `label_encoder` was last fitted on 'Embarked',
# so this file carries only the Embarked classes, not the 'Sex' mapping.
joblib.dump(value=label_encoder, filename='label_encoder.pkl')


['label_encoder.pkl']

In [37]:
import pandas as pd
import joblib

# Restore the trained model from the pickle written earlier
model = joblib.load('titanic_model.pkl')

# Same five passengers as before, assembled column by column
passenger_features = {
    'Pclass': [1, 1, 3, 3, 3],
    'Sex': [0, 0, 1, 1, 1],
    'Age': [25, 30, 40, 35, 50],
    'SibSp': [0, 1, 0, 0, 0],
    'Parch': [0, 0, 0, 0, 0],
    'Fare': [100, 80, 7, 6, 8],
    'Embarked': [0, 0, 2, 2, 2],
}
new_passengers = pd.DataFrame(passenger_features)

# Predict with the reloaded model
predictions = model.predict(new_passengers)

# Show one line per passenger, numbered from 1
for i, pred in enumerate(predictions):
    outcome = "Survived ðŸ›Ÿ" if pred == 1 else "Did NOT survive ðŸš¢ðŸ’¥"
    print(f"Passenger {i+1}: {outcome}")


Passenger 1: Survived ðŸ›Ÿ
Passenger 2: Survived ðŸ›Ÿ
Passenger 3: Did NOT survive ðŸš¢ðŸ’¥
Passenger 4: Did NOT survive ðŸš¢ðŸ’¥
Passenger 5: Did NOT survive ðŸš¢ðŸ’¥
