In [None]:
# Import the necessary libraries.
# If an import fails, install the missing package with "%pip install library_name"
# and then re-run this cell (library_name is the package to install: numpy, pandas, etc.).
# e.g.: %pip install scikit-learn

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import plotly.graph_objects as go

**Read data**

In [None]:
# Load the Titanic passenger data from CSV into a pandas DataFrame.
DATA_PATH = '/content/titanic.csv'
df = pd.read_csv(DATA_PATH)

# **Data Preprocessing**

In [None]:
# Preview the first rows of the raw data (head() shows 5 rows by default)
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [None]:
# Preview the last rows of the raw data (tail() shows 5 rows by default)
df.tail()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [None]:
# Report the total number of rows and columns.
# Descriptive names are used instead of `x, y` so that re-running this cell
# can never overwrite the `y` target Series defined further down the notebook.
n_rows, n_cols = df.shape
print("No of rows :", n_rows)
print("No of column: ", n_cols)

No of rows : 891
No of column:  12


Here is the meaning of each column in the Titanic dataset:

- `PassengerId`: A unique identifier assigned to each passenger.
- `Survived`: Indicates whether the passenger survived or not (0 = No, 1 = Yes).
- `Pclass`: The passenger class (1 = 1st class, 2 = 2nd class, 3 = 3rd class).
- `Name`: The name of the passenger.
- `Sex`: The gender of the passenger (male or female).
- `Age`: The age of the passenger.
- `SibSp`: The number of siblings/spouses aboard the Titanic.
- `Parch`: The number of parents/children aboard the Titanic.
- `Ticket`: The ticket number.
- `Fare`: The fare (ticket price) paid by the passenger.
- `Cabin`: The cabin number.
- `Embarked`: The port of embarkation (C = Cherbourg, Q = Queenstown, S = Southampton).



In [None]:
# Remove the columns that are not used as model inputs.
# `df` is rebound (no inplace=True) and errors='ignore' is passed so the cell
# is idempotent: running it a second time no longer raises a KeyError for
# columns that have already been dropped.
df = df.drop(
    columns=['PassengerId', 'Name', 'Ticket', 'Cabin', 'Embarked'],
    errors='ignore',
)

In [None]:
# Check which columns remain after the drop
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare
0,0,3,male,22.0,1,0,7.25
1,1,1,female,38.0,1,0,71.2833
2,1,3,female,26.0,0,0,7.925
3,1,1,female,35.0,1,0,53.1
4,0,3,male,35.0,0,0,8.05


In [None]:
# Count the missing (NaN) values in each column
df.isna().sum()

Survived      0
Pclass        0
Sex           0
Age         177
SibSp         0
Parch         0
Fare          0
dtype: int64

In [None]:
# Fill missing ages with the mean age.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on `df['Age']`: that chained in-place form acts on
# a temporary selection, emits a FutureWarning in pandas 2.x and leaves `df`
# unchanged once Copy-on-Write is enabled.
# NOTE(review): the mean is computed on the full dataset, i.e. before the
# train/test split below, so test rows influence the imputed value.
df['Age'] = df['Age'].fillna(df['Age'].mean())

In [None]:
# Inspect column dtypes to find non-numeric columns that still need encoding
df.dtypes

Survived      int64
Pclass        int64
Sex          object
Age         float64
SibSp         int64
Parch         int64
Fare        float64
dtype: object

In [None]:
# Encode 'Sex' as integers: male -> 0, female -> 1.
# The dtype guard makes the cell safe to re-run: mapping an already-encoded
# (numeric) column through this dict would turn every value into NaN, because
# 0 and 1 are not keys of the mapping.
if not pd.api.types.is_numeric_dtype(df['Sex']):
    df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})

In [None]:
# Confirm that every remaining column is now numeric
df.dtypes

Survived      int64
Pclass        int64
Sex           int64
Age         float64
SibSp         int64
Parch         int64
Fare        float64
dtype: object

In [None]:
# (rows, columns) of the preprocessed DataFrame
df.shape

(891, 7)

In [None]:
# 'Survived' is the prediction target; every other remaining column is a feature.
y = df['Survived']
X = df.drop(columns='Survived')

In [None]:
# Display the feature matrix
X

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare
0,3,0,22.000000,1,0,7.2500
1,1,1,38.000000,1,0,71.2833
2,3,1,26.000000,0,0,7.9250
3,1,1,35.000000,1,0,53.1000
4,3,0,35.000000,0,0,8.0500
...,...,...,...,...,...,...
886,2,0,27.000000,0,0,13.0000
887,1,1,19.000000,0,0,30.0000
888,3,1,29.699118,1,2,23.4500
889,1,0,26.000000,0,0,30.0000


In [None]:
# Display the target Series ('Survived')
y

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

In [None]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# same split is produced on every run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Display the training features
X_train

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare
331,1,0,45.500000,0,0,28.5000
733,2,0,23.000000,0,0,13.0000
382,3,0,32.000000,0,0,7.9250
704,3,0,26.000000,1,0,7.8542
813,3,1,6.000000,4,2,31.2750
...,...,...,...,...,...,...
106,3,1,21.000000,0,0,7.6500
270,1,0,29.699118,0,0,31.0000
860,3,0,41.000000,2,0,14.1083
435,1,1,14.000000,1,2,120.0000


In [None]:
# Display the training labels
y_train

331    0
733    0
382    0
704    0
813    0
      ..
106    1
270    0
860    0
435    1
102    0
Name: Survived, Length: 712, dtype: int64

In [None]:
# Display the test features
X_test

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare
709,3,0,29.699118,1,1,15.2458
439,2,0,31.000000,0,0,10.5000
840,3,0,20.000000,0,0,7.9250
720,2,1,6.000000,0,1,33.0000
39,3,1,14.000000,1,0,11.2417
...,...,...,...,...,...,...
433,3,0,17.000000,0,0,7.1250
773,3,0,29.699118,0,0,7.2250
25,3,1,38.000000,1,5,31.3875
84,2,1,17.000000,0,0,10.5000


In [None]:
# Display the test labels
y_test

709    1
439    0
840    0
720    1
39     1
      ..
433    0
773    0
25     1
84     1
10     1
Name: Survived, Length: 179, dtype: int64

In [None]:
# Standardise the features. The scaler is fitted on the training data only and
# the same fitted scaler is then applied to both the training and the test set.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Model training
# random_state is fixed so the forest (bootstrap samples and feature
# sub-sampling) is built identically on every run; without it the reported
# metrics change each time the notebook is re-executed, even though the
# train/test split itself is seeded.
random_forest = RandomForestClassifier(random_state=42)
random_forest.fit(X_train_scaled, y_train)

In [None]:
# Model prediction: predict the class label of every row in the scaled test set
y_pred = random_forest.predict(X_test_scaled)

In [None]:
# Display the labels predicted by the model for the test set
y_pred #predicted

array([0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 1, 1])

In [None]:
# Display the true test labels for comparison with the predictions
y_test  #actual

709    1
439    0
840    0
720    1
39     1
      ..
433    0
773    0
25     1
84     1
10     1
Name: Survived, Length: 179, dtype: int64

In [None]:
# Model Evaluation: compare the predictions with the true test labels.
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Print each metric under its heading, in the order accuracy, report, matrix.
for heading, value in (
    ("Accuracy:", accuracy),
    ("Classification Report:\n", report),
    ("Confusion Matrix:\n", cm),
):
    print(heading, value)

Accuracy: 0.8100558659217877
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.86      0.84       105
           1       0.79      0.74      0.76        74

    accuracy                           0.81       179
   macro avg       0.81      0.80      0.80       179
weighted avg       0.81      0.81      0.81       179

Confusion Matrix:
 [[90 15]
 [19 55]]


In [None]:
# Scatter the true and the predicted labels against the test-row index so that
# disagreements between the two series are visible.
fig = go.Figure(
    data=[
        go.Scatter(x=y_test.index, y=y_test, mode='markers', name='Actual'),
        go.Scatter(x=y_test.index, y=y_pred, mode='markers', name='Predicted'),
    ]
)
fig.update_layout(
    title='Actual vs Predicted',
    xaxis_title='Index',
    yaxis_title='Survived',
    legend=dict(x=0.1, y=1),
    showlegend=True,
)
fig.show()

In [None]:
# Create confusion matrix heatmap using Plotly
# Rows of `cm` are the actual classes and columns are the predicted classes.
labels = ['Not Survived', 'Survived']
fig = go.Figure(data=go.Heatmap(
    z=cm,
    x=labels,
    y=labels,
    colorscale='Blues',
    text=cm,
    hoverinfo='text'
))
fig.update_layout(
    title='Confusion Matrix',
    xaxis=dict(title='Predicted', side='top'),
    # Plotly draws the first y category at the bottom by default. Reversing the
    # axis puts row 0 ('Not Survived') at the top, so the heatmap has the same
    # layout as the printed confusion matrix.
    yaxis=dict(title='Actual', autorange='reversed')
)
fig.show()