In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.metrics import confusion_matrix, accuracy_score, mean_squared_error
import statsmodels.api as sm

In [2]:
titanic_data = pd.read_csv('titanic.csv')

In [14]:
total_passengers = len(titanic_data)
num_survived = titanic_data['survived'].sum()
prob_survival = num_survived / total_passengers
print(f"Probability of Survival: {prob_survival:.4f}")

Probability of Survival: 0.3820


In [15]:
titanic_data['age'].fillna(titanic_data['age'].median())

bins = [0, 12, 20, 35, 60, 80]
labels = ['Child', 'Teenager', 'Young Adult', 'Adult', 'Senior']
titanic_data['age_group'] = pd.cut(titanic_data['age'], bins=bins, labels=labels)

survival_table = titanic_data.groupby(['pclass', 'sex', 'age_group'])['survived'].mean().reset_index()

survival_table = survival_table.rename(columns={'survived': 'survival_probability'})
print(survival_table)

  survival_table = titanic_data.groupby(['pclass', 'sex', 'age_group'])['survived'].mean().reset_index()


    pclass     sex    age_group  survival_probability
0        1  female        Child              0.000000
1        1  female     Teenager              1.000000
2        1  female  Young Adult              0.980000
3        1  female        Adult              0.967213
4        1  female       Senior              0.833333
5        1    male        Child              1.000000
6        1    male     Teenager              0.333333
7        1    male  Young Adult              0.452381
8        1    male        Adult              0.321429
9        1    male       Senior              0.066667
10       2  female        Child              1.000000
11       2  female     Teenager              0.928571
12       2  female  Young Adult              0.884615
13       2  female        Adult              0.833333
14       2  female       Senior                   NaN
15       2    male        Child              1.000000
16       2    male     Teenager              0.117647
17       2    male  Young Ad

In [19]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

titanic_data = titanic_data.dropna(subset=['age','sex', 'pclass'])
le = LabelEncoder()
titanic_data['sex'] = le.fit_transform(titanic_data['sex'])

X = titanic_data[['pclass', 'sex', 'age']]
y = titanic_data['survived']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


model = LogisticRegression()
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy_score(y_test, y_pred))

print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("classification Report:\n", classification_report(y_test, y_pred))




Accuracy: 0.7380952380952381
Confusion Matrix:
 [[96 24]
 [31 59]]
classification Report:
               precision    recall  f1-score   support

           0       0.76      0.80      0.78       120
           1       0.71      0.66      0.68        90

    accuracy                           0.74       210
   macro avg       0.73      0.73      0.73       210
weighted avg       0.74      0.74      0.74       210



In [22]:
y_pred = model.predict(X_test_scaled)

cm = confusion_matrix(y_test, y_pred)
tn, fp, fn, tp = cm.ravel()
print(f"True Negatives: {tn}, False Positives: {fp}, False Negatives: {fn}, True Positives: {tp}")

True Negatives: 96, False Positives: 24, False Negatives: 31, True Positives: 59


In [25]:
TN, FP, FN, TP = cm.ravel()
accuracy = (TP + TN) / (TP + TN + FP + FN)
print(f"Accuracy calculated from confusion matrix: {accuracy:.2f}")

Accuracy calculated from confusion matrix: 0.74
