# Importing necessary libraries

In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [22]:
import os

# Location of the Titanic CSV; report whether it is present before loading it.
file_path = "/titanic (1).csv"
csv_is_present = os.path.exists(file_path)
print(csv_is_present)

True


In [23]:
# Load the passenger data from the path whose existence was checked above, so the
# file location is defined in exactly one place.
ship = pd.read_csv(file_path)

In [24]:
# Preview the first five rows to check that the file parsed as expected.
ship.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [25]:
# Call info() to print column dtypes and non-null counts; the bare attribute
# `ship.info` only displays the bound method instead of running it.
ship.info()

In [26]:
# Count the missing values in each column.
ship.isna().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [27]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Preprocessing the data

In [29]:
# Fill missing ages with the median age and missing embarkation ports with the most
# frequent port. The result is assigned back to the column: calling
# fillna(inplace=True) on a selected column is chained assignment, which pandas
# warns about and which leaves `ship` unchanged under copy-on-write.
ship['Age'] = ship['Age'].fillna(ship['Age'].median())
ship['Embarked'] = ship['Embarked'].fillna(ship['Embarked'].mode()[0])

# Dropping columns that are not used as model features


In [32]:
# Remove the columns that are not used as model features.
columns_to_drop = ['PassengerId', 'Name', 'Ticket', 'Cabin']
ship = ship.drop(columns=columns_to_drop)

In [33]:
# One-hot encode the categorical columns; drop_first=True omits the first level of each.
categorical_columns = ['Sex', 'Embarked']
ship = pd.get_dummies(ship, columns=categorical_columns, drop_first=True)

# Defining features (X) and target (y)

In [34]:
# The target is the Survived column; every remaining column is a feature.
y = ship['Survived']
X = ship.drop(columns=['Survived'])

# Splitting the data into training and test sets


In [35]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [36]:
# Fit the scaler on the training features only, then apply it to both splits.
# Note: X_train and X_test are overwritten with the scaled arrays.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Logistic Regression model

In [38]:
# Train the logistic regression classifier on the scaled training data.
model = LogisticRegression(random_state=42)
model.fit(X=X_train, y=y_train)

In [39]:
# Test-set outputs: the second predict_proba column and the hard class labels.
y_pred_prob = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)

# Model Evaluation

In [40]:
# Score the test-set predictions against the true labels.
class_report = classification_report(y_true=y_test, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [41]:
# Print the accuracy, then each remaining metric under its own title line.
print(f'Accuracy: {accuracy}')
for title, body in (
    ('Confusion Matrix:', conf_matrix),
    ('Classification Report:', class_report),
):
    print(title)
    print(body)

Accuracy: 0.8100558659217877
Confusion Matrix:
[[90 15]
 [19 55]]
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.86      0.84       105
           1       0.79      0.74      0.76        74

    accuracy                           0.81       179
   macro avg       0.81      0.80      0.80       179
weighted avg       0.81      0.81      0.81       179



# Predicting Survival Probability

In [49]:
def predict_survival(features, estimator=None, feature_scaler=None, feature_columns=None):
    """Return the predicted survival probability for a single passenger.

    Parameters
    ----------
    features : dict
        Maps feature names to values that are already encoded the way the
        training columns are (for example ``Sex_male`` rather than ``Sex``);
        no one-hot encoding is applied here. Expected features that are
        missing are set to 0. Names that are not expected features are
        ignored, and a warning is emitted so that typos do not silently
        turn into zeros.
    estimator : optional
        Object exposing ``predict_proba``. Defaults to the notebook-level
        ``model``.
    feature_scaler : optional
        Object exposing ``transform``. Defaults to the notebook-level
        ``scaler``.
    feature_columns : optional
        Ordered feature names to pass to the scaler. Defaults to
        ``X.columns``.

    Returns
    -------
    The entry at row 0, column 1 of ``predict_proba``, i.e. the probability
    of the estimator's second class.
    """
    import warnings

    # Fall back to the notebook globals so existing one-argument calls keep working.
    if estimator is None:
        estimator = model
    if feature_scaler is None:
        feature_scaler = scaler
    expected_columns = list(X.columns if feature_columns is None else feature_columns)

    passenger = pd.DataFrame([features])

    # A misspelled name would otherwise be dropped and its feature scored as 0.
    unknown = [name for name in passenger.columns if name not in expected_columns]
    if unknown:
        warnings.warn(
            f'Ignoring unknown feature(s): {unknown}; expected {expected_columns}',
            stacklevel=2,
        )

    # Put the columns in training order and fill absent features with 0.
    passenger = passenger.reindex(columns=expected_columns, fill_value=0)
    scaled = feature_scaler.transform(passenger)
    survival_prob = estimator.predict_proba(scaled)[0, 1]
    return survival_prob

# Defining example passenger features for the prediction

In [50]:
# Feature values for one example passenger, keyed by model feature name.
example_features = dict(
    Pclass=3,
    Age=22,
    SibSp=1,
    Parch=0,
    Fare=7.25,
    Sex_male=1,    # 1 for male, 0 for female
    Embarked_Q=0,  # 1 if embarked from Queenstown, otherwise 0
    Embarked_S=1,  # 1 if embarked from Southampton, otherwise 0
)

# Calculating the survival probability

In [51]:
# Score the example passenger and print the raw probability.
survival_probability = predict_survival(features=example_features)
print(f'Survival Probability: {survival_probability}')

Survival Probability: 0.09399914121790588


In [52]:
# Convert the probability computed above to a percentage instead of re-typing the
# printed value, so this cell stays correct if the model or the example changes.
survival_probability_percent = survival_probability * 100
rounded_survival_probability = round(survival_probability_percent, 2)
print(f'Survival Probability: {rounded_survival_probability}%')

Survival Probability: 9.4%


# The predicted survival probability for this example passenger (3rd class, 22-year-old male, embarked at Southampton) is 9.4%