In [1]:
import os

import pandas as pd

In [2]:
# Location of the Titanic CSV. Set the TITANIC_CSV environment variable to load
# the file from another location; when it is unset, the original hard-coded
# download path is used, so existing behaviour is unchanged.
TITANIC_CSV = os.environ.get("TITANIC_CSV", "C:/Users/Lenovo/Downloads/Titanic-Dataset.csv")
titanic_data = pd.read_csv(TITANIC_CSV)

In [3]:
# Preview the first rows of the freshly loaded frame to check the columns parsed as expected
titanic_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
# Summary statistics for every column; include='all' also reports the
# non-numeric columns (count / unique / top / freq) alongside the numeric ones.
# The `count` row is what reveals which columns contain missing values.
titanic_data.describe(include='all')

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
count,891.0,891.0,891.0,891,891,714.0,891.0,891.0,891.0,891.0,204,889
unique,,,,891,2,,,,681.0,,147,3
top,,,,"Braund, Mr. Owen Harris",male,,,,347082.0,,B96 B98,S
freq,,,,1,577,,,,7.0,,4,644
mean,446.0,0.383838,2.308642,,,29.699118,0.523008,0.381594,,32.204208,,
std,257.353842,0.486592,0.836071,,,14.526497,1.102743,0.806057,,49.693429,,
min,1.0,0.0,1.0,,,0.42,0.0,0.0,,0.0,,
25%,223.5,0.0,2.0,,,20.125,0.0,0.0,,7.9104,,
50%,446.0,0.0,3.0,,,28.0,0.0,0.0,,14.4542,,
75%,668.5,1.0,3.0,,,38.0,1.0,0.0,,31.0,,


In [5]:
# Clean the raw frame and turn it into an all-numeric feature table.
#
# The imputed columns are assigned back through `.assign(...)` rather than by
# calling `fillna(inplace=True)` on a selected column. `df[col].fillna(...,
# inplace=True)` is chained assignment: pandas emits a warning for it, and with
# Copy-on-Write enabled the parent frame is left unmodified, so the missing
# values would silently survive into model training.
titanic_data = (
    titanic_data
    .assign(
        # Fill missing values in 'Age' with the median age
        Age=lambda df: df['Age'].fillna(df['Age'].median()),
        # Fill missing values in 'Embarked' with the most frequent value
        Embarked=lambda df: df['Embarked'].fillna(df['Embarked'].mode()[0]),
    )
    # Drop 'Cabin' together with the columns not used as features
    .drop(columns=['Cabin', 'PassengerId', 'Name', 'Ticket'])
    # One-hot encode the categorical columns. drop_first=True removes the first
    # category of each (leaving Sex_male, Embarked_Q, Embarked_S) and dtype=int
    # yields 0/1 integers directly, so no follow-up astype(int) is required.
    .pipe(pd.get_dummies, columns=['Sex', 'Embarked'], drop_first=True, dtype=int)
)

In [6]:
# Inspect the frame after preprocessing: only numeric feature columns plus 'Survived' should remain
titanic_data.head()

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,Sex_male,Embarked_Q,Embarked_S
0,0,3,22.0,1,0,7.25,1,0,1
1,1,1,38.0,1,0,71.2833,0,0,0
2,1,3,26.0,0,0,7.925,0,0,1
3,1,1,35.0,1,0,53.1,0,0,1
4,0,3,35.0,0,0,8.05,1,0,1


In [7]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [8]:
# Separate the label from the predictors: 'Survived' is the target,
# every remaining column is used as a feature.
y = titanic_data['Survived']
X = titanic_data.drop(columns=['Survived'])

In [9]:
# Hold out part of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,    # 20% of the rows go to the test set
    random_state=42,
)

# Build the logistic regression classifier and fit it on the training portion only
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

In [14]:
# Make predictions on the held-out test set
y_pred = model.predict(X_test)

# Evaluate the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
# The matrix and the report are multi-line values. Starting them on their own
# line (sep='\n') keeps their first row/header aligned with the rows below
# instead of being pushed right by the label.
print('Confusion Matrix:', conf_matrix, sep='\n')
print('Classification Report:', class_report, sep='\n')

Accuracy: 0.81
Confusion Matrix: [[90 15]
 [19 55]]
Classification Report:               precision    recall  f1-score   support

           0       0.83      0.86      0.84       105
           1       0.79      0.74      0.76        74

    accuracy                           0.81       179
   macro avg       0.81      0.80      0.80       179
weighted avg       0.81      0.81      0.81       179



In [15]:
import numpy as np

In [16]:
# Function to predict survival of a passenger
def predict_survival(model, pclass, age, sibsp, parch, fare, sex_male, embarked_Q, embarked_S):
    """Predict the survival outcome of a single passenger.

    The arguments are assembled into one feature row in the order
    Pclass, Age, SibSp, Parch, Fare, Sex_male, Embarked_Q, Embarked_S, which
    must match the column order the model was trained on.

    Args:
        model: Fitted classifier exposing ``predict``.
        pclass: Passenger class.
        age: Age of the passenger.
        sibsp: Value for the ``SibSp`` feature.
        parch: Value for the ``Parch`` feature.
        fare: Ticket fare.
        sex_male: 1 if the passenger is male, otherwise 0.
        embarked_Q: 1 if the passenger embarked at Q, otherwise 0.
        embarked_S: 1 if the passenger embarked at S, otherwise 0.

    Returns:
        ``'Survived'`` when the model predicts class 1, otherwise
        ``'Did not survive'``.
    """
    # Single-row, 2-D feature matrix in training column order
    values = np.array([[pclass, age, sibsp, parch, fare, sex_male, embarked_Q, embarked_S]])

    # An estimator fitted on a DataFrame records its column names in
    # `feature_names_in_`. Passing a bare array to such a model makes
    # scikit-learn warn that X has no valid feature names, so label the row
    # with the recorded names when they are available and the width matches.
    feature_names = getattr(model, 'feature_names_in_', None)
    if feature_names is not None and len(feature_names) == values.shape[1]:
        passenger = pd.DataFrame(values, columns=list(feature_names))
    else:
        # Model fitted on plain arrays (or unexpected width): keep the raw array
        passenger = values

    # Use the model to predict survival
    prediction = model.predict(passenger)

    # Translate the predicted class into a readable label
    return 'Survived' if prediction[0] == 1 else 'Did not survive'

# Sample input for the predictor. The keys are spelled exactly like the
# keyword parameters of predict_survival so the dict can be unpacked into the call.
example_passenger = {
    'pclass': 3,
    'age': 22,
    'sibsp': 1,
    'parch': 0,
    'fare': 7.25,
    'sex_male': 1,
    'embarked_Q': 0,
    'embarked_S': 1
}

# Run the prediction, passing every feature by keyword via ** unpacking
result = predict_survival(model, **example_passenger)

print(f"The passenger would have: {result}")

The passenger would have: Did not survive


