In [29]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [30]:
# Load dataset (path is resolved relative to the notebook's working directory)
data = pd.read_csv('titanic.csv')

# Preview the data
print(data.head())

# Column dtypes and non-null counts.
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" to the output.
data.info()

# Check for missing values (count of nulls per column)
print(data.isnull().sum())

   PassengerId  Survived  Pclass  \
0          892         0       3   
1          893         1       3   
2          894         0       2   
3          895         0       3   
4          896         1       3   

                                           Name     Sex   Age  SibSp  Parch  \
0                              Kelly, Mr. James    male  34.5      0      0   
1              Wilkes, Mrs. James (Ellen Needs)  female  47.0      1      0   
2                     Myles, Mr. Thomas Francis    male  62.0      0      0   
3                              Wirz, Mr. Albert    male  27.0      0      0   
4  Hirvonen, Mrs. Alexander (Helga E Lindqvist)  female  22.0      1      1   

    Ticket     Fare Cabin Embarked  
0   330911   7.8292   NaN        Q  
1   363272   7.0000   NaN        S  
2   240276   9.6875   NaN        Q  
3   315154   8.6625   NaN        S  
4  3101298  12.2875   NaN        S  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (

In [31]:
# Clean and encode the loaded frame in one non-inplace chain.
#
# The result is assigned back to `data` instead of calling
# `data[col].fillna(..., inplace=True)`: that form is a chained in-place
# operation on a column selection, which recent pandas versions warn about
# and which does not update `data` when Copy-on-Write is enabled.
#
# NOTE: this cell expects the freshly loaded frame. Re-running it on already
# processed data fails (the dropped columns are gone), so re-run the load cell first.
data = (
    data
    .assign(
        # Fill missing 'Age' with the median age
        Age=lambda df: df['Age'].fillna(df['Age'].median()),
        # Convert 'Sex' to numerical values (0 = male, 1 = female)
        Sex=lambda df: df['Sex'].map({'male': 0, 'female': 1}),
        # Fill missing 'Embarked' with the most common value, then
        # convert to numerical values (C=1, Q=2, S=3)
        Embarked=lambda df: (
            df['Embarked']
            .fillna(df['Embarked'].mode()[0])
            .map({'C': 1, 'Q': 2, 'S': 3})
        ),
    )
    # Drop 'Cabin' (too many missing values) and the columns not used as features
    .drop(columns=['Cabin', 'Name', 'Ticket', 'PassengerId'])
)

# Series.map() turns any value missing from the mapping into NaN; fail loudly
# here rather than letting such values be silently imputed later.
assert data[['Sex', 'Embarked']].notna().all().all(), \
    "Unexpected category in 'Sex' or 'Embarked' (or cell re-run on encoded data)"


In [37]:
from sklearn.model_selection import train_test_split

# Separate the predictors from the label: every column except 'Survived'
# is a feature, and 'Survived' itself is the target.
X = data.drop(columns='Survived')
y = data['Survived']

# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [44]:
# Modelling dependencies
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Median imputation for any values still missing in the features.
# The imputer is fitted on the training split only and then reused on the
# test split, so test rows do not influence the fill values.
imputer = SimpleImputer(strategy='median')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Fit the logistic regression (max_iter raised to 1000) on the imputed
# training data, then predict labels for the held-out rows.
model = LogisticRegression(max_iter=1000).fit(X_train_imputed, y_train)
y_pred = model.predict(X_test_imputed)

# Score the predictions against the true test labels.
# NOTE(review): the recorded run reports accuracy 1.00. A perfect score usually
# means the target is a deterministic function of a feature; in the previewed
# rows 'Survived' matches the encoded 'Sex' exactly. Confirm the labels are
# real outcomes before trusting these metrics.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_report(y_test, y_pred))



Accuracy: 1.00
Confusion Matrix:
 [[50  0]
 [ 0 34]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       1.00      1.00      1.00        34

    accuracy                           1.00        84
   macro avg       1.00      1.00      1.00        84
weighted avg       1.00      1.00      1.00        84



In [48]:
# Predict survival for a single hypothetical passenger.
#
# The passenger is described by column name rather than by position so the
# values cannot drift out of sync with the feature order, and so the imputer
# (which was fitted on a DataFrame) receives matching column names instead of
# an unnamed array.
#
# Encodings follow the preprocessing cell: Sex 0 = male / 1 = female,
# Embarked C=1, Q=2, S=3.
# Example: first class, female, age 21, 1 sibling (SibSp), no Parch,
# fare 7.25, embarked from Southampton.
new_passenger = pd.DataFrame([{
    'Pclass': 1,
    'Sex': 1,
    'Age': 21,
    'SibSp': 1,
    'Parch': 0,
    'Fare': 7.25,
    'Embarked': 3,
}])

# Put the columns in exactly the order used for training; this raises a
# KeyError if a training feature is missing from the example above.
new_passenger = new_passenger[list(X_train.columns)]

# Apply the same fitted imputer used for the training data (a no-op for this
# example because every value is present, but it keeps the input path identical)
new_passenger_imputed = imputer.transform(new_passenger)

# Use the trained model to predict the survival of the new passenger
survival_prediction = model.predict(new_passenger_imputed)

# Output the result (1 = survived, 0 = did not survive)
if survival_prediction[0] == 1:
    print("The passenger survived.")
else:
    print("The passenger did not survive.")



The passenger survived.


