# Importing necessary libraries

In [80]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [106]:
import pandas as pd
titanic_df = pd.read_csv('titanic.csv')
titanic_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,0,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,1,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,0,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,0,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,1,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [82]:
titanic_df.drop(columns=['Name','Cabin', 'Ticket'], inplace=True)


titanic_df['Age'].fillna(titanic_df['Age'].median(), inplace=True)
titanic_df['Embarked'].fillna(titanic_df['Embarked'].mode()[0], inplace=True)


label_encoder = LabelEncoder()
titanic_df['Sex'] = label_encoder.fit_transform(titanic_df['Sex'])
titanic_df['Embarked'] = label_encoder.fit_transform(titanic_df['Embarked'])

features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']

In [97]:
missing_values = titanic_df.isnull().sum()
print(missing_values)


imputer_age = SimpleImputer(strategy='mean')
titanic_df['Age'] = imputer_age.fit_transform(titanic_df[['Age']])

imputer_embarked = SimpleImputer(strategy='most_frequent')
titanic_df['Embarked'] = imputer_embarked.fit_transform(titanic_df[['Embarked']])


missing_values_after_imputation = titanic_df.isnull().sum()
print(missing_values_after_imputation)

PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Fare           1
Embarked       0
dtype: int64
PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Fare           1
Embarked       0
dtype: int64


In [102]:
imputer_fare = SimpleImputer(strategy='median')
titanic_df['Fare'] = imputer_fare.fit_transform(titanic_df[['Fare']])


label_encoder_sex = LabelEncoder()
titanic_df['Sex'] = label_encoder_sex.fit_transform(titanic_df['Sex'])

label_encoder_embarked = LabelEncoder()
titanic_df['Embarked'] = label_encoder_embarked.fit_transform(titanic_df['Embarked'])


scaler = StandardScaler()
features = ['Age', 'Fare', 'Parch', 'Pclass', 'Sex', 'SibSp', 'Embarked']
titanic_df[features] = scaler.fit_transform(titanic_df[features])

In [85]:

imputer_fare = SimpleImputer(strategy='median')
titanic_df['Fare'] = imputer_fare.fit_transform(titanic_df[['Fare']])


label_encoder_sex = LabelEncoder()
titanic_df['Sex'] = label_encoder_sex.fit_transform(titanic_df['Sex'])

label_encoder_embarked = LabelEncoder()
titanic_df['Embarked'] = label_encoder_embarked.fit_transform(titanic_df['Embarked'])


scaler = StandardScaler()
features = ['Age', 'Fare', 'Parch', 'Pclass', 'Sex', 'SibSp', 'Embarked']
titanic_df[features] = scaler.fit_transform(titanic_df[features])

# Training the model

In [86]:
X = titanic_df.drop(columns=['Survived', 'PassengerId'])
y = titanic_df['Survived']

In [87]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)

In [88]:

y_pred = model.predict(X_test)

# Evaluation of the model

In [89]:

accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)
confusion_mat = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(classification_rep)
print("Confusion Matrix:")
print(confusion_mat)


Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       1.00      1.00      1.00        34

    accuracy                           1.00        84
   macro avg       1.00      1.00      1.00        84
weighted avg       1.00      1.00      1.00        84

Confusion Matrix:
[[50  0]
 [ 0 34]]


# Prediction

In [107]:
imputer_fare = SimpleImputer(strategy='median')
titanic_df['Fare'] = imputer_fare.fit_transform(titanic_df[['Fare']])


label_encoder_sex = LabelEncoder()
titanic_df['Sex'] = label_encoder_sex.fit_transform(titanic_df['Sex'])

label_encoder_embarked = LabelEncoder()
titanic_df['Embarked'] = label_encoder_embarked.fit_transform(titanic_df['Embarked'])


scaler = StandardScaler()
features = ['Age', 'Fare', 'Parch', 'Pclass', 'Sex', 'SibSp', 'Embarked']
titanic_df[features] = scaler.fit_transform(titanic_df[features])

In [108]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder

titanic_df.drop(columns=['Cabin', 'Ticket'], inplace=True)


titanic_df['Age'].fillna(titanic_df['Age'].median(), inplace=True)
titanic_df['Embarked'].fillna(titanic_df['Embarked'].mode()[0], inplace=True)


label_encoder = LabelEncoder()
titanic_df['Sex'] = label_encoder.fit_transform(titanic_df['Sex'])
titanic_df['Embarked'] = label_encoder.fit_transform(titanic_df['Embarked'])


features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']


X = titanic_df[features]
y = titanic_df['Survived']


model = LogisticRegression(random_state=42)
model.fit(X, y)


def predict_survival(name):
    passenger = titanic_df[titanic_df['Name'].str.contains(name, case=False)]
    
    if passenger.empty:
        return f"Passenger '{name}' not found."
    
    passenger_features = passenger[features].values
    
    if len(passenger_features) == 0:
        return f"No features found for passenger '{name}'."
    
    prediction = model.predict(passenger_features)
    survival_status = "Survived" if prediction[0] == 1 else "Did not survive"
    return f"Passenger '{name}' {survival_status}."


name_to_predict = "Kelly, Mr. James"
prediction_result = predict_survival(name_to_predict)
print(prediction_result)


Passenger 'Kelly, Mr. James' Did not survive.


