In [152]:
import numpy as np
import pandas as pd

In [153]:
import seaborn as sns

# Load the Titanic dataset from Seaborn's example datasets
df = sns.load_dataset("titanic")

# Display the first few rows of the DataFrame

df.head()


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [154]:
df.shape

(891, 15)

In [155]:
df.isnull().sum()

survived         0
pclass           0
sex              0
age            177
sibsp            0
parch            0
fare             0
embarked         2
class            0
who              0
adult_male       0
deck           688
embark_town      2
alive            0
alone            0
dtype: int64

In [156]:
df = df[['sex', 'pclass','age','fare','embarked','survived']]

In [157]:
df.head()

Unnamed: 0,sex,pclass,age,fare,embarked,survived
0,male,3,22.0,7.25,S,0
1,female,1,38.0,71.2833,C,1
2,female,3,26.0,7.925,S,1
3,female,1,35.0,53.1,S,1
4,male,3,35.0,8.05,S,0


In [158]:
df['age'] = df['age'].fillna(df['age'].mean())

In [159]:
df['age']

0      22.000000
1      38.000000
2      26.000000
3      35.000000
4      35.000000
         ...    
886    27.000000
887    19.000000
888    29.699118
889    26.000000
890    32.000000
Name: age, Length: 891, dtype: float64

In [160]:
df.isnull().sum()

sex         0
pclass      0
age         0
fare        0
embarked    2
survived    0
dtype: int64

In [161]:
df['embarked'].fillna( method="ffill",inplace=True)

In [162]:
df.isnull().sum()

sex         0
pclass      0
age         0
fare        0
embarked    0
survived    0
dtype: int64

In [163]:
df.head()

Unnamed: 0,sex,pclass,age,fare,embarked,survived
0,male,3,22.0,7.25,S,0
1,female,1,38.0,71.2833,C,1
2,female,3,26.0,7.925,S,1
3,female,1,35.0,53.1,S,1
4,male,3,35.0,8.05,S,0


In [165]:
from sklearn.preprocessing import LabelEncoder

In [168]:
# Encode categorical features
encoder = LabelEncoder()
df['sex'] = encoder.fit_transform(df['sex'])
df['embarked'] = encoder.fit_transform(df['embarked'])

In [169]:
df.head()

Unnamed: 0,sex,pclass,age,fare,embarked,survived
0,1,3,22.0,7.25,2,0
1,0,1,38.0,71.2833,0,1
2,0,3,26.0,7.925,2,1
3,0,1,35.0,53.1,2,1
4,1,3,35.0,8.05,2,0


In [172]:
# Split the data into features and target
X = df.drop("survived", axis=1)
y = df["survived"]

In [173]:
X

Unnamed: 0,sex,pclass,age,fare,embarked
0,1,3,22.000000,7.2500,2
1,0,1,38.000000,71.2833,0
2,0,3,26.000000,7.9250,2
3,0,1,35.000000,53.1000,2
4,1,3,35.000000,8.0500,2
...,...,...,...,...,...
886,1,2,27.000000,13.0000,2
887,0,1,19.000000,30.0000,2
888,0,3,29.699118,23.4500,2
889,1,1,26.000000,30.0000,0


In [174]:
y

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: survived, Length: 891, dtype: int64

In [175]:
from sklearn.model_selection import train_test_split

In [176]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [177]:
#preprocessing steps
from sklearn.preprocessing import StandardScaler
train_scaler=StandardScaler()
test_scaler=StandardScaler()

In [178]:
X_train_scaled=train_scaler.fit_transform(X_train)

In [179]:
X_test_scaled=train_scaler.transform(X_test)

In [180]:
from sklearn.linear_model import LogisticRegression

# Initialize the model
model = LogisticRegression()

# Train the model
model.fit(X_train, y_train)


In [181]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", confusion)
print("Classification Report:\n", classification_rep)


Accuracy: 0.7988826815642458
Confusion Matrix:
 [[88 17]
 [19 55]]
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.84      0.83       105
           1       0.76      0.74      0.75        74

    accuracy                           0.80       179
   macro avg       0.79      0.79      0.79       179
weighted avg       0.80      0.80      0.80       179

