# Import The Required Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import log_loss, mean_squared_error, mean_absolute_error, r2_score

# Read the Dataset

In [2]:
# Load the passenger table; the relative path is resolved against the
# kernel's working directory.
df = pd.read_csv(filepath_or_buffer="tested.csv")

In [3]:
# Preview the first five rows (the default row count for head()).
df.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,0,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,1,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,0,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,0,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,1,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [4]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(418, 12)

In [5]:
# Distinct raw labels present in the Sex column, checked before recoding them.
df['Sex'].unique()

array(['male', 'female'], dtype=object)

In [6]:
# Recode Sex to integers (male -> 1, female -> 2).
# A callable is passed to .map so values that are not in the table pass through
# unchanged: re-running this cell on the already-recoded column is a no-op
# rather than turning every row into NaN.
# The trailing astype makes the integer dtype explicit instead of relying on
# Series.replace's implicit object -> int downcast (deprecated in pandas 2.2),
# and it raises if an unexpected label or a missing value is present.
SEX_CODES = {'male': 1, 'female': 2}
df['Sex'] = df['Sex'].map(lambda label: SEX_CODES.get(label, label)).astype('int64')

In [7]:
# Spot-check the first three rows now that Sex is numeric.
df.head(n=3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,0,3,"Kelly, Mr. James",1,34.5,0,0,330911,7.8292,,Q
1,893,1,3,"Wilkes, Mrs. James (Ellen Needs)",2,47.0,1,0,363272,7.0,,S
2,894,0,2,"Myles, Mr. Thomas Francis",1,62.0,0,0,240276,9.6875,,Q


In [8]:
# Column dtypes and non-null counts; used to see which columns have gaps.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  418 non-null    int64  
 1   Survived     418 non-null    int64  
 2   Pclass       418 non-null    int64  
 3   Name         418 non-null    object 
 4   Sex          418 non-null    int64  
 5   Age          332 non-null    float64
 6   SibSp        418 non-null    int64  
 7   Parch        418 non-null    int64  
 8   Ticket       418 non-null    object 
 9   Fare         417 non-null    float64
 10  Cabin        91 non-null     object 
 11  Embarked     418 non-null    object 
dtypes: float64(2), int64(6), object(4)
memory usage: 39.3+ KB


# Drop columns

In [9]:
# Remove the free-text 'Name' column so it is not carried into the features.
# Rebinding df (instead of inplace=True) together with errors='ignore' keeps
# the cell re-runnable: executing it a second time no longer raises KeyError.
df = df.drop(columns=['Name'], errors='ignore')

# Encoding

In [10]:
# Turn the remaining text columns into integer codes.
# Each column gets its own LabelEncoder, stored in `label_encoders`, so every
# mapping stays available (e.g. for inverse_transform). Previously a single
# encoder was re-fitted three times and only the 'Ticket' mapping survived.
# Missing 'Cabin' entries are not removed here: LabelEncoder gives NaN a code
# of its own, so the column contains no nulls once this cell has run.
label_encoders = {}
for column in ['Embarked', 'Cabin', 'Ticket']:
    label_encoder = LabelEncoder()
    df[column] = label_encoder.fit_transform(df[column])
    label_encoders[column] = label_encoder

In [11]:
# Spot-check the first three rows after label encoding.
df.head(n=3)

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,0,3,1,34.5,0,0,152,7.8292,76,1
1,893,1,3,2,47.0,1,0,221,7.0,76,2
2,894,0,2,1,62.0,0,0,73,9.6875,76,1


# Fill The Null Values

In [12]:
# Impute the remaining gaps in Age and Fare.
# The fill value is the mean of the column's mode(s): mode() returns every
# tied most-frequent value and .mean() collapses them to one scalar.
# NOTE(review): for continuous, skewed columns such as Fare the median is the
# more usual choice; confirm that this statistic is intended.
# 'Cabin' is deliberately not listed: it was label-encoded earlier, which
# already gave missing cabins an integer code, so it has no NaN left to fill.
fill_values = {
    'Age': df['Age'].mode().mean(),
    'Fare': df['Fare'].mode().mean(),
}
# Rebind instead of inplace=True so the cell has no hidden in-place mutation.
df = df.fillna(fill_values)

In [13]:
# Re-check dtypes and non-null counts after encoding and imputation.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  418 non-null    int64  
 1   Survived     418 non-null    int64  
 2   Pclass       418 non-null    int64  
 3   Sex          418 non-null    int64  
 4   Age          418 non-null    float64
 5   SibSp        418 non-null    int64  
 6   Parch        418 non-null    int64  
 7   Ticket       418 non-null    int32  
 8   Fare         418 non-null    float64
 9   Cabin        418 non-null    int32  
 10  Embarked     418 non-null    int32  
dtypes: float64(2), int32(3), int64(6)
memory usage: 31.2 KB


In [14]:
# Summary statistics (count, mean, std, quartiles, min/max) for every numeric column.
df.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
count,418.0,418.0,418.0,418.0,418.0,418.0,418.0,418.0,418.0,418.0,418.0
mean,1100.5,0.363636,2.26555,1.363636,28.673445,0.447368,0.392344,180.944976,35.560497,67.437799,1.401914
std,120.810458,0.481622,0.841838,0.481622,13.020267,0.89676,0.981429,107.533763,55.857145,19.091405,0.854496
min,892.0,0.0,1.0,1.0,0.17,0.0,0.0,0.0,0.0,0.0,0.0
25%,996.25,0.0,1.0,1.0,22.5,0.0,0.0,85.25,7.8958,76.0,1.0
50%,1100.5,0.0,3.0,1.0,24.0,0.0,0.0,181.0,14.4542,76.0,2.0
75%,1204.75,1.0,3.0,2.0,35.75,1.0,0.0,279.75,31.471875,76.0,2.0
max,1309.0,1.0,3.0,2.0,76.0,8.0,9.0,362.0,512.3292,76.0,2.0


# Define Input and output

In [15]:
# Features / target.
# PassengerId appears to be a plain row identifier (418 consecutive values,
# 892..1309), not a property of the passenger, so it is excluded from the
# model inputs together with the target column.
# NOTE(review): df.describe() reports the same std for 'Sex' and 'Survived'
# and means that differ by exactly 1, which suggests the label is a direct
# function of Sex in this file. If so, perfect scores downstream reflect
# label leakage rather than model quality; confirm before trusting the metrics.
X = df.drop(columns=['Survived', 'PassengerId'])
y = df['Survived']

# Split the data

In [16]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Scale the data

In [17]:
# Standardise the features (StandardScaler is imported from
# sklearn.preprocessing in the notebook's import cell).
# The scaler is fitted on the training split only, and the test split is
# transformed with those same statistics, so nothing about the held-out rows
# leaks into preprocessing.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Create a model

In [18]:
# Fit a logistic-regression classifier on the training split.
# max_iter is raised above scikit-learn's default (100) to give the solver
# more room to converge.
# The default-constructed LogisticRegression() that used to precede this line
# was overwritten immediately without being used, so it has been removed.
model = LogisticRegression(max_iter=1000)
model.fit(x_train, y_train)

# Predict

In [19]:
# Hard class predictions for the held-out rows.
y_pred = model.predict(x_test)

# Compare the Predicted and Actual Values

In [20]:
# Line up the true labels with the model's predictions for a visual check.
# The 'Actual' key used to carry a trailing space ('Actual '), which produced
# a column that could not be selected as df5['Actual'].
df5 = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})

In [21]:
# Display the actual-vs-predicted table (pandas truncates the middle rows).
df5

Unnamed: 0,Actual,Predicted
321,0,0
324,1,1
388,0,0
56,0,0
153,1,1
...,...,...
57,0,0
126,0,0
24,1,1
17,0,0


# Metrics Calculation

In [22]:
# Score the classifier on the held-out split.
# Log loss is computed from the predicted class probabilities; the other
# scores compare the hard predictions with the true labels. With 0/1 labels,
# MSE and MAE both reduce to the fraction of misclassified rows.
test_probabilities = model.predict_proba(x_test)

# accuracy is computed here and printed in a later cell.
accuracy = accuracy_score(y_test, y_pred)
loss = log_loss(y_test, test_probabilities)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Log Loss: {loss:.2f}')
print(f'MSE: {mse:.2f}')
print(f'MAE: {mae:.2f}')
print(f'R2 Score: {r2:.2f}')

Log Loss: 0.01
MSE: 0.00
MAE: 0.00
R2 Score: 1.00


In [23]:
# Per-class precision, recall and F1 on the held-out split.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       1.00      1.00      1.00        34

    accuracy                           1.00        84
   macro avg       1.00      1.00      1.00        84
weighted avg       1.00      1.00      1.00        84



# Model Accuracy

In [24]:
# Report the accuracy computed in the metrics cell above, to two decimals.
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 1.00
