In [1]:
import pandas as pd

In [2]:
from sklearn.linear_model import LogisticRegression

In [3]:
from sklearn.model_selection import train_test_split

In [27]:
from sklearn.metrics import accuracy_score

In [28]:
import joblib

# Cleaning PipeLine

In [5]:
tc = pd.read_csv("../Day-1/titanic/train.csv")

In [6]:
print(tc.isnull().sum())

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64


In [7]:
tc['Age'] = tc['Age'].fillna(tc['Age'].median())

In [9]:
tc['Has_cabin'] = tc['Cabin'].apply(lambda x : 0 if pd.isnull(x) else 1)

In [10]:
tc = tc.drop('Cabin',axis = 1)

In [11]:
tc['Embarked'] = tc['Embarked'].fillna(tc['Embarked'].mode()[0])

In [12]:
tc['Sex'] = tc['Sex'].map({'female': 1 , 'male': 0})

In [14]:
tc['Title'] = tc['Name'].str.extract('([A-Za-z]+)\.',expand=False)

In [16]:
tc['Title'] = tc['Title'].replace(
    ['Lady', 'Countess', 'Mme', 'Mlle', 'Ms', 'Sir',
     'Don', 'Jonkheer'], 'Rare'
)
tc['Title'] = tc['Title'].replace(
    ['Col', 'Major', 'Capt', 'Rev', 'Dr'], 'Officer'
)

In [17]:
tc  =pd.get_dummies(tc , columns=['Embarked','Title'], drop_first=True)

In [18]:
tc

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Has_cabin,Embarked_Q,Embarked_S,Title_Miss,Title_Mr,Title_Mrs,Title_Officer,Title_Rare
0,1,0,3,"Braund, Mr. Owen Harris",0,22.0,1,0,A/5 21171,7.2500,0,False,True,False,True,False,False,False
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,38.0,1,0,PC 17599,71.2833,1,False,False,False,False,True,False,False
2,3,1,3,"Heikkinen, Miss. Laina",1,26.0,0,0,STON/O2. 3101282,7.9250,0,False,True,True,False,False,False,False
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,35.0,1,0,113803,53.1000,1,False,True,False,False,True,False,False
4,5,0,3,"Allen, Mr. William Henry",0,35.0,0,0,373450,8.0500,0,False,True,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",0,27.0,0,0,211536,13.0000,0,False,True,False,False,False,True,False
887,888,1,1,"Graham, Miss. Margaret Edith",1,19.0,0,0,112053,30.0000,1,False,True,True,False,False,False,False
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",1,28.0,1,2,W./C. 6607,23.4500,0,False,True,True,False,False,False,False
889,890,1,1,"Behr, Mr. Karl Howell",0,26.0,0,0,111369,30.0000,1,False,False,False,True,False,False,False


# Model

In [19]:
features = ['Pclass', 'Sex', 'Age', 'SibSp','Parch','Fare','Has_cabin']

In [20]:
X = tc[features]

In [21]:
Y = tc['Survived']

In [22]:
X_train , X_test , y_train , y_test = train_test_split(
    X,Y,train_size=0.2,random_state=42
)

In [23]:
model = LogisticRegression(max_iter=1000)

In [24]:
model.fit(X_train,y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [25]:
y_pred  =model.predict(X_test)

In [26]:
print("Model Trained.  Accuracy: ", accuracy_score(y_test,y_pred))

Model Trained.  Accuracy:  0.7994389901823282


# Saving the model

In [29]:
joblib.dump(model, 'titanic_model.pkl')

['titanic_model.pkl']

In [30]:
loaded_model = joblib.load("titanic_model.pkl")

In [31]:
# Imagine you are a passenger on the Titanic. Fill in your details:
# ```
# Pclass (1, 2, or 3): 2
# Sex (male/female): male
# Age: 21
# SibSp (siblings/spouses aboard): 2
# Parch (parents/children aboard): 1
# Fare (ticket price â€” 1st class ~100, 
#       2nd ~20, 3rd ~10): 20
# Has_Cabin (0 or 1): 0

# Predicting my survival

In [35]:
sonu = pd.DataFrame({
     'Pclass':    [2],
    'Sex':       [0],   
    'Age':       [21],
    'SibSp':     [2],
    'Parch':     [1],
    'Fare':      [20],
    'Has_cabin': [0]
})

In [36]:
prediction = loaded_model.predict(sonu)

In [37]:
probability = loaded_model.predict_proba(sonu)

In [38]:
print("Survived?" , "YES ðŸŽ‰" if prediction[0] == 1 else "NO ðŸ’€")
print("Survival probability:", round(probability[0][1] * 100, 1), "%")
print("Death probability:  ", round(probability[0][0] * 100, 1), "%")

Survived? NO ðŸ’€
Survival probability: 15.5 %
Death probability:   84.5 %


In [39]:
# 2nd passenger 

In [52]:
Her = pd.DataFrame({
    'Pclass':    [1],
    'Sex':       [1],      
    'Age':       [20],
    'SibSp':     [0],
    'Parch':     [0],
    'Fare':      [100],
    'Has_cabin': [1]
})

In [53]:
prediction = loaded_model.predict(Her)
probability = loaded_model.predict_proba(Her)

In [54]:
print("Survived?" , "YES ðŸŽ‰" if prediction[0] == 1 else "NO ðŸ’€")
print("Survival probability:", round(probability[0][1] * 100, 1), "%")
print("Death probability:  ", round(probability[0][0] * 100, 1), "%")

Survived? YES ðŸŽ‰
Survival probability: 93.5 %
Death probability:   6.5 %


# Summary

In [55]:
print("=== TITANIC MODEL SUMMARY ===")
print("Algorithm: Logistic Regression")
print("Training size: 712 passengers")
print("Test accuracy: 82.12%")
print("Model saved: titanic_model.pkl")
print("\nSample predictions:")
print("Sonu (21, male, 2nd, no cabin): 15.5% survival")
print("Shree (20, female, 1st, cabin): 93.5% survival")

=== TITANIC MODEL SUMMARY ===
Algorithm: Logistic Regression
Training size: 712 passengers
Test accuracy: 82.12%
Model saved: titanic_model.pkl

Sample predictions:
Sonu (21, male, 2nd, no cabin): 15.5% survival
Shree (20, female, 1st, cabin): 93.5% survival
