In [None]:
"""Logistic Regression is used for binary classification problems, where the output is one of two classes, for example:

Yes / No

0 / 1

True / False"""

In [None]:
"""CORE IDEA OF LOGISTIC REGRESSION

Logistic Regression works in two steps:
1. It uses a linear equation to combine input features into a single value (like in linear regression). Formula: z = b + m1*x1 + m2*x2 + ... + mn*xn
2. It then applies the logistic (sigmoid) function to this value to convert it into a probability between 0 and 1. Formula: p = 1 / (1 + e^(-z))
"""

In [None]:
"""WHY SIGMOID IS IMPORTANT

Sigmoid ensures:

Output is always between 0 and 1

Output can be treated as probability

Smooth, gradual transition from 0 to 1 (no abrupt jump between the two classes)

Mental picture:

Very negative z → close to 0

Very positive z → close to 1

z = 0 → exactly 0.5"""

In [1]:
import pandas as pd #data manipulation and analysis library
import numpy as np #numerical computing library

from sklearn.model_selection import train_test_split #sklearn (scikit-learn) is a machine learning library; train_test_split splits data into training and testing sets
from sklearn.linear_model import LogisticRegression #Logistic regression model from scikit-learn
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report #metrics to evaluate model performance


In [2]:
# Load the Titanic dataset into a DataFrame.
# pandas is already imported as `pd` in the imports cell, so it is not re-imported here.
# The CSV location can be overridden with the TITANIC_CSV environment variable so the
# notebook can run on other machines; the default is the original hardcoded path.
import os

TITANIC_CSV = os.environ.get("TITANIC_CSV", r"C:\Users\USER\Downloads\Titanic-Dataset.csv")
df = pd.read_csv(TITANIC_CSV)
df.head()  # Display first 5 rows of the dataset

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
# Preprocess the raw Titanic frame for modelling.
# NOTE: this cell is not re-runnable on its own. A second run would map the already
# encoded Sex values to NaN and fail on the missing Embarked column, so re-run the
# load cell first.

# Handle missing values: fill missing ages with the median age.
# The result is assigned back rather than calling fillna(..., inplace=True) on
# df["Age"]: that chained in-place form operates on an intermediate object, is
# deprecated in pandas 2.x, and leaves df unchanged under Copy-on-Write (pandas 3.0).
df["Age"] = df["Age"].fillna(df["Age"].median())

# Encode gender as an integer flag (male -> 0, female -> 1)
df["Sex"] = df["Sex"].map({"male": 0, "female": 1})

# One-hot encode Embarked; drop_first=True omits the first category's column
df = pd.get_dummies(df, columns=["Embarked"], drop_first=True)

# Drop unnecessary columns (errors="ignore" skips any that are not present)
df = df.drop(columns=["Name", "Ticket", "Cabin"], errors="ignore")


In [4]:
# Features are every remaining column except the target; the target is Survived.
# NOTE(review): PassengerId is not dropped during preprocessing, so it ends up in X.
# It looks like a row identifier; confirm it is meant to be a model feature.
X = df.drop(columns=["Survived"])
y = df["Survived"]

# Hold out 25% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [5]:
# Train a logistic regression classifier on the training split.
# max_iter=1000 raises the solver's iteration cap above scikit-learn's default of 100.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
model  # fit() returns the estimator itself, so this displays the fitted model


In [6]:
# Predict a class label for every row of the held-out test set
y_pred = model.predict(X_test)


In [7]:
# Predicted class probabilities for the test set: one row per sample, one column per class
# (each row sums to 1; columns presumably follow model.classes_, i.e. class 0 then class 1)
y_prob = model.predict_proba(X_test)
y_prob[:5]  # show only the first five rows


array([[0.87391687, 0.12608313],
       [0.78137221, 0.21862779],
       [0.8469942 , 0.1530058 ],
       [0.10268452, 0.89731548],
       [0.27239993, 0.72760007]])

In [8]:
# Fraction of test rows whose predicted label matches the true label
accuracy_score(y_test, y_pred)


0.8026905829596412

In [9]:
# Per-class precision, recall, F1-score and support on the test set.
# print() is used so the multi-line text report renders with its line breaks.
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.83      0.85      0.84       134
           1       0.76      0.73      0.75        89

    accuracy                           0.80       223
   macro avg       0.80      0.79      0.79       223
weighted avg       0.80      0.80      0.80       223

