<a href="https://colab.research.google.com/github/RITABRITA-0407/Logistic-regression/blob/main/Titanic_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Project 2: Logistic Regression on Titanic Dataset
Goal: Predict passenger survival using logistic regression.

Step 1: Install the Required Libraries


In [None]:
pip install pandas seaborn scikit-learn matplotlib

Step 2: Load and Explore the Dataset

In [None]:
# Import essential libraries
import pandas as pd # Data handling
import seaborn as sns # Built-in Titanic dataset and plots
import matplotlib.pyplot as plt # Plotting
from sklearn.model_selection import train_test_split # Splitting data
from sklearn.linear_model import LogisticRegression # Our model
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [None]:
# Pull the 'titanic' example table through seaborn's dataset loader.
titanic_dataset_name = 'titanic'
df = sns.load_dataset(titanic_dataset_name)

In [None]:
# Preview the top of the table.
# The frame is left as the cell's last expression so Jupyter renders it as a
# formatted table; wrapping it in print() would fall back to plain text.
df.head()

Step 3: Preprocess the Data

In [None]:
# Discard the columns this analysis treats as irrelevant or too sparse to use.
# errors='ignore' keeps the cell safe to re-run: once the columns are gone, a
# second execution is a no-op instead of raising KeyError.
columns_to_discard = ['deck', 'embark_town', 'alive', 'class', 'who', 'adult_male']
df = df.drop(columns=columns_to_discard, errors='ignore')


In [None]:
# Keep only complete records: every row holding at least one missing value
# in the remaining columns is removed.
df = df.dropna(axis='index', how='any')

In [None]:
# Replace the text labels in 'sex' and 'embarked' with integer codes.
# Each lookup passes through any value that is not a key of its mapping, so
# re-running this cell leaves the already-encoded codes untouched. A plain
# dict-based .map() would instead turn the whole column into NaN on a second
# run, because the integer codes are not keys of the dict.
sex_codes = {'male': 0, 'female': 1}
embarked_codes = {'S': 0, 'C': 1, 'Q': 2}
df['sex'] = df['sex'].map(lambda value: sex_codes.get(value, value))
df['embarked'] = df['embarked'].map(lambda value: embarked_codes.get(value, value))

In [None]:
# Inspect column dtypes and non-null counts to confirm the cleaning steps worked.
# DataFrame.info() writes its summary itself and returns None, so it is called
# bare; wrapping it in print() would append a stray "None" line to the output.
df.info()

Step 4: Define Features and Target


In [None]:
# Predictor columns handed to the classifier, gathered into the feature matrix X.
feature_names = ['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'embarked']
X = df[feature_names]

In [None]:
# Label vector: the 'survived' column (1 = survived, 0 = died).
y = df.loc[:, 'survived']

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable between runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

Step 5: Train the Logistic Regression Model

In [None]:
# Instantiate the (still unfitted) logistic-regression classifier.
# The iteration cap is raised to 1000 to give the solver room to converge.
solver_iteration_cap = 1000
model = LogisticRegression(max_iter=solver_iteration_cap)

In [None]:
# Train the classifier on the training split.
model.fit(X_train, y_train)

# Report what was learned: the coefficient array first, then the intercept.
learned_parameters = (
    ("Model Coefficients:", model.coef_),
    ("Model Intercept:", model.intercept_),
)
for label, values in learned_parameters:
    print(label, values)

Step 6: Evaluate the Model

In [None]:
# Generate class predictions for the held-out rows.
held_out_features = X_test
y_pred = model.predict(held_out_features)

In [None]:
# Share of held-out rows whose predicted label matches the true label.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

In [None]:
# Text summary of the test-set predictions, produced by classification_report.
# The heading and the report are printed separately: handing both to a single
# print() inserts a separator space in front of the report, which pushes its
# header row one character out of line with the rows beneath it.
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

In [None]:
# Draw the confusion matrix as an annotated heatmap (integer cell labels,
# blue colour scale) on an explicitly created Axes.
cm_counts = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots()
sns.heatmap(cm_counts, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()