In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the training data (expects train.csv in the current working directory).
train_data = pd.read_csv("train.csv")

# Data preprocessing
# Drop the free-text / high-cardinality columns that are not used as features.
train_data = train_data.drop(columns=["Name", "Ticket", "Cabin"])

# Fill missing values by assigning the filled column back to the frame.
# Calling fillna(inplace=True) on a column selection (train_data["Age"]) acts
# on an intermediate object: pandas 2.x warns about it, and with Copy-on-Write
# enabled the frame is left unchanged, so the NaNs would reach the model.
# NOTE(review): the median/mode are computed over all rows, i.e. before the
# train/validation split below, so validation rows influence the imputation
# values. Kept as-is to leave the reported metrics unchanged; confirm whether
# this should move after the split.
age_median = train_data["Age"].median()
embarked_mode = train_data["Embarked"].mode()[0]
train_data["Age"] = train_data["Age"].fillna(age_median)
train_data["Embarked"] = train_data["Embarked"].fillna(embarked_mode)

# Encode categorical variables as integer codes.
# One encoder per column, so each fitted mapping stays available for
# transforming other data or for inverse_transform; re-fitting a single shared
# encoder would discard the "Sex" mapping as soon as "Embarked" is fitted.
sex_encoder = LabelEncoder()
embarked_encoder = LabelEncoder()
train_data["Sex"] = sex_encoder.fit_transform(train_data["Sex"])
train_data["Embarked"] = embarked_encoder.fit_transform(train_data["Embarked"])
# Backward-compatible alias: as before, `label_encoder` ends up holding the
# mapping fitted on "Embarked".
label_encoder = embarked_encoder

# Split data into features and target variable.
# NOTE(review): every remaining column except "Survived" is used as a feature;
# if the CSV carries a row identifier (e.g. PassengerId) it is included here —
# confirm whether that is intended.
X = train_data.drop("Survived", axis=1)
y = train_data["Survived"]

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: the scaler is fitted on the training split only and the
# same fitted transform is then applied to the validation split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Model training: logistic regression with scikit-learn's default settings.
log_reg = LogisticRegression()
log_reg.fit(X_train_scaled, y_train)

# Model evaluation on the held-out validation split.
y_pred = log_reg.predict(X_val_scaled)
accuracy = accuracy_score(y_val, y_pred)
print("Accuracy:", accuracy)
print("\nClassification Report:")
print(classification_report(y_val, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_val, y_pred))


Accuracy: 0.8100558659217877

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.86      0.84       105
           1       0.79      0.74      0.76        74

    accuracy                           0.81       179
   macro avg       0.81      0.80      0.80       179
weighted avg       0.81      0.81      0.81       179


Confusion Matrix:
[[90 15]
 [19 55]]
