In [4]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Load training and test datasets
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

# Drop irrelevant columns
drop_columns = ['Name', 'Ticket', 'Cabin']
train_df = train_df.drop(columns=drop_columns)
test_df = test_df.drop(columns=drop_columns)

# Fill missing values
train_df['Age'].fillna(train_df['Age'].median(), inplace=True)
train_df['Embarked'].fillna(train_df['Embarked'].mode()[0], inplace=True)
test_df['Age'].fillna(train_df['Age'].median(), inplace=True)
test_df['Fare'].fillna(train_df['Fare'].median(), inplace=True)  # Corrected: fill Fare in test with train's median
test_df['Embarked'].fillna(train_df['Embarked'].mode()[0], inplace=True)

# Convert categorical to numerical
train_df['Sex'] = train_df['Sex'].map({'male': 0, 'female': 1})
test_df['Sex'] = test_df['Sex'].map({'male': 0, 'female': 1})
train_df['Embarked'] = train_df['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})
test_df['Embarked'] = test_df['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})

# Include additional features that may affect the model
train_df['FamilySize'] = train_df['SibSp'] + train_df['Parch']
test_df['FamilySize'] = test_df['SibSp'] + test_df['Parch']

# Separate features and target
X = train_df.drop(columns='Survived')
y = train_df['Survived']
X_test = test_df.copy()

# Scale the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_test_scaled = scaler.transform(X_test)

# Train logistic regression model
model = LogisticRegression(max_iter=1000)  # Set max_iter to ensure convergence
model.fit(X_scaled, y)

# Predict on test data
predictions = model.predict(X_test_scaled)

# Save predictions to a CSV file (no header, one column)
pd.DataFrame(predictions).to_csv("predictions.csv", index=False, header=False)

print("Predictions saved successfully.")


Predictions saved successfully.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df['Age'].fillna(train_df['Age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df['Embarked'].fillna(train_df['Embarked'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the interme