In [3]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Titanic survival pipeline: load the two CSVs, encode/impute the features,
# fit a logistic regression on the labelled rows, and write predictions for
# the unlabelled rows to "titanic_predictions.csv".

# Load datasets
# NOTE(review): the labelled training data is read from "tested.csv" — confirm
# this is the intended training file and that it is distinct from "test.csv".
train_df = pd.read_csv("tested.csv")
test_df = pd.read_csv("test.csv")

# Tag each row with its origin so the frames can be split apart again after
# the shared preprocessing step.
train_df['source'] = 'train'
test_df['source'] = 'test'

# No placeholder 'Survived' column is added to test_df: concat aligns on
# column names and fills any column missing from one frame with NaN.
combined = pd.concat([train_df, test_df], ignore_index=True)
is_train = combined['source'] == 'train'

# Basic preprocessing
# The encoders are fitted on the combined frame so that both splits share a
# single category -> integer mapping.
combined['Sex'] = LabelEncoder().fit_transform(combined['Sex'])
# Missing ports of embarkation are filled with 'S' before encoding.
combined['Embarked'] = LabelEncoder().fit_transform(combined['Embarked'].fillna('S'))

# Impute numeric gaps with medians taken from the training rows only, so that
# no statistic derived from the prediction set leaks into the fitted model.
for column in ('Age', 'Fare'):
    train_median = combined.loc[is_train, column].median()
    combined[column] = combined[column].fillna(train_median)

# Select features
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
train_data = combined[is_train]
test_data = combined[combined['source'] == 'test']

X_train = train_data[features]
# Only the training rows carry labels; cast them to int for the classifier.
y_train = train_data['Survived'].astype(int)
X_test = test_data[features]

# Train model
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# Predict on test set
test_predictions = model.predict(X_test)

# Add predictions to test_df
# test_data keeps the row order of test_df (concat appended it after the
# training rows), so the prediction array lines up positionally.
test_df['Survived'] = test_predictions.astype(int)

# Save predictions
test_df[['PassengerId', 'Survived']].to_csv("titanic_predictions.csv", index=False)

print("Model trained and predictions saved to 'titanic_predictions.csv'")


Model trained and predictions saved to 'titanic_predictions.csv'
