In [253]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# === 1. Load Data ===
# The CSV directory defaults to the original Desktop location, but it can be
# redirected by setting the TITANIC_DATA_DIR environment variable so the
# script is not tied to a single user's machine.
import os
from pathlib import Path

DATA_DIR = Path(
    os.environ.get("TITANIC_DATA_DIR", r"C:\Users\Nares\OneDrive\Desktop")
)
train_df = pd.read_csv(DATA_DIR / "train.csv")
test_df = pd.read_csv(DATA_DIR / "test.csv")

# === 2. Preprocess train_df ===
# Impute the numeric gaps with each column's own median.
train_age_median = train_df['Age'].median()
train_fare_median = train_df['Fare'].median()
train_df['Age'] = train_df['Age'].fillna(train_age_median)
train_df['Fare'] = train_df['Fare'].fillna(train_fare_median)

# Encode the categorical columns as integers.
sex_codes = {'male': 0, 'female': 1}
train_df['Sex'] = train_df['Sex'].map(sex_codes)
train_df['Cabin'] = train_df['Cabin'].notna().astype(int)  # 1 when a cabin value is present

# Fill missing ports with the most frequent one, then one-hot encode the column.
most_common_port = train_df['Embarked'].mode()[0]
train_df['Embarked'] = train_df['Embarked'].fillna(most_common_port)
embarked_dummies_train = pd.get_dummies(train_df['Embarked'], prefix='Embarked')
train_df = pd.concat([train_df, embarked_dummies_train], axis='columns')

# Make sure all three port indicator columns exist, adding a zero column for
# any port that produced no dummy column.
for col in ('Embarked_C', 'Embarked_Q', 'Embarked_S'):
    if col in train_df.columns:
        continue
    train_df[col] = 0

# === 3. Select Features ===
# Base passenger attributes followed by one indicator column per port.
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Cabin']
features += [f'Embarked_{port}' for port in ('C', 'Q', 'S')]
X = train_df[features]
y = train_df['Survived']

# === 4. Train/Validation Split ===
# Hold out 20% of the rows for validation; the fixed seed makes the split
# reproducible between runs.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)

# === 5. Train Model ===
# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# === 6. Evaluate on Validation Set ===
y_pred = model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy: {val_accuracy:.2f}")

# === 7. Preprocess test_df ===
# Missing values in test_df are filled with statistics taken from train_df,
# not from test_df itself.
train_fill_values = {
    'Age': train_df['Age'].median(),
    'Fare': train_df['Fare'].median(),
    'Embarked': train_df['Embarked'].mode()[0],
}
for fill_col, fill_value in train_fill_values.items():
    test_df[fill_col] = test_df[fill_col].fillna(fill_value)

# Same integer encodings as applied to train_df.
test_df['Sex'] = test_df['Sex'].map({'male': 0, 'female': 1})
test_df['Cabin'] = test_df['Cabin'].notna().astype(int)  # 1 when a cabin value is present

# One-hot encode the port and append the indicator columns.
embarked_dummies_test = pd.get_dummies(test_df['Embarked'], prefix='Embarked')
test_df = pd.concat([test_df, embarked_dummies_test], axis='columns')

# Add a zero column for any port indicator that get_dummies did not produce,
# so every name listed in `features` can be selected from test_df.
for col in ('Embarked_C', 'Embarked_Q', 'Embarked_S'):
    if col in test_df.columns:
        continue
    test_df[col] = 0

# === 8. Predict on Test Set ===
# Select the same feature columns, in the same order, that the model was fit on.
X_test = test_df[features]
test_predictions = model.predict(X_test)

# === 9. Create Submission File ===
# Two-column frame: the passenger id alongside its predicted label.
submission = test_df[['PassengerId']].assign(Survived=test_predictions)
submission.to_csv('titanic_predictions.csv', index=False)  # index=False omits the row index
print("Submission file saved as 'titanic_predictions.csv'")


Validation Accuracy: 0.82
Submission file saved as 'titanic_predictions.csv'
