# Titanic ML Project - Structured Notebook

## 1. Libraries Required

In [ ]:
import numpy as np
import pandas as pd
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

## 2. Load Dataset

In [ ]:
# Walk the input directory and echo the full path of every file found
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# Read the train and test CSV files into DataFrames
train_data = pd.read_csv("/kaggle/input/titanic/train.csv")
test_data = pd.read_csv("/kaggle/input/titanic/test.csv")

# Show the first rows of each frame under its own caption
for caption, frame in (
    ("Train Data Preview:", train_data),
    ("Test Data Preview:", test_data),
):
    print(caption)
    display(frame.head())

## 3. Data Exploration

In [ ]:
# Survival rate by gender.
# Each rate is the mean of the "Survived" column over the rows of one sex
# (presumably 0/1 coded, so the mean is the share of survivors -- confirm
# against the dataset). Series.mean() is the vectorised equivalent of
# sum(...) / len(...); unlike that expression it yields NaN instead of raising
# ZeroDivisionError when a group has no rows, and it skips missing values.
women = train_data.loc[train_data["Sex"] == 'female', "Survived"]
rate_women = women.mean()
print('Survival rate of women:', rate_women)

men = train_data.loc[train_data["Sex"] == 'male', "Survived"]
rate_men = men.mean()
print("Survival rate of men:", rate_men)

## 4. Feature Selection & Engineering

In [ ]:
# Define target and features
y = train_data["Survived"]
features = ["Pclass", "Sex", "SibSp", "Parch"]

# One-hot encode the categorical columns of the training features
X = pd.get_dummies(train_data[features])

# Encode the test features the same way, then force them onto the training
# column layout. get_dummies only creates columns for categories present in
# the frame it is given, so encoding the two splits independently can yield
# different columns (or a different order). Reindexing adds any missing
# indicator column as all-zero, drops extras the model never saw, and
# guarantees the order matches what the model is fitted on.
X_test = pd.get_dummies(test_data[features]).reindex(columns=X.columns, fill_value=0)

## 5. Model Building

In [ ]:
# Random forest hyperparameters, gathered in one place
rf_params = {"n_estimators": 100, "max_depth": 5, "random_state": 1}
model = RandomForestClassifier(**rf_params)

# Fit the classifier on the encoded training features and the target
model.fit(X, y)

## 6. Predictions

In [ ]:
# Run the fitted model over the encoded test features
predictions = model.predict(X=X_test)

## 7. Kaggle Submission

In [ ]:
# Build the two-column submission table: passenger id and predicted label
output = pd.DataFrame({'PassengerId': test_data.PassengerId, 'Survived': predictions})

# Write the table to submission.csv without the DataFrame index column
output.to_csv('submission.csv', index=False)

# This cell only writes the file; it performs no upload, so the message
# reports a save rather than a completed submission.
print("Your submission was successfully saved!")

## 8. Model Evaluation

In [ ]:
# Predict on the training features X and compare with the training target y.
# NOTE: this is accuracy on the training inputs, not on held-out data.
train_predictions = model.predict(X)
train_accuracy = accuracy_score(y_true=y, y_pred=train_predictions)
print(f"Training Accuracy: {train_accuracy:.4f}")

## 9. Conclusion

In [ ]:
# Closing summary: each tuple holds the positional arguments of one print call
summary_lines = (
    ("Random Forest model trained successfully.",),
    ("Features used:", features),
    ("Random Forest is a good baseline for Titanic survival prediction.",),
)
for parts in summary_lines:
    print(*parts)