In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
# Read the training split from train.csv into a DataFrame
titanic_train = pd.read_csv(filepath_or_buffer='train.csv')

In [3]:
# Read the test split from test.csv into a DataFrame
titanic_test = pd.read_csv(filepath_or_buffer='test.csv')

In [5]:
# Preprocess the data: fill missing values in both splits.
#
# Imputation statistics are computed once, on the training split only, and
# reused for the test split. That way both frames go through exactly the same
# transformation and nothing is estimated from the test data.
age_fill = titanic_train['Age'].mean()
fare_fill = titanic_train['Fare'].median()

titanic_train['Age'] = titanic_train['Age'].fillna(age_fill)
titanic_test['Age'] = titanic_test['Age'].fillna(age_fill)

# 'Fare' is selected as a model feature further down, so a missing value would
# otherwise reach the classifier as NaN. fillna is a no-op when the column has
# no gaps.
titanic_train['Fare'] = titanic_train['Fare'].fillna(fare_fill)
titanic_test['Fare'] = titanic_test['Fare'].fillna(fare_fill)

# 'S' is a hard-coded default port -- presumably the most frequent value in the
# training data; TODO confirm.
titanic_train['Embarked'] = titanic_train['Embarked'].fillna('S')
titanic_test['Embarked'] = titanic_test['Embarked'].fillna('S')

# Placeholder string for rows with no recorded cabin.
titanic_train['Cabin'] = titanic_train['Cabin'].fillna('Unknown')
titanic_test['Cabin'] = titanic_test['Cabin'].fillna('Unknown')

In [6]:
# Reduce every Cabin value to its leading character; the 'Unknown'
# placeholder collapses to 'U'.
def _cabin_letter(cabin):
    """Return 'U' for the 'Unknown' placeholder, else the first character of ``cabin``."""
    if cabin == 'Unknown':
        return 'U'
    return cabin[0]


titanic_train['Cabin'] = titanic_train['Cabin'].apply(_cabin_letter)
titanic_test['Cabin'] = titanic_test['Cabin'].apply(_cabin_letter)

In [7]:
# Replace the categorical columns of the training split with integer codes.
# Series.map with a dict turns any value missing from the dict into NaN.
sex_codes = {'male': 0, 'female': 1}
port_codes = {'S': 0, 'C': 1, 'Q': 2}
cabin_codes = {
    'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4,
    'F': 5, 'G': 6, 'T': 7, 'U': 8,
}

titanic_train['Sex'] = titanic_train['Sex'].map(sex_codes)
titanic_train['Embarked'] = titanic_train['Embarked'].map(port_codes)
titanic_train['Cabin'] = titanic_train['Cabin'].map(cabin_codes)

In [8]:
# Replace the categorical columns of the test split with integer codes,
# one mapping per column. Values missing from a mapping become NaN.
test_encodings = {
    'Sex': {'male': 0, 'female': 1},
    'Embarked': {'S': 0, 'C': 1, 'Q': 2},
    'Cabin': {
        'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4,
        'F': 5, 'G': 6, 'T': 7, 'U': 8,
    },
}
for column, codes in test_encodings.items():
    titanic_test[column] = titanic_test[column].map(codes)

In [9]:
# Split the training frame into the model inputs and the label column
train_feature_columns = [
    'Pclass', 'Age', 'Sex', 'SibSp',
    'Parch', 'Fare', 'Embarked', 'Cabin',
]
X_train = titanic_train[train_feature_columns]
y_train = titanic_train['Survived']

In [10]:
# Select the model inputs from the test frame (same columns, same order as X_train)
test_feature_columns = [
    'Pclass', 'Age', 'Sex', 'SibSp',
    'Parch', 'Fare', 'Embarked', 'Cabin',
]
X_test = titanic_test.loc[:, test_feature_columns]

In [11]:
# Build the Random Forest classifier: 100 trees, fixed seed for repeatable runs
forest_params = {'n_estimators': 100, 'random_state': 42}
rfc = RandomForestClassifier(**forest_params)

In [12]:
# Fit the forest on the training features and labels
rfc.fit(X=X_train, y=y_train)

In [13]:
# Predict a label for every row of the test features
y_pred = rfc.predict(X=X_test)

In [15]:
# Save the predictions to a submission file.
#
# PassengerId is written exactly as it appears in test.csv so that every
# prediction stays attached to the passenger it was made for. Do not offset or
# renumber it: the submission is presumably matched on this identifier
# (Kaggle Titanic format -- confirm against the competition's sample file).
submission = pd.DataFrame({
    'PassengerId': titanic_test['PassengerId'],
    'Survived': y_pred,
})
# index=False keeps the DataFrame index out of the file, leaving two columns.
submission.to_csv('submission.csv', index=False)