In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Step 1: Load the Titanic dataset
data = pd.read_csv('tested.csv')

In [3]:
# Step 2: Preprocess the data
# Drop irrelevant columns and handle missing values
data = data.drop(['PassengerId', 'Name', 'Ticket', 'Cabin', 'Embarked'], axis=1)
data['Age'].fillna(data['Age'].median(), inplace=True)
data['Fare'].fillna(data['Fare'].median(), inplace=True)

In [4]:
# Convert categorical variables to numerical using one-hot encoding
data = pd.get_dummies(data, columns=['Sex', 'Pclass'])

In [5]:
# Split the data into training and testing sets
X = data.drop('Survived', axis=1)
y = data['Survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Step 3: Train the model
model = RandomForestClassifier()
model.fit(X_train, y_train)


RandomForestClassifier()

In [7]:
# Step 4: Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [8]:
# Step 5: Feature importance analysis
feature_importances = pd.DataFrame({'Feature': X_train.columns, 'Importance': model.feature_importances_})
feature_importances = feature_importances.sort_values('Importance', ascending=False)
print(feature_importances)

      Feature  Importance
4  Sex_female    0.482290
5    Sex_male    0.481871
3        Fare    0.021952
2       Parch    0.004989
0         Age    0.003791
1       SibSp    0.001866
8    Pclass_3    0.001469
6    Pclass_1    0.001419
7    Pclass_2    0.000353


In [12]:
# Step 6: Prediction
# Assuming you have a new passenger's information stored in a dictionary called 'new_passenger'
new_passenger = {
     'Age': 30,
    'Fare': 50,
    'Sex_female': 1,
    'Sex_male': 0,
    'Pclass_1': 0,
    'Pclass_2': 1,
    'Pclass_3': 0,
    'SibSp': 1,
    'Parch': 0
}
new_passenger_df = pd.DataFrame([new_passenger])
survival_prediction = model.predict(new_passenger_df)
print("Survival Prediction:", survival_prediction[0])

Survival Prediction: 0
