In [57]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load datasets
character_predictions = pd.read_csv("character-predictions.csv")
character_deaths = pd.read_csv("character-deaths.csv")

# Attach the death records to each prediction row by character name.
# A left join emits one output row per matching right-hand row, so a name
# that occurs more than once in character_deaths would duplicate the
# prediction row, and copies of one sample could then land on both sides of
# a train/test split. Keep a single death record per name so the join can
# never change the number of prediction rows.
# NOTE(review): drop_duplicates keeps the first record per name; confirm that
# is the right one if the death columns are used downstream.
deaths_unique = character_deaths.drop_duplicates(subset="Name")
merged_data = pd.merge(
    character_predictions,
    deaths_unique,
    how='left',
    left_on="name",
    right_on='Name',
    validate="many_to_one",  # fail loudly if the right-hand key is ever non-unique
)

# Feature matrix and target vector, both taken from the merged frame
selected_features = [
    'isNoble',
    'male',
    'age',
    'numDeadRelations',
    'boolDeadRelations',
    'isPopular',
    'popularity',
]
X = merged_data.loc[:, selected_features]
y = merged_data.loc[:, 'actual']

# Hold out 20% of the rows for testing; random_state=0 makes the shuffle repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Fill missing values with per-column means. The means are learned from the
# training split only and then reused for the test split, so no test-set
# statistics leak into the fitted imputer.
imputer = SimpleImputer(strategy='mean').fit(X_train)
X_train_imputed = imputer.transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Initialize the Random Forest model.
# n_jobs=-1 spreads the 1000 trees across all available CPU cores for both
# fit and predict instead of building them one at a time. random_state still
# fixes the per-tree seeds, so the forest that is grown does not depend on
# the number of workers; only wall-clock time changes.
rf_model = RandomForestClassifier(n_estimators=1000, random_state=0, n_jobs=-1)

# Train the model with imputed data
rf_model.fit(X_train_imputed, y_train)

# Make predictions on the test set with imputed data
y_pred = rf_model.predict(X_test_imputed)

# Score the held-out predictions against the true labels
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Report all three metrics in a single print call, one section after another
print(
    f"Accuracy: {accuracy}",
    f"Confusion Matrix:\n{conf_matrix}",
    f"Classification Report:\n{classification_rep}",
    sep="\n",
)

Accuracy: 0.7384615384615385
Confusion Matrix:
[[ 31  75]
 [ 27 257]]
Classification Report:
              precision    recall  f1-score   support

           0       0.53      0.29      0.38       106
           1       0.77      0.90      0.83       284

    accuracy                           0.74       390
   macro avg       0.65      0.60      0.61       390
weighted avg       0.71      0.74      0.71       390

