In [1]:

import pickle
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

In [2]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is opened in binary mode and closed again before returning.
    """
    # NOTE(review): pickle.load can execute arbitrary code while deserializing;
    # only point this at files produced by a trusted pipeline.
    with open(path, 'rb') as f:
        return pickle.load(f)


# Training split (this is what the model is fitted on further down).
features = _load_pickle('features.pkl')
target = _load_pickle('target.pkl')

# Test split.
test_features = _load_pickle('test_features.pkl')
test_target = _load_pickle('test_target.pkl')

# Validation split.
validation_features = _load_pickle('validation_features.pkl')
validation_target = _load_pickle('validation_target.pkl')

In [3]:
def calculate_rmse_and_predict(model, features, actual):
    """Predict with ``model`` and score the predictions by root-mean-square error.

    Parameters
    ----------
    model
        Any object exposing ``predict(features)``.
    features
        Passed to ``model.predict`` unchanged.
    actual
        Numeric ground-truth values, one per predicted value. May be 1-D or a
        single-column 2-D container (e.g. shape ``(n, 1)``).

    Returns
    -------
    tuple
        ``(rmse, predicted)`` where ``rmse`` is the root-mean-square difference
        between ``actual`` and the predictions, and ``predicted`` is exactly
        what ``model.predict`` returned.

    Raises
    ------
    ValueError
        If ``actual`` and the predictions do not contain the same number of
        values.
    """
    predicted = model.predict(features)

    # Flatten both sides to float before subtracting:
    #  * an (n, 1) `actual` against an (n,) prediction would otherwise
    #    broadcast to an (n, n) matrix and silently yield a wrong RMSE;
    #  * unsigned-integer labels would wrap around on subtraction.
    actual_flat = np.asarray(actual, dtype=float).ravel()
    predicted_flat = np.asarray(predicted, dtype=float).ravel()
    if actual_flat.shape != predicted_flat.shape:
        raise ValueError(
            f"actual has {actual_flat.size} values but the model produced "
            f"{predicted_flat.size} predictions"
        )

    residuals = actual_flat - predicted_flat
    rmse = np.sqrt(np.mean(residuals**2))
    return rmse, predicted

In [4]:
# Build a random forest with a fixed seed and fit it on the training split.
rf_clf = RandomForestClassifier(random_state=42)
rf_clf.fit(features, target)

# Score the fitted forest on the held-out test split and report the accuracy.
rf_test_predictions = rf_clf.predict(test_features)
rf_test_accuracy = accuracy_score(y_true=test_target, y_pred=rf_test_predictions)
print(f"Random Forest Test Accuracy: {rf_test_accuracy}")

Random Forest Test Accuracy: 0.7173756955663256


In [5]:
# Test split: RMSE of the forest's predictions, plus the predictions themselves.
rf_rmse_test, rf_test_predictions = calculate_rmse_and_predict(
    model=rf_clf,
    features=test_features,
    actual=test_target,
)
print(f"Random Forest RMSE on test data: {rf_rmse_test}")

# Validation split: same measurement on the validation data.
rf_rmse_validation, rf_validation_predictions = calculate_rmse_and_predict(
    model=rf_clf,
    features=validation_features,
    actual=validation_target,
)
print(f"Random Forest RMSE on validation data: {rf_rmse_validation}")

Random Forest RMSE on test data: 0.5316242135509578
Random Forest RMSE on validation data: 0.5297401319296363


In [6]:
# Read the results collected so far from disk.
with open('model_results.pkl', 'rb') as f:
    model_results = pickle.load(f)

# Store this run's metrics under the 'Random Forest' key, replacing any
# entry already saved under that key.
rf_metrics = {}
rf_metrics['Accuracy'] = rf_test_accuracy
rf_metrics['RMSE (test set)'] = rf_rmse_test
rf_metrics['RMSE (Val set)'] = rf_rmse_validation
model_results['Random Forest'] = rf_metrics

# Write the updated results back to the same file.
with open('model_results.pkl', 'wb') as f:
    pickle.dump(model_results, f)
