In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import pickle
import os

Constants

In [None]:
# Number of synthetic samples to generate; also embedded in the saved model's filename.
NUM_SAMPLE = 10
# Fixed validation dataset size (not referenced by any cell visible in this notebook).
VALIDATION_SAMPLE = 100
# Directory in which trained models are saved.
output_dir = "saved_models"

Create the directory to save models if it doesn't exist

In [None]:
# Ensure the model output directory exists. exist_ok=True makes this a no-op when
# the directory is already present and removes the check-then-create race of a
# separate os.path.exists() test, so the cell is safe to re-run.
os.makedirs(output_dir, exist_ok=True)

1. Generate synthetic training data (replace with real data if available)

In [None]:
# Seed NumPy's legacy global RNG so the synthetic dataset is the same on every run.
np.random.seed(42)

# Feature matrix: one row per sample, five columns of uniform draws scaled by 10.
# Column order: Hours of Study, Exam Difficulty, Interest in Subject,
# Previous Exam Scores, Sleep Hours.
X = 10 * np.random.random_sample((NUM_SAMPLE, 5))

# Pass/fail labels (1 or 0) drawn independently of the features: a uniform draw
# per sample, thresholded at 0.5.
label_draws = np.random.random_sample(NUM_SAMPLE)
y = (label_draws > 0.5).astype(int)

print(f"{X} | {y}")

2. Split data into training and test sets

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

3. Create and train a Random Forest model

In [None]:
# Random forest with 100 trees; the fixed random_state makes training repeatable.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
# Fit on the training split only; the test split is kept for evaluation.
model.fit(X_train, y_train)

4. Predict on the test data

In [None]:
# Predict labels for the held-out test rows using the fitted model.
y_pred = model.predict(X_test)

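5. Evaluate the model (note: with NUM_SAMPLE = 10 and test_size=0.2 the test set holds only 2 samples, and the synthetic labels are random, so the scores below are not meaningful)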

In [51]:
# Score the test-set predictions with each metric, in the order the names are
# unpacked; every scorer is called as scorer(y_true, y_pred).
accuracy, f1, precision, recall = (
    scorer(y_test, y_pred)
    for scorer in (accuracy_score, f1_score, precision_score, recall_score)
)

# Report the evaluation metrics, one per line.
print(f"Accuracy: {accuracy}")
print(f"F1 Score: {f1}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")

6. Save the model using pickle

In [None]:
# The filename embeds the sample count used to build the training data.
model_filename = f'random_forest_model_{NUM_SAMPLE}.pkl'
model_save_path = os.path.join(output_dir, model_filename)

# Serialize the fitted model with pickle and write the bytes to disk.
with open(model_save_path, 'wb') as file:
    file.write(pickle.dumps(model))

print(f"Model saved at: {model_save_path}")

7. Load the saved model and predict on new data

In [None]:
# Read the pickled model back from model_save_path and deserialize it.
# Unpickling can execute arbitrary code, so only load pickle files you trust.
with open(model_save_path, 'rb') as file:
    loaded_model = pickle.loads(file.read())

8. Test the loaded model on a new student

In [50]:
# Example input for one student (replace with actual input data). Values are in
# the same column order as the training features: Hours of Study, Exam Difficulty,
# Interest, Previous Score, Sleep Hours.
new_student_features = [4, 9, 7, 5, 1]
X_new_student = np.array([new_student_features])  # single row, five columns

# Run the reloaded model on the one-row input.
y_new_pred = loaded_model.predict(X_new_student)

# A predicted label of 1 is reported as "Pass"; anything else as "Fail".
print(f"Predicted pass/fail for new student: {'Pass' if y_new_pred[0] == 1 else 'Fail'}")

[[3.74540119 9.50714306 7.31993942 5.98658484 1.5601864 ]
 [1.5599452  0.58083612 8.66176146 6.01115012 7.08072578]
 [0.20584494 9.69909852 8.32442641 2.12339111 1.81824967]
 [1.8340451  3.04242243 5.24756432 4.31945019 2.9122914 ]
 [6.11852895 1.39493861 2.92144649 3.66361843 4.56069984]
 [7.85175961 1.99673782 5.14234438 5.92414569 0.46450413]
 [6.07544852 1.70524124 0.65051593 9.48885537 9.65632033]
 [8.08397348 3.04613769 0.97672114 6.84233027 4.40152494]
 [1.22038235 4.9517691  0.34388521 9.09320402 2.58779982]
 [6.62522284 3.11711076 5.20068021 5.46710279 1.84854456]] | [1 1 1 1 1 1 0 0 0 0]
Accuracy: 1.0
F1 Score: 1.0
Precision: 1.0
Recall: 1.0
Model saved at: saved_models/random_forest_model_10.pkl
Predicted pass/fail for new student: Pass
