In [58]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import joblib
import numpy as np

# Load the preprocessed data
data = pd.read_csv('Dataset/iq_occupation_dataset.csv')

# Prepare feature and label datasets
# Features: 'Quiz Percentage' (copied so the noise below never touches `data`)
X = data[['Quiz Percentage']].copy()

# Labels: 'IQ Score', 'Career Path', and 'Recommended Courses'
y = data[['IQ Score', 'Career Path', 'Recommended Courses']]

# Add Gaussian noise (mean 0, std 5) to 'Quiz Percentage' for variety.
# A seeded generator keeps the noise -- and therefore the fitted model and the
# reported metrics -- reproducible from run to run, matching the
# random_state=42 passed to train_test_split below.
rng = np.random.default_rng(42)
noise = rng.normal(0, 5, size=X.shape[0])

# Assign a whole new column instead of writing through .loc: if the CSV column
# was parsed as integers, an in-place .loc write of float values is an
# incompatible-dtype assignment, which pandas has deprecated.
# The result is clipped to maintain a realistic 0-100 range.
X['Quiz Percentage'] = (X['Quiz Percentage'] + noise).clip(0, 100)

# Split the dataset for training and testing (80% train / 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameters for the random forest regressor
rf_params = {
    'n_estimators': 150,
    'max_depth': 6,
    'min_samples_split': 10,
    'max_features': 'sqrt',
    'random_state': 42,
}
rf_model = RandomForestRegressor(**rf_params)

# Fit on the 'IQ Score' label only, flattened to a 1D array
iq_train = y_train[['IQ Score']].values.ravel()
rf_model.fit(X_train, iq_train)

# Predict IQ scores for the held-out test features
y_pred = rf_model.predict(X_test)

# Compare predictions against the true 'IQ Score' values of the test set
actual_iq = y_test['IQ Score']
mae = mean_absolute_error(actual_iq, y_pred)
r2 = r2_score(actual_iq, y_pred)

# Mean Absolute Percentage Error: mean of |error / actual|, as a percentage
relative_error = np.abs((actual_iq - y_pred) / actual_iq)
mape = np.mean(relative_error) * 100

# "Accuracy" is reported here simply as 100% minus the MAPE
accuracy_percentage = 100 - mape

# Emit the report one line at a time
for report_line in (
    f"Mean Absolute Error: {mae:.2f}",
    f"R-squared: {r2:.2f}",
    f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%",
    f"Accuracy Percentage: {accuracy_percentage:.2f}%",
):
    print(report_line)

# Persist the trained model to disk, then read it back (for later use)
model_path = 'random_forest_regressor_iq.pkl'
joblib.dump(rf_model, model_path)
loaded_model = joblib.load(model_path)

# Look up career path and recommended courses for a given IQ Score
def get_career_path_and_courses(iq_score):
    """Return the career path and courses of the row nearest to an IQ score.

    The module-level ``data`` frame is searched for the row whose
    'IQ Score' has the smallest absolute difference from ``iq_score``.
    When several rows are equally close, the first one is used.

    Args:
        iq_score: Numeric IQ score to match against ``data['IQ Score']``.

    Returns:
        A ``(career_path, recommended_courses)`` tuple taken from the
        'Career Path' and 'Recommended Courses' columns of the matched row.
    """
    # idxmin() yields an index *label*, so the row must be fetched with the
    # label-based .loc. Positional .iloc only agrees with it when the frame
    # has a default 0..n-1 index; otherwise it picks the wrong row or raises.
    closest_label = (data['IQ Score'] - iq_score).abs().idxmin()
    closest_row = data.loc[closest_label]
    return closest_row['Career Path'], closest_row['Recommended Courses']

# Predict IQ score plus the matching career path and courses
def predict(input_percentage):
    """Predict the IQ score for a quiz percentage and look up its career data.

    Args:
        input_percentage: Quiz percentage to feed to the trained model.

    Returns:
        A ``(iq_score, career_path, recommended_courses)`` tuple.
    """
    # The model is given a one-row frame with a 'Quiz Percentage' column
    feature_rows = [[input_percentage]]
    features = pd.DataFrame(feature_rows, columns=['Quiz Percentage'])

    # predict() returns one value per input row; there is exactly one here
    predicted_iq = rf_model.predict(features)[0]

    # Career path and courses come from the dataset row nearest the prediction
    path, courses = get_career_path_and_courses(predicted_iq)
    return predicted_iq, path, courses

# Example usage: run one prediction and print the result
input_percentage = 85  # Replace with the actual Quiz Percentage
prediction = predict(input_percentage)
iq_score, career_path, recommended_courses = prediction
summary = f"Predicted IQ Score: {iq_score}, Career Path: {career_path}, Recommended Courses: {recommended_courses}"
print(summary)


Mean Absolute Error: 3.97
R-squared: 0.89
Mean Absolute Percentage Error (MAPE): 4.07%
Accuracy Percentage: 95.93%
Predicted IQ Score: 113.50286411919973, Career Path: Professional careers, Recommended Courses: Accounting, nursing, business management, advanced IT courses
