<a href="https://colab.research.google.com/github/NATO-dotcom/ML-AI_Python/blob/main/Smart%20pace%20recommender%20model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
# Location of the study-recommendation dataset inside the Colab runtime.
DATASET_PATH = '/content/study_recommendation_dataset_refined.csv'

# Read the CSV into a DataFrame and display it as the cell output.
data = pd.read_csv(DATASET_PATH)
data

Unnamed: 0,subject_type,time_of_day,user_focus_score,task_difficulty,prev_method_used,recommended_duration,recommended_method,mood,user_type
0,coding,evening,7,3,group_discussion,104,project_work,neutral,intermediate
1,coding,evening,1,3,quizzes,39,coding,neutral,advanced
2,theory,afternoon,10,3,quizzes,57,reading,neutral,beginner
3,coding,evening,8,1,quizzes,59,coding,tired,advanced
4,theory,morning,3,2,quizzes,24,reading,neutral,intermediate
...,...,...,...,...,...,...,...,...,...
495,coding,evening,10,1,solo,120,coding,motivated,advanced
496,theory,morning,5,2,group_discussion,44,group_discussion,neutral,beginner
497,theory,morning,8,1,quizzes,82,reading,motivated,advanced
498,math,morning,6,2,solo,62,problem_solving,neutral,advanced


In [3]:
from sklearn.preprocessing import LabelEncoder

# Text-valued columns that are replaced by integer codes before modelling.
categorical_columns = ['subject_type', 'time_of_day', 'prev_method_used', 'recommended_method', 'mood', 'user_type']

# This cell overwrites `data` in place, so running it a second time would fit
# every encoder on integer codes instead of the original labels; the saved
# encoders could then no longer translate strings such as 'math'.
# Fail loudly *before* `label_encoders` is reset, so the encoders from the
# first run stay intact.
already_encoded = [col for col in categorical_columns if pd.api.types.is_numeric_dtype(data[col])]
if already_encoded:
    raise RuntimeError(
        f"Columns {already_encoded} are already numeric; "
        "re-run the data-loading cell before encoding again."
    )

# One fitted encoder per column, keyed by column name.
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le  # Save encoder for decoding later

In [4]:
from sklearn.model_selection import train_test_split

# The two columns the models predict; everything else is an input feature.
target_columns = ['recommended_duration', 'recommended_method']

# Features (inputs)
X = data.drop(columns=target_columns)

# Targets (outputs): one for the regressor, one for the classifier
y_duration = data['recommended_duration']
y_method = data['recommended_method']

# Split into training/testing sets; a single call keeps the rows of X and
# both targets aligned across the train and test partitions.
(
    X_train, X_test,
    y_dur_train, y_dur_test,
    y_method_train, y_method_test,
) = train_test_split(
    X,
    y_duration,
    y_method,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Model 1: random-forest regressor that predicts the recommended duration.
from sklearn.ensemble import RandomForestRegressor

model_duration = RandomForestRegressor(
    n_estimators=100,
    random_state=42,  # fixed seed so the fitted forest is reproducible
).fit(X_train, y_dur_train)
model_duration  # display the fitted estimator as the cell output


In [6]:
# Model 2: random-forest classifier that predicts the recommended method.
from sklearn.ensemble import RandomForestClassifier

model_method = RandomForestClassifier(
    n_estimators=100,
    random_state=42,  # fixed seed so the fitted forest is reproducible
).fit(X_train, y_method_train)
model_method  # display the fitted estimator as the cell output

In [7]:
# Regression evaluation (model 1): score the duration predictions on the test split.
from sklearn.metrics import mean_absolute_error,mean_squared_error, r2_score

pred_dur = model_duration.predict(X_test)

# (printed label, metric function) pairs, reported in this order.
regression_metrics = (
    ("MAE:", mean_absolute_error),
    ("MSE:", mean_squared_error),
    ("R2 Score:", r2_score),
)
for label, metric in regression_metrics:
    print(label, metric(y_dur_test, pred_dur))


MAE: 11.262004523809523
MSE: 195.85035548918935
R2 Score: 0.7586221937164124


In [8]:
# Classification evaluation (model 2): score the method predictions on the test split.
from sklearn.metrics import accuracy_score, classification_report

pred_method = model_method.predict(X_test)

method_accuracy = accuracy_score(y_method_test, pred_method)
# Rows of the report are the integer codes assigned to 'recommended_method'.
method_report = classification_report(y_method_test, pred_method)

print("Accuracy:", method_accuracy)
print(method_report)


Accuracy: 0.53
              precision    recall  f1-score   support

           0       0.50      0.44      0.47        18
           1       0.65      0.68      0.67        19
           2       0.40      0.46      0.43        13
           3       0.47      0.53      0.50        17
           4       0.50      0.44      0.47        16
           5       0.62      0.59      0.61        17

    accuracy                           0.53       100
   macro avg       0.52      0.52      0.52       100
weighted avg       0.53      0.53      0.53       100



In [9]:
# Making predictions from both models
def recommend(subject_type, time_of_day, focus_score, task_difficulty, prev_method, mood, user_type):
    """Recommend a study duration and study method for one session.

    Categorical arguments are converted to integer codes with the fitted
    encoders in ``label_encoders``; the resulting single-row frame is passed
    to ``model_duration`` and ``model_method``.

    Args:
        subject_type: Label for the 'subject_type' column (e.g. 'math').
        time_of_day: Label for the 'time_of_day' column (e.g. 'afternoon').
        focus_score: Numeric value for the 'user_focus_score' column.
        task_difficulty: Numeric value for the 'task_difficulty' column.
        prev_method: Label for the 'prev_method_used' column (e.g. 'solo').
        mood: Label for the 'mood' column (e.g. 'motivated').
        user_type: Label for the 'user_type' column (e.g. 'intermediate').

    Returns:
        dict with keys 'recommended_duration' (int) and
        'recommended_method' (str, decoded back to its original label).

    Raises:
        ValueError: If a categorical argument is not a label its encoder was
            fitted on.
    """
    def _encode(column, value):
        # Translate one label to its integer code, naming the column and the
        # accepted labels when the value is unknown.
        encoder = label_encoders[column]
        allowed = list(encoder.classes_)
        if value not in allowed:
            raise ValueError(
                f"Unknown value {value!r} for '{column}'; expected one of {allowed}"
            )
        return encoder.transform([value])[0]

    # Build the row by column name rather than by position, so a change in
    # the order of X's columns cannot silently attach values to the wrong
    # feature.
    features = {
        'subject_type': _encode('subject_type', subject_type),
        'time_of_day': _encode('time_of_day', time_of_day),
        'user_focus_score': focus_score,
        'task_difficulty': task_difficulty,
        'prev_method_used': _encode('prev_method_used', prev_method),
        'mood': _encode('mood', mood),
        'user_type': _encode('user_type', user_type),
    }
    # Selecting X.columns puts the features in X's column order and raises
    # KeyError if X has a column this function does not supply.
    input_data = pd.DataFrame([features])[list(X.columns)]

    # int() truncates the regressor's float output toward zero.
    duration_prediction = int(model_duration.predict(input_data)[0])
    method_encoded = model_method.predict(input_data)[0]
    # str() returns a plain Python string instead of a numpy string scalar.
    method_prediction = str(
        label_encoders['recommended_method'].inverse_transform([method_encoded])[0]
    )

    return {
        'recommended_duration': duration_prediction,
        'recommended_method': method_prediction,
    }
# Try a sample: one call with every input spelled out by name.
sample_recommendation = recommend(
    subject_type='math',
    time_of_day='afternoon',
    focus_score=7,
    task_difficulty=2,
    prev_method='solo',
    mood='motivated',
    user_type='intermediate',
)
print(sample_recommendation)


{'recommended_duration': 77, 'recommended_method': 'quizzes'}


In [10]:
import joblib

# Map each output filename to the object stored in it: the two models
# first, then one fitted encoder per encoded column.
artifacts = {
    'model_duration.pkl': model_duration,
    'model_method.pkl': model_method,
}
for key, encoder in label_encoders.items():
    artifacts[f"le_{key}.pkl"] = encoder

# Write every artifact to the current working directory.
for filename, artifact in artifacts.items():
    joblib.dump(artifact, filename)


In [11]:
import os

# Show the directory the .pkl files were written to.
working_directory = os.getcwd()
print(working_directory)


/content


In [12]:
from google.colab import files

# Download the saved artifacts from the Colab runtime: the two models first,
# then one encoder file per entry in `label_encoders`. The files are already
# on disk from the export cell, so they are not serialized a second time here
# (the earlier version re-dumped every encoder inside this loop).
download_names = ["model_duration.pkl", "model_method.pkl"]
download_names += [f"le_{key}.pkl" for key in label_encoders]

for filename in download_names:
    files.download(filename)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>