In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [3]:
# Load the CSV file containing responses
df = pd.read_csv('Dataset/encoded_response.csv')

In [4]:
# # Target variable: Learning Objects Preference
target = df[[
    'Learning Objects [Slide presentation]',
    'Learning Objects [Book]',
    'Learning Objects [Lecture Note]',
    'Learning Objects [Educational game]',
    'Learning Objects [Video]',
    'Learning Objects [Audio-recorded lecture]',
    'Learning Objects [Animated instruction]',
    'Learning Objects [Real object model]',
    'Learning Objects [Mind Map]',
    'Learning Objects [Multimedia content]',
    'Learning Objects [Interactive Tool]',
    'Learning Objects [Technology-supported learning include computer-based training systems]',
    'Learning Objects [Intelligent computer-aided instruction systems]'
]]

In [5]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df.drop(target.columns, axis=1), target, test_size=0.25, random_state=42)

In [6]:
# # Create a RandomForestClassifier for each learning object
# classifiers = {}
# for col in target.columns:
#     classifier = RandomForestClassifier(n_estimators=100, random_state=42)
#     classifier.fit(X_train, y_train[col])
#     classifiers[col] = classifier

In [15]:
from sklearn.model_selection import GridSearchCV
import joblib

In [13]:
# Create an empty dictionary to hold the classifiers
best_estimators = {}

# Loop through each learning object
for col in target.columns:
    # Define the parameter grid for hyperparameter tuning
    param_grid = {
        'n_estimators': [100, 150, 200],  # Vary the number of trees
        'max_depth': [None, 10, 20, 30],  # Vary the maximum depth of trees
        # Add other hyperparameters to tune
    }
    
    # Instantiate GridSearchCV for RandomForestClassifier
    grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, scoring='accuracy')
    
    # Fit the grid search to your data for the current learning object
    grid_search.fit(X_train, y_train[col])
    
    # Get the best parameters and best estimator for the current learning object
    best_params = grid_search.best_params_
    best_estimator = grid_search.best_estimator_
    
    # Store the best estimator in the classifiers dictionary
    best_estimators[col] = best_estimator
    
joblib.dump(best_estimators, "rf_model.joblib")

NameError: name 'joblib' is not defined

In [16]:
# joblib.dump(best_estimators, "Model/rf_model.joblib")

['Model/rf_model.joblib']

In [17]:
# Make predictions on the testing set for each learning object
y_pred = pd.DataFrame({col: classifier.predict(X_test) for col, classifier in best_estimators.items()})

sum_acc = 0
mean_acc = 0

# Evaluate the model for each learning object
accuracy = {col: accuracy_score(y_test[col], y_pred[col]) for col in target.columns}
print("Model Accuracy for Learning Objects:")
for col, acc in accuracy.items():
    print(f"{col}: {acc}")
    sum_acc += acc
    
mean_acc = sum_acc/13
print("Mean accuracy: ", mean_acc)

Model Accuracy for Learning Objects:
Learning Objects [Slide presentation]: 0.6177606177606177
Learning Objects [Book]: 0.7181467181467182
Learning Objects [Lecture Note]: 0.6872586872586872
Learning Objects [Educational game]: 0.637065637065637
Learning Objects [Video]: 0.5868725868725869
Learning Objects [Audio-recorded lecture]: 0.694980694980695
Learning Objects [Animated instruction]: 0.5675675675675675
Learning Objects [Real object model]: 0.6486486486486487
Learning Objects [Mind Map]: 0.6756756756756757
Learning Objects [Multimedia content]: 0.6023166023166023
Learning Objects [Interactive Tool]: 0.61003861003861
Learning Objects [Technology-supported learning include computer-based training systems]: 0.5945945945945946
Learning Objects [Intelligent computer-aided instruction systems]: 0.6332046332046332
Mean accuracy:  0.6364716364716365


test_size=0.1 --    
test_size=0.2 -- 0.7729 -- 0.6386  0.63647  
test_size=0.25 --     
test_size=0.3 --  

### Make predictions on a new data

In [11]:
data = pd.read_csv("Dataset/data.csv")

In [12]:
predictions = pd.DataFrame({col: classifier.predict(data) for col, classifier in classifiers.items()})
predictions.head()

Unnamed: 0,Learning Objects [Slide presentation],Learning Objects [Book],Learning Objects [Lecture Note],Learning Objects [Educational game],Learning Objects [Video],Learning Objects [Audio-recorded lecture],Learning Objects [Animated instruction],Learning Objects [Real object model],Learning Objects [Mind Map],Learning Objects [Multimedia content],Learning Objects [Interactive Tool],Learning Objects [Technology-supported learning include computer-based training systems],Learning Objects [Intelligent computer-aided instruction systems]
0,1,0,1,0,0,1,1,1,1,0,0,0,0
1,1,1,1,1,1,0,0,0,1,1,1,1,1
2,1,0,1,0,1,1,1,1,1,1,0,0,0
3,0,0,1,0,0,1,1,1,0,0,0,0,1
4,1,0,1,0,0,0,0,1,0,0,1,0,0
