In [1]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

## Load data

In [2]:
# Read the dataset
# The CSV holds the already-encoded questionnaire responses; the path is
# relative to the directory the notebook is run from.
df = pd.read_csv('Datasets/latest_responses_encoded.csv')

# Bare expression: renders the notebook's (truncated) preview of the frame.
df

Unnamed: 0,Gender,Level of Study,6. Online Instructional Strategies/Assessment [Demonstration],6. Online Instructional Strategies/Assessment [Digital Lab Experiments],6. Online Instructional Strategies/Assessment [Forum],6. Online Instructional Strategies/Assessment [Case Study],6. Online Instructional Strategies/Assessment [Concept Mapping],6. Online Instructional Strategies/Assessment [Real Time Online Exam],6. Online Instructional Strategies/Assessment [Individual Project/Assignment],6. Online Instructional Strategies/Assessment [Group Project/Assignment],...,"27. If I am very angry_I stomp about, slam doors and throw things",28. I find it easiest to remember_Faces,28. I find it easiest to remember_Names,28. I find it easiest to remember_Things I have done,29. I think I can tell someone is lying because_The vibes I get from them,29. I think I can tell someone is lying because_Their voice changes,29. I think I can tell someone is lying because_They avoid looking at you,30. When I'm meeting with an old friend_I give them a hug or a handshake,"30. When I'm meeting with an old friend_I say ""it's great to hear your voice!""","30. When I'm meeting with an old friend_I say ""it's great to see you!"""
0,0,3,1,1,0,0,1,0,1,1,...,False,False,False,True,True,False,False,False,False,True
1,0,3,1,1,0,0,1,0,1,1,...,False,False,False,True,True,False,False,False,False,True
2,0,3,1,1,0,0,1,0,1,1,...,False,False,False,True,True,False,False,False,False,True
3,0,3,1,1,0,0,1,0,1,1,...,False,False,False,True,True,False,False,False,False,True
4,0,3,1,1,0,0,1,0,1,1,...,False,False,False,True,True,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1976,0,2,1,1,1,1,1,1,1,1,...,False,True,False,False,True,False,False,False,False,True
1977,0,2,1,1,1,1,1,1,1,1,...,False,True,False,False,True,False,False,False,False,True
1978,0,2,1,0,0,0,0,0,0,0,...,False,False,False,True,True,False,False,False,False,True
1979,0,2,1,0,0,0,0,0,0,0,...,False,False,False,True,True,False,False,False,False,True


## Extract target variables

In [3]:
# Extract online assessment columns as target
# The targets are picked by position (columns 2..14), which silently selects
# the wrong data if the CSV layout ever changes. Validate the selection so a
# layout change fails loudly here instead of producing a mis-trained model.
TARGET_PREFIX = '6. Online Instructional Strategies/Assessment ['
target_columns = df.iloc[:, 2:15]

assert target_columns.shape[1] == 13, (
    f"Expected 13 target columns, got {target_columns.shape[1]}"
)
unexpected_targets = [
    name for name in target_columns.columns
    if not str(name).startswith(TARGET_PREFIX)
]
assert not unexpected_targets, (
    f"Non-assessment columns selected as targets: {unexpected_targets}"
)

# Show which columns were selected.
target_columns.columns

Index(['6. Online Instructional Strategies/Assessment [Demonstration]',
       '6. Online Instructional Strategies/Assessment [Digital Lab Experiments]',
       '6. Online Instructional Strategies/Assessment [Forum]',
       '6. Online Instructional Strategies/Assessment [Case Study]',
       '6. Online Instructional Strategies/Assessment [Concept Mapping]',
       '6. Online Instructional Strategies/Assessment [Real Time Online Exam]',
       '6. Online Instructional Strategies/Assessment [Individual Project/Assignment]',
       '6. Online Instructional Strategies/Assessment [Group Project/Assignment]',
       '6. Online Instructional Strategies/Assessment [Online Quiz/Test - MCQ]',
       '6. Online Instructional Strategies/Assessment [Online Quiz/Test - Essay]',
       '6. Online Instructional Strategies/Assessment [Online Quiz/Test - Open Book]',
       '6. Online Instructional Strategies/Assessment [Peer Review Assessment Live Presentation]',
       '6. Online Instructional Strate

## Split data

In [4]:
# Hold out 30% of the rows for testing; everything that is not a target
# column is used as a feature. The fixed seed keeps the split repeatable.
# NOTE(review): the preview of `df` shows runs of rows that look identical in
# the visible columns. If full duplicates exist, a random split can put copies
# of the same row in both train and test and inflate the scores — confirm.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=target_columns.columns),
    target_columns,
    test_size=0.3,
    random_state=42,
)

## Train the model

In [5]:
# One independent binary classifier per target column, keyed by column name
model_dict = {}

# Fit a default Random Forest (fixed seed) against each target in turn;
# `fit` returns the estimator itself, so the fitted model is what gets stored.
for column in target_columns:
    model = RandomForestClassifier(random_state=42)
    model_dict[column] = model.fit(X_train, y_train[column])

In [6]:
# Print every constructor parameter of each trained model, one block per target
for column, model in model_dict.items():
    print(f"Model Parameters for {column}:")
    model_params = model.get_params()
    print(*(f"{param}: {value}" for param, value in model_params.items()), sep="\n")
    print("\n")

Model Parameters for 6. Online Instructional Strategies/Assessment [Demonstration]:
bootstrap: True
ccp_alpha: 0.0
class_weight: None
criterion: gini
max_depth: None
max_features: sqrt
max_leaf_nodes: None
max_samples: None
min_impurity_decrease: 0.0
min_samples_leaf: 1
min_samples_split: 2
min_weight_fraction_leaf: 0.0
n_estimators: 100
n_jobs: None
oob_score: False
random_state: 42
verbose: 0
warm_start: False


Model Parameters for 6. Online Instructional Strategies/Assessment [Digital Lab Experiments]:
bootstrap: True
ccp_alpha: 0.0
class_weight: None
criterion: gini
max_depth: None
max_features: sqrt
max_leaf_nodes: None
max_samples: None
min_impurity_decrease: 0.0
min_samples_leaf: 1
min_samples_split: 2
min_weight_fraction_leaf: 0.0
n_estimators: 100
n_jobs: None
oob_score: False
random_state: 42
verbose: 0
warm_start: False


Model Parameters for 6. Online Instructional Strategies/Assessment [Forum]:
bootstrap: True
ccp_alpha: 0.0
class_weight: None
criterion: gini
max_depth: N

## Evaluate the model

In [6]:
# Make predictions on the test set
# Build the frame on X_test's index: y_test keeps the shuffled original row
# labels, so without this any index-aligned pandas comparison between y_test
# and y_pred would silently pair up the wrong rows.
y_pred = pd.DataFrame(
    {col: model.predict(X_test) for col, model in model_dict.items()},
    index=X_test.index,
)

# Classification Report
# Label each row of the report with the assessment name (the text inside the
# trailing [...] of the column name) instead of an anonymous 0..12 index.
report_labels = [str(col).split('[')[-1].rstrip(']') for col in y_test.columns]
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=report_labels))

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.98      0.95       389
           1       0.98      0.77      0.86       163
           2       1.00      0.83      0.91       140
           3       0.96      0.86      0.91       173
           4       0.99      0.74      0.85       179
           5       1.00      0.78      0.88       153
           6       0.93      0.85      0.89       265
           7       0.92      0.83      0.87       252
           8       0.88      0.95      0.91       344
           9       0.98      0.81      0.89       161
          10       0.87      0.97      0.92       359
          11       0.97      0.84      0.90       176
          12       0.89      0.89      0.89       312

   micro avg       0.93      0.88      0.90      3066
   macro avg       0.95      0.85      0.89      3066
weighted avg       0.93      0.88      0.90      3066
 samples avg       0.91      0.89      0.88      3066



In [8]:
# Per-target accuracy, keyed by target column name
accuracy_scores = {}

# Score each target separately and report it as we go
for col in y_test:
    accuracy = accuracy_scores[col] = accuracy_score(y_test[col], y_pred[col])
    print(f"Accuracy for {col}: {accuracy}")

# Overall accuracy score
# Both frames are flattened first, so this is the fraction of individual
# (row, target) labels predicted correctly — not the share of rows where all
# targets match at once.
overall_accuracy = accuracy_score(y_test.to_numpy().ravel(), y_pred.to_numpy().ravel())
print(f"\nOverall Accuracy: {overall_accuracy}")

Accuracy for 6. Online Instructional Strategies/Assessment [Demonstration]: 0.9294117647058824
Accuracy for 6. Online Instructional Strategies/Assessment [Digital Lab Experiments]: 0.9327731092436975
Accuracy for 6. Online Instructional Strategies/Assessment [Forum]: 0.9596638655462185
Accuracy for 6. Online Instructional Strategies/Assessment [Case Study]: 0.9478991596638655
Accuracy for 6. Online Instructional Strategies/Assessment [Concept Mapping]: 0.9210084033613445
Accuracy for 6. Online Instructional Strategies/Assessment [Real Time Online Exam]: 0.9428571428571428
Accuracy for 6. Online Instructional Strategies/Assessment [Individual Project/Assignment]: 0.9058823529411765
Accuracy for 6. Online Instructional Strategies/Assessment [Group Project/Assignment]: 0.8957983193277311
Accuracy for 6. Online Instructional Strategies/Assessment [Online Quiz/Test - MCQ]: 0.8957983193277311
Accuracy for 6. Online Instructional Strategies/Assessment [Online Quiz/Test - Essay]: 0.94453781512

In [None]:
# Get the model parameters
# Select the model explicitly instead of relying on whatever `model` happened
# to be left bound by a loop in another cell (that breaks on a fresh kernel or
# out-of-order run). Every model is constructed with the same arguments, so the
# last trained one is representative.
model = list(model_dict.values())[-1]
model_params = model.get_params()

# Display the parameters
print("Model Parameters:")
for param, value in model_params.items():
    print(f"{param}: {value}")

## Save the model

In [49]:
# Persist the whole {target column -> fitted model} dictionary in one file.
# joblib.dump does not create missing directories, so make sure the output
# folder exists first (no-op when it is already there).
Path('Model').mkdir(parents=True, exist_ok=True)
joblib.dump(model_dict, 'Model/rf.joblib')

['Model/rf.joblib']

## Make predictions on new data

In [50]:
# Load the rows to predict on from a separate CSV.
# NOTE(review): this frame is passed to the models unchanged, so it presumably
# has to contain exactly the feature columns used for training (and none of
# the target columns) — confirm against the file.
test = pd.read_csv('Datasets/test_data.csv')

In [51]:
# Run every per-target model on the new data; one output column per target
predictions = pd.DataFrame(
    {target: estimator.predict(test) for target, estimator in model_dict.items()}
)

predictions.head()

Unnamed: 0,6. Online Instructional Strategies/Assessment [Demonstration],6. Online Instructional Strategies/Assessment [Digital Lab Experiments],6. Online Instructional Strategies/Assessment [Forum],6. Online Instructional Strategies/Assessment [Case Study],6. Online Instructional Strategies/Assessment [Concept Mapping],6. Online Instructional Strategies/Assessment [Real Time Online Exam],6. Online Instructional Strategies/Assessment [Individual Project/Assignment],6. Online Instructional Strategies/Assessment [Group Project/Assignment],6. Online Instructional Strategies/Assessment [Online Quiz/Test - MCQ],6. Online Instructional Strategies/Assessment [Online Quiz/Test - Essay],6. Online Instructional Strategies/Assessment [Online Quiz/Test - Open Book],6. Online Instructional Strategies/Assessment [Peer Review Assessment Live Presentation],6. Online Instructional Strategies/Assessment [Recorded Presentation]
0,1,0,0,0,0,0,0,0,1,0,1,0,1
1,1,0,0,0,0,0,0,0,0,0,1,1,0
2,1,1,0,0,1,0,1,1,1,1,1,0,0


In [52]:
# Reload the saved model dictionary from disk and score the same data with it.
# joblib files are pickle-based: only load files from a source you trust.
classifier = joblib.load('Model/rf.joblib')
predictions = pd.DataFrame(
    {target: estimator.predict(test) for target, estimator in classifier.items()}
)

predictions.head()

Unnamed: 0,6. Online Instructional Strategies/Assessment [Demonstration],6. Online Instructional Strategies/Assessment [Digital Lab Experiments],6. Online Instructional Strategies/Assessment [Forum],6. Online Instructional Strategies/Assessment [Case Study],6. Online Instructional Strategies/Assessment [Concept Mapping],6. Online Instructional Strategies/Assessment [Real Time Online Exam],6. Online Instructional Strategies/Assessment [Individual Project/Assignment],6. Online Instructional Strategies/Assessment [Group Project/Assignment],6. Online Instructional Strategies/Assessment [Online Quiz/Test - MCQ],6. Online Instructional Strategies/Assessment [Online Quiz/Test - Essay],6. Online Instructional Strategies/Assessment [Online Quiz/Test - Open Book],6. Online Instructional Strategies/Assessment [Peer Review Assessment Live Presentation],6. Online Instructional Strategies/Assessment [Recorded Presentation]
0,1,0,0,0,0,0,0,0,1,0,1,0,1
1,1,0,0,0,0,0,0,0,0,0,1,1,0
2,1,1,0,0,1,0,1,1,1,1,1,0,0


In [53]:
# Optional export, left disabled so that running the notebook does not
# overwrite an existing file. Uncomment to write the predictions to disk.
# predictions.to_csv('Datasets/prediction.csv', index=False)