In [1]:
# !pip install xgboost

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

## Load data

In [3]:
# Read the dataset
df = pd.read_csv('Datasets/latest_responses_encoded.csv')

df.head()

Unnamed: 0,Gender,Level of Study,6. Online Instructional Strategies/Assessment [Demonstration],6. Online Instructional Strategies/Assessment [Digital Lab Experiments],6. Online Instructional Strategies/Assessment [Forum],6. Online Instructional Strategies/Assessment [Case Study],6. Online Instructional Strategies/Assessment [Concept Mapping],6. Online Instructional Strategies/Assessment [Real Time Online Exam],6. Online Instructional Strategies/Assessment [Individual Project/Assignment],6. Online Instructional Strategies/Assessment [Group Project/Assignment],...,"27. If I am very angry_I stomp about, slam doors and throw things",28. I find it easiest to remember_Faces,28. I find it easiest to remember_Names,28. I find it easiest to remember_Things I have done,29. I think I can tell someone is lying because_The vibes I get from them,29. I think I can tell someone is lying because_Their voice changes,29. I think I can tell someone is lying because_They avoid looking at you,30. When I'm meeting with an old friend_I give them a hug or a handshake,"30. When I'm meeting with an old friend_I say ""it's great to hear your voice!""","30. When I'm meeting with an old friend_I say ""it's great to see you!"""
0,0,3,1,1,0,0,1,0,1,1,...,False,False,False,True,True,False,False,False,False,True
1,0,3,1,1,0,0,1,0,1,1,...,False,False,False,True,True,False,False,False,False,True
2,0,3,1,1,0,0,1,0,1,1,...,False,False,False,True,True,False,False,False,False,True
3,0,3,1,1,0,0,1,0,1,1,...,False,False,False,True,True,False,False,False,False,True
4,0,3,1,1,0,0,1,0,1,1,...,False,False,False,True,True,False,False,False,False,True


## Extract target variables

In [4]:
# Extract online assessment columns as target
target_columns = df.iloc[:, 2:15]
target_columns.columns

Index(['6. Online Instructional Strategies/Assessment [Demonstration]',
       '6. Online Instructional Strategies/Assessment [Digital Lab Experiments]',
       '6. Online Instructional Strategies/Assessment [Forum]',
       '6. Online Instructional Strategies/Assessment [Case Study]',
       '6. Online Instructional Strategies/Assessment [Concept Mapping]',
       '6. Online Instructional Strategies/Assessment [Real Time Online Exam]',
       '6. Online Instructional Strategies/Assessment [Individual Project/Assignment]',
       '6. Online Instructional Strategies/Assessment [Group Project/Assignment]',
       '6. Online Instructional Strategies/Assessment [Online Quiz/Test - MCQ]',
       '6. Online Instructional Strategies/Assessment [Online Quiz/Test - Essay]',
       '6. Online Instructional Strategies/Assessment [Online Quiz/Test - Open Book]',
       '6. Online Instructional Strategies/Assessment [Peer Review Assessment Live Presentation]',
       '6. Online Instructional Strate

## Split data

In [5]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df.drop(target_columns.columns, axis=1), target_columns, 
                                                    test_size=0.3, random_state=42)

## Train the model

In [6]:
# Initialise an empty dictionary to store the trained model
model_dict = {}

# Train a Random Forest Classifier
for column in target_columns.columns:
    model = XGBClassifier(random_state=42)
    model.fit(X_train, y_train[column])
    model_dict[column] = model

## Evaluate the model

In [7]:
# Make predictions on the test set
y_pred = pd.DataFrame({col: model.predict(X_test) for col, model in model_dict.items()})

# Classification Report
print("Classification Report:")
print(classification_report(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93       389
           1       0.90      0.78      0.84       163
           2       0.90      0.88      0.89       140
           3       0.93      0.87      0.90       173
           4       0.91      0.83      0.87       179
           5       0.97      0.79      0.87       153
           6       0.91      0.86      0.89       265
           7       0.89      0.87      0.88       252
           8       0.89      0.94      0.92       344
           9       0.94      0.83      0.88       161
          10       0.89      0.96      0.92       359
          11       0.90      0.86      0.88       176
          12       0.91      0.92      0.92       312

   micro avg       0.91      0.89      0.90      3066
   macro avg       0.92      0.87      0.89      3066
weighted avg       0.91      0.89      0.90      3066
 samples avg       0.89      0.89      0.88      3066



  _warn_prf(average, modifier, msg_start, len(result))


In [8]:
# Initialize a dictionary to store accuracy scores
accuracy_scores = {}

# Loop through each column and calculate accuracy score
for col in y_test.columns:
    accuracy = accuracy_score(y_test[col], y_pred[col])
    accuracy_scores[col] = accuracy
    print(f"Accuracy for {col}: {accuracy}")

# Overall accuracy score
overall_accuracy = accuracy_score(y_test.values.flatten(), y_pred.values.flatten())
print(f"\nOverall Accuracy: {overall_accuracy}")

Accuracy for 6. Online Instructional Strategies/Assessment [Demonstration]: 0.9109243697478991
Accuracy for 6. Online Instructional Strategies/Assessment [Digital Lab Experiments]: 0.9159663865546218
Accuracy for 6. Online Instructional Strategies/Assessment [Forum]: 0.9495798319327731
Accuracy for 6. Online Instructional Strategies/Assessment [Case Study]: 0.9428571428571428
Accuracy for 6. Online Instructional Strategies/Assessment [Concept Mapping]: 0.9243697478991597
Accuracy for 6. Online Instructional Strategies/Assessment [Real Time Online Exam]: 0.9394957983193277
Accuracy for 6. Online Instructional Strategies/Assessment [Individual Project/Assignment]: 0.9008403361344538
Accuracy for 6. Online Instructional Strategies/Assessment [Group Project/Assignment]: 0.9008403361344538
Accuracy for 6. Online Instructional Strategies/Assessment [Online Quiz/Test - MCQ]: 0.9008403361344538
Accuracy for 6. Online Instructional Strategies/Assessment [Online Quiz/Test - Essay]: 0.94117647058

## Save the model

In [28]:
# joblib.dump(model_dict, 'Model/XGBoost.joblib')

## Make predictions on new data

In [25]:
test = pd.read_csv('Datasets/test_data.csv')

In [26]:
predictions = pd.DataFrame({col: model.predict(test) for col, model in model_dict.items()})

predictions.head()

Unnamed: 0,6. Online Instructional Strategies/Assessment [Demonstration],6. Online Instructional Strategies/Assessment [Digital Lab Experiments],6. Online Instructional Strategies/Assessment [Forum],6. Online Instructional Strategies/Assessment [Case Study],6. Online Instructional Strategies/Assessment [Concept Mapping],6. Online Instructional Strategies/Assessment [Real Time Online Exam],6. Online Instructional Strategies/Assessment [Individual Project/Assignment],6. Online Instructional Strategies/Assessment [Group Project/Assignment],6. Online Instructional Strategies/Assessment [Online Quiz/Test - MCQ],6. Online Instructional Strategies/Assessment [Online Quiz/Test - Essay],6. Online Instructional Strategies/Assessment [Online Quiz/Test - Open Book],6. Online Instructional Strategies/Assessment [Peer Review Assessment Live Presentation],6. Online Instructional Strategies/Assessment [Recorded Presentation]
0,0,0,0,0,0,0,0,1,1,1,1,0,0
1,1,0,0,0,0,0,0,0,1,0,1,1,1
2,1,1,0,0,1,0,1,1,1,1,1,0,0


In [27]:
# predictions.to_csv('Datasets/prediction.csv', index=False)