In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
import joblib

## Load data

In [10]:
# Load the encoded responses dataset into a DataFrame and preview the first rows
DATASET_PATH = 'Datasets/latest_responses_encoded.csv'
df = pd.read_csv(DATASET_PATH)
df.head()

Unnamed: 0,Gender,Level of Study,6. Online Instructional Strategies/Assessment [Demonstration],6. Online Instructional Strategies/Assessment [Digital Lab Experiments],6. Online Instructional Strategies/Assessment [Forum],6. Online Instructional Strategies/Assessment [Case Study],6. Online Instructional Strategies/Assessment [Concept Mapping],6. Online Instructional Strategies/Assessment [Real Time Online Exam],6. Online Instructional Strategies/Assessment [Individual Project/Assignment],6. Online Instructional Strategies/Assessment [Group Project/Assignment],...,"27. If I am very angry_I stomp about, slam doors and throw things",28. I find it easiest to remember_Faces,28. I find it easiest to remember_Names,28. I find it easiest to remember_Things I have done,29. I think I can tell someone is lying because_The vibes I get from them,29. I think I can tell someone is lying because_Their voice changes,29. I think I can tell someone is lying because_They avoid looking at you,30. When I'm meeting with an old friend_I give them a hug or a handshake,"30. When I'm meeting with an old friend_I say ""it's great to hear your voice!""","30. When I'm meeting with an old friend_I say ""it's great to see you!"""
0,0,3,1,1,0,0,1,0,1,1,...,False,False,False,True,True,False,False,False,False,True
1,0,3,1,1,0,0,1,0,1,1,...,False,False,False,True,True,False,False,False,False,True
2,0,3,1,1,0,0,1,0,1,1,...,False,False,False,True,True,False,False,False,False,True
3,0,3,1,1,0,0,1,0,1,1,...,False,False,False,True,True,False,False,False,False,True
4,0,3,1,1,0,0,1,0,1,1,...,False,False,False,True,True,False,False,False,False,True


## Extract target variables

In [11]:
# The online-assessment columns used as prediction targets are selected by
# position: column indices 2 through 14 of the loaded frame.
TARGET_COLUMN_SLICE = slice(2, 15)
target_columns = df.iloc[:, TARGET_COLUMN_SLICE]
target_columns.columns

Index(['6. Online Instructional Strategies/Assessment [Demonstration]',
       '6. Online Instructional Strategies/Assessment [Digital Lab Experiments]',
       '6. Online Instructional Strategies/Assessment [Forum]',
       '6. Online Instructional Strategies/Assessment [Case Study]',
       '6. Online Instructional Strategies/Assessment [Concept Mapping]',
       '6. Online Instructional Strategies/Assessment [Real Time Online Exam]',
       '6. Online Instructional Strategies/Assessment [Individual Project/Assignment]',
       '6. Online Instructional Strategies/Assessment [Group Project/Assignment]',
       '6. Online Instructional Strategies/Assessment [Online Quiz/Test - MCQ]',
       '6. Online Instructional Strategies/Assessment [Online Quiz/Test - Essay]',
       '6. Online Instructional Strategies/Assessment [Online Quiz/Test - Open Book]',
       '6. Online Instructional Strategies/Assessment [Peer Review Assessment Live Presentation]',
       '6. Online Instructional Strate

## Split data

In [12]:
# Features are every column that is not a target; the labels are the target frame.
# Hold out 30% of the rows for testing, with a fixed seed so the split is repeatable.
features = df.drop(columns=target_columns.columns)
X_train, X_test, y_train, y_test = train_test_split(
    features, target_columns, test_size=0.3, random_state=42
)

## Train the model

In [27]:
# Fit one independent Gaussian Naive Bayes classifier per target column and
# store the fitted models in a dictionary keyed by the column name.
# GaussianNB.fit returns the fitted estimator, so its result is stored directly.
model_dict = {
    column: GaussianNB().fit(X_train, y_train[column])
    for column in target_columns.columns
}

In [28]:
# Print the constructor parameters of every trained model, one block per target
for column, model in model_dict.items():
    header = f"Model Parameters for {column}:"
    settings = [f"{param}: {value}" for param, value in model.get_params().items()]
    print("\n".join([header, *settings]))
    # Same spacer as before: a newline string plus print's own newline
    print("\n")

Model Parameters for 6. Online Instructional Strategies/Assessment [Demonstration]:
priors: None
var_smoothing: 1e-09


Model Parameters for 6. Online Instructional Strategies/Assessment [Digital Lab Experiments]:
priors: None
var_smoothing: 1e-09


Model Parameters for 6. Online Instructional Strategies/Assessment [Forum]:
priors: None
var_smoothing: 1e-09


Model Parameters for 6. Online Instructional Strategies/Assessment [Case Study]:
priors: None
var_smoothing: 1e-09


Model Parameters for 6. Online Instructional Strategies/Assessment [Concept Mapping]:
priors: None
var_smoothing: 1e-09


Model Parameters for 6. Online Instructional Strategies/Assessment [Real Time Online Exam]:
priors: None
var_smoothing: 1e-09


Model Parameters for 6. Online Instructional Strategies/Assessment [Individual Project/Assignment]:
priors: None
var_smoothing: 1e-09


Model Parameters for 6. Online Instructional Strategies/Assessment [Group Project/Assignment]:
priors: None
var_smoothing: 1e-09


Mode

## Evaluate the model

In [8]:
# Make predictions on the test set, one column per target model.
# The frame reuses X_test's index so that y_pred stays row-aligned with y_test:
# train_test_split shuffles the rows, so a default 0..n-1 index would not match
# y_test's index labels in any label-aligned pandas comparison.
y_pred = pd.DataFrame(
    {col: model.predict(X_test) for col, model in model_dict.items()},
    index=X_test.index,
)

# Classification Report
# target_names labels each report row with its assessment column name instead
# of a bare positional number; y_pred's columns follow the same order as y_test's.
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=list(y_test.columns)))

Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.71      0.72       389
           1       0.40      0.52      0.45       163
           2       0.39      0.45      0.42       140
           3       0.43      0.49      0.46       173
           4       0.40      0.39      0.39       179
           5       0.41      0.48      0.45       153
           6       0.46      1.00      0.63       265
           7       0.52      0.45      0.48       252
           8       0.68      0.50      0.57       344
           9       0.44      0.49      0.46       161
          10       0.86      0.05      0.10       359
          11       0.44      0.45      0.44       176
          12       0.63      0.58      0.60       312

   micro avg       0.52      0.51      0.51      3066
   macro avg       0.52      0.51      0.48      3066
weighted avg       0.57      0.51      0.49      3066
 samples avg       0.52      0.52      0.45      3066



In [8]:
# Accuracy of each target's model on the test set, keyed by target column name
accuracy_scores = {
    col: accuracy_score(y_test[col], y_pred[col]) for col in y_test.columns
}
for col, accuracy in accuracy_scores.items():
    print(f"Accuracy for {col}: {accuracy}")

# Overall accuracy score: every (row, target) cell is treated as one prediction,
# i.e. the fraction of individual labels predicted correctly across all targets.
truth_flat = y_test.values.ravel()
pred_flat = y_pred.values.ravel()
overall_accuracy = accuracy_score(truth_flat, pred_flat)
print(f"\nOverall Accuracy: {overall_accuracy}")

Accuracy for 6. Online Instructional Strategies/Assessment [Demonstration]: 0.6403361344537815
Accuracy for 6. Online Instructional Strategies/Assessment [Digital Lab Experiments]: 0.6571428571428571
Accuracy for 6. Online Instructional Strategies/Assessment [Forum]: 0.704201680672269
Accuracy for 6. Online Instructional Strategies/Assessment [Case Study]: 0.6638655462184874
Accuracy for 6. Online Instructional Strategies/Assessment [Concept Mapping]: 0.6386554621848739
Accuracy for 6. Online Instructional Strategies/Assessment [Real Time Online Exam]: 0.6907563025210084
Accuracy for 6. Online Instructional Strategies/Assessment [Individual Project/Assignment]: 0.4689075630252101
Accuracy for 6. Online Instructional Strategies/Assessment [Group Project/Assignment]: 0.5915966386554622
Accuracy for 6. Online Instructional Strategies/Assessment [Online Quiz/Test - MCQ]: 0.5747899159663865
Accuracy for 6. Online Instructional Strategies/Assessment [Online Quiz/Test - Essay]: 0.690756302521

## Save the model

In [None]:
# Saving is intentionally disabled. Uncomment the line below to write the
# dictionary of trained per-target models to 'Model/nb.joblib'.
# NOTE(review): presumably the 'Model/' directory must already exist — confirm before enabling.
# joblib.dump(model_dict, 'Model/nb.joblib')