In [1]:
#Importing the required libraries

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Read the grades CSV (resolved relative to the working directory) into a DataFrame

DATA_PATH = 'grades.csv'
df = pd.read_csv(DATA_PATH)

In [4]:
# Hold out 20% of the rows for testing (fixed seed so the split is repeatable).
# NOTE(review): the label is the student_id column, which looks like an
# identifier rather than an outcome -- confirm this is the intended target.

features = df.drop(columns=['student_id'])
labels = df['student_id']

X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Create one instance of every classifier that will be compared

SEED = 42  # shared by the estimators that take a random_state

svm = SVC(C=1.0, kernel='linear')                                   # linear-kernel SVM
rf = RandomForestClassifier(random_state=SEED, n_estimators=100)    # 100-tree forest
nb = MultinomialNB()                                                # library defaults
dt = DecisionTreeClassifier(random_state=SEED)
knn = KNeighborsClassifier()                                        # library defaults

In [6]:
# Turn the six submission columns into TF-IDF features.
# The vectorizer is fitted on the training rows only and then applied,
# unchanged, to the test rows.

SUBMISSION_COLUMNS = [
    'assignment1_submission',
    'assignment2_submission',
    'assignment3_submission',
    'assignment4_submission',
    'assignment5_submission',
    'assignment6_submission',
]


def combine_submissions(frame, columns=SUBMISSION_COLUMNS):
    """Join the given columns of ``frame`` into one space-separated string per row.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain every name listed in ``columns``.
    columns : list of str
        Columns to concatenate, in order. Defaults to the six submission columns.

    Returns
    -------
    pandas.Series
        One string per row; each column is cast with ``astype(str)`` before joining.
    """
    combined = frame[columns[0]].astype(str)
    for column in columns[1:]:
        # Same left-to-right "<text> <next column>" concatenation for every column,
        # so train and test documents are always built the same way.
        combined = combined + ' ' + frame[column].astype(str)
    return combined


vectorizer = TfidfVectorizer(stop_words='english')
X_train_vec = vectorizer.fit_transform(combine_submissions(X_train))
X_test_vec = vectorizer.transform(combine_submissions(X_test))

In [7]:
# Train every classifier on the vectorised training set.
# No predictions are made here: the evaluation cell calls .predict() for each
# model itself, so predicting inside this loop only produced a value that was
# overwritten on the next iteration and never read.

for classifier in [svm, rf, nb, dt, knn]:
    classifier.fit(X_train_vec, y_train)

In [10]:
# Evaluate the performance of the algorithms
# (warnings is already imported and silenced in the first cell, so that setup
# is not repeated here.)


def score_predictions(y_true, y_pred):
    """Return ``(accuracy, precision, recall, f1)`` for one set of predictions.

    Precision, recall and F1 are computed with ``average='weighted'``.
    """
    return (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, average='weighted'),
        recall_score(y_true, y_pred, average='weighted'),
        f1_score(y_true, y_pred, average='weighted'),
    )


# Predictions of each fitted model on the held-out test features
svm_pred = svm.predict(X_test_vec)
rf_pred = rf.predict(X_test_vec)
nb_pred = nb.predict(X_test_vec)
dt_pred = dt.predict(X_test_vec)
knn_pred = knn.predict(X_test_vec)

# The per-model metric names are kept because the reporting cell reads them.
svm_acc, svm_prec, svm_rec, svm_f1 = score_predictions(y_test, svm_pred)
rf_acc, rf_prec, rf_rec, rf_f1 = score_predictions(y_test, rf_pred)
nb_acc, nb_prec, nb_rec, nb_f1 = score_predictions(y_test, nb_pred)
dt_acc, dt_prec, dt_rec, dt_f1 = score_predictions(y_test, dt_pred)
knn_acc, knn_prec, knn_rec, knn_f1 = score_predictions(y_test, knn_pred)

In [11]:
# Report accuracy, precision, recall and F1 for every classifier

# (heading, accuracy, precision, recall, f1); every heading after the first
# starts with a newline so the groups are separated by a blank line.
report_rows = [
    ('Support Vector Machine:', svm_acc, svm_prec, svm_rec, svm_f1),
    ('\nRandom Forest:', rf_acc, rf_prec, rf_rec, rf_f1),
    ('\nNaive Bayes:', nb_acc, nb_prec, nb_rec, nb_f1),
    ('\nDecision Tree:', dt_acc, dt_prec, dt_rec, dt_f1),
    ('\nK-Nearest Neighbor:', knn_acc, knn_prec, knn_rec, knn_f1),
]

for heading, acc_value, prec_value, rec_value, f1_value in report_rows:
    print(heading)
    print('Accuracy: {:.3f}'.format(acc_value))
    print('Precision: {:.3f}'.format(prec_value))
    print('Recall: {:.3f}'.format(rec_value))
    print('F1 Score: {:.3f}'.format(f1_value))

Support Vector Machine:
Accuracy: 0.112
Precision: 0.107
Recall: 0.112
F1 Score: 0.103

Random Forest:
Accuracy: 0.108
Precision: 0.107
Recall: 0.108
F1 Score: 0.107

Naive Bayes:
Accuracy: 0.095
Precision: 0.071
Recall: 0.095
F1 Score: 0.075

Decision Tree:
Accuracy: 0.104
Precision: 0.104
Recall: 0.104
F1 Score: 0.103

K-Nearest Neighbor:
Accuracy: 0.117
Precision: 0.114
Recall: 0.117
F1 Score: 0.108
