In [1]:
import math
import numpy as np
import pandas as pd

from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


# Show wide frames in full instead of eliding the middle columns.
pd.set_option('display.max_columns', 2000)
# 'mode.use_inf_as_na' was deprecated in pandas 2.1 and removed in pandas 3.0,
# where setting it raises OptionError (a KeyError subclass). Guard the call so
# the notebook still runs top-to-bottom on newer pandas versions.
try:
    pd.set_option('mode.use_inf_as_na', True)
except KeyError:
    print("pandas option 'mode.use_inf_as_na' is unavailable; inf values are not treated as NA")

from sklearn.svm import SVC as classifier_SVC

## Functions for data preparation

In [2]:
def l2_norm(vectors):
    """Scale every row of ``vectors`` to unit Euclidean (L2) length.

    Parameters
    ----------
    vectors : pandas.DataFrame or 2-D array-like
        One numeric vector per row.

    Returns
    -------
    list of list of float
        The normalised rows, each component rounded to 6 decimal places.
        Rows whose norm is zero are returned as all zeros instead of NaN,
        so an empty document vector cannot poison downstream estimators.
    """
    matrix = np.asarray(vectors, dtype=float)
    # keepdims keeps the norms as a column so they broadcast across each row.
    lengths = np.sqrt((matrix ** 2).sum(axis=1, keepdims=True))
    # Divide only where the norm is non-zero; zero rows keep the zeros from `out`.
    unit = np.divide(matrix, lengths, out=np.zeros_like(matrix), where=lengths != 0)
    return np.round(unit, 6).tolist()

In [3]:
def prepare_data(norm,
                 data_path='term_weighting_result_unsupervised/traditional_tf_idf.csv',
                 norm_output_path='term_weighting_result_unsupervised/norm-tf-idf.xlsx'):
    """Load the TF-IDF table and split it into features and encoded labels.

    Parameters
    ----------
    norm : bool
        When true, every feature row is L2-normalised with ``l2_norm`` and the
        normalised table (features plus ``BT_level``) is written to
        ``norm_output_path``.
    data_path : str, optional
        CSV file to read. Every column except the last is used as a feature;
        the file must contain a ``BT_level`` column.
    norm_output_path : str or None, optional
        Excel file that receives the normalised table; ``None`` skips the export.

    Returns
    -------
    tuple
        ``(X, y, labels)`` where ``X`` is the feature DataFrame and ``y`` and
        ``labels`` are both the encoded ``BT_level`` column.
    """
    #reading the dataset
    tf_idf_df = pd.read_csv(data_path)

    # Converting the categorical class labels into numerical
    cognitive_level = {"Knowledge": 0, "Comprehension": 1, "Application": 2,
                       "Analysis": 3, "Synthesis": 4, "Evaluation": 5}
    # Assign the encoded column back explicitly: a chained
    # ``df[col].replace(..., inplace=True)`` acts on a temporary under pandas
    # Copy-on-Write and would leave the frame unchanged. Unknown labels are
    # passed through untouched, as ``replace`` did.
    tf_idf_df["BT_level"] = tf_idf_df["BT_level"].map(
        lambda label: cognitive_level.get(label, label)
    )

    #Creating target and independent variable
    # NOTE(review): this treats the last column as the label column - confirm
    # that BT_level is always last in the CSV.
    X = tf_idf_df.iloc[:, :-1]
    y = tf_idf_df["BT_level"]
    print(y)

    #Norm X values
    if norm:
        X = pd.DataFrame(l2_norm(X), columns=X.columns)
        if norm_output_path is not None:
            norm_tw = X.copy()
            norm_tw['BT_level'] = y.values
            norm_tw.to_excel(norm_output_path, index=False)

    #printing max and min value of X
    print('Max value: ', X.max().max())
    print('Min value: ', X.min().min())

    return (X, y, tf_idf_df["BT_level"])

## Function to print average result

In [4]:
def print_result(result):
    """Print the mean accuracy and mean F1-score, each rounded to 3 decimals.

    ``result[0]`` holds the accuracy values and ``result[1]`` the F1 values.
    """
    # Compute both averages before printing anything.
    mean_scores = [round(np.mean(result[position]), 3) for position in (0, 1)]
    captions = ("Average accuracy = ", "Average f1-score = ")

    for caption, value in zip(captions, mean_scores):
        print(caption, value)

## Results of different Classifiers

In [5]:
# Build the L2-normalised features (X) and the encoded labels (y, and z for stratification)
X, y, z = prepare_data(norm=True)

0       1
1       1
2       1
3       1
4       1
       ..
2517    2
2518    0
2519    3
2520    5
2521    0
Name: BT_level, Length: 2522, dtype: int64
Max value:  1.0
Min value:  0.0


In [6]:
# Hold out 10% of the rows for testing, stratified on z, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.10,
    stratify=z,
    shuffle=True,
    random_state=1,
)

### SVC (Linear Kernel)

In [7]:
# Fit a linear-kernel SVC on the training split
model_SVC_linear = classifier_SVC(
    kernel='linear',
    decision_function_shape='ovo',
    C=1.0,
)
model_SVC_linear.fit(X_train, y_train)

# Predict the held-out split and report accuracy and weighted F1
prediction = model_SVC_linear.predict(X_test)

accuracy = accuracy_score(y_test, prediction)
print(f"Accuracy: {accuracy}")
f1_ = f1_score(y_test, prediction, average='weighted')
print(f"F1 score: {f1_}")

Accuracy: 0.782608695652174
F1 score: 0.7813796137347223
