# SVM Model
- Trained with VAK qeustions
- So far, has the highest accuracy

In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

import joblib
from sklearn.model_selection import GridSearchCV

In [21]:
# Load the CSV file containing responses
df = pd.read_csv('Dataset/encoded_new_response_withQues.csv')
df.head()

Unnamed: 0,Gender,Level of Study,Household Income,Learning Objects [Slide presentation],Learning Objects [Book],Learning Objects [Lecture Note],Learning Objects [Educational game],Learning Objects [Video],Learning Objects [Audio-recorded lecture],Learning Objects [Animated instruction],...,"30. When I'm meeting with an old friend_I say ""it's great to see you!""",Preferred learning mode_Asynchronous Online Learning (On your own time),Preferred learning mode_Face to Face,Preferred learning mode_Synchronous Online Learning (Real Time),Preferred Communication Platform_Call,Preferred Communication Platform_Email,Preferred Communication Platform_Others,Preferred Communication Platform_Telegram,Preferred Communication Platform_University eLearning Chat Room,Preferred Communication Platform_Whatsapp
0,2,1,2,0,0,0,0,0,0,0,...,False,False,True,False,False,False,False,False,False,True
1,2,1,2,0,0,0,0,0,0,0,...,False,False,False,True,False,False,False,False,False,True
2,2,1,2,1,0,1,0,0,0,0,...,True,False,True,False,False,True,False,False,False,False
3,2,1,2,1,0,1,0,0,0,0,...,True,False,True,False,False,False,False,False,True,False
4,2,1,2,1,0,1,0,0,0,0,...,True,False,True,False,False,False,False,False,False,True


### Define target variables

In [22]:
# # Target variable: Learning Objects Preference
target = df[[
    'Learning Objects [Slide presentation]',
    'Learning Objects [Book]',
    'Learning Objects [Lecture Note]',
    'Learning Objects [Educational game]',
    'Learning Objects [Video]',
    'Learning Objects [Audio-recorded lecture]',
    'Learning Objects [Animated instruction]',
    'Learning Objects [Real object model]',
    'Learning Objects [Mind Map]',
    'Learning Objects [Multimedia content]',
    'Learning Objects [Interactive Tool]',
    'Learning Objects [Technology-supported learning include computer-based training systems]',
    'Learning Objects [Intelligent computer-aided instruction systems]'
]]

### Split test and train data

In [23]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df.drop(target.columns, axis=1), target, test_size=0.25, random_state=42)

### Train model
- use GridSearchCV to find the best parameters which will give the highest accuracy
- save the model using joblib

In [24]:
# Create a dictionary to hold the best estimators after grid search
best_estimators = {}

# Iterate through each learning object
for col in target.columns:
    # Define the parameter grid
    param_grid = {
        'C': [1],  # Expanded range of C values
        'gamma': [0.1],  # More options for gamma
        'kernel': ['rbf']  # Diverse kernel options
    }
    
    # Instantiate GridSearchCV
    grid_search = GridSearchCV(SVC(random_state=42), param_grid, cv=5, scoring='accuracy')
    
    # Fit the grid search to the data for the current learning object
    grid_search.fit(X_train, y_train[col])
    
    # Get the best parameters and best estimator for the current learning object
    best_params = grid_search.best_params_
    best_estimator = grid_search.best_estimator_
    
    # Store the best estimator in the dictionary for later use if needed
    best_estimators[col] = best_estimator
    
    # Evaluate the model on the validation set
    y_pred = best_estimator.predict(X_test)
    accuracy = accuracy_score(y_test[col], y_pred)
    print(f"Model Accuracy for {col}: {accuracy}")
    


Model Accuracy for Learning Objects [Slide presentation]: 0.9547413793103449
Model Accuracy for Learning Objects [Book]: 0.9665948275862069
Model Accuracy for Learning Objects [Lecture Note]: 0.9633620689655172
Model Accuracy for Learning Objects [Educational game]: 0.9601293103448276
Model Accuracy for Learning Objects [Video]: 0.9536637931034483
Model Accuracy for Learning Objects [Audio-recorded lecture]: 0.9612068965517241
Model Accuracy for Learning Objects [Animated instruction]: 0.9612068965517241
Model Accuracy for Learning Objects [Real object model]: 0.9525862068965517
Model Accuracy for Learning Objects [Mind Map]: 0.9622844827586207
Model Accuracy for Learning Objects [Multimedia content]: 0.959051724137931
Model Accuracy for Learning Objects [Interactive Tool]: 0.9558189655172413
Model Accuracy for Learning Objects [Technology-supported learning include computer-based training systems]: 0.9644396551724138
Model Accuracy for Learning Objects [Intelligent computer-aided inst

In [25]:
# Save the trained SVM model to a file
joblib.dump(best_estimators, "Model/svm_new_model.joblib")

['Model/svm_new_model.joblib']

In [26]:
print('best_params:', best_params)

best_params: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}


best_params: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}

### Check model accuracy
- use classification_report

In [27]:
# Import the saved model
svm_model = joblib.load("Model/svm_new_model.joblib")

In [28]:
# Make predictions on the test set
y_pred = pd.DataFrame({col: classifier.predict(X_test) for col, classifier in svm_model.items()})

# Classification Report
print("Classification Report:")
print(classification_report(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.95      0.96       492
           1       1.00      0.89      0.94       281
           2       0.96      0.99      0.97       654
           3       0.98      0.91      0.94       343
           4       0.96      0.95      0.95       477
           5       0.97      0.91      0.94       327
           6       0.98      0.92      0.95       365
           7       0.98      0.90      0.94       370
           8       0.99      0.91      0.95       350
           9       0.98      0.93      0.95       428
          10       0.97      0.93      0.95       416
          11       0.99      0.93      0.96       426
          12       0.99      0.93      0.96       392

   micro avg       0.98      0.93      0.95      5321
   macro avg       0.98      0.93      0.95      5321
weighted avg       0.98      0.93      0.95      5321
 samples avg       0.86      0.84      0.84      5321



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


svm_model_withQues = 0.820378 ---> use diff accuracy measurement (avg)   

svm_model:   
micro avg       0.85      0.70      0.77      2101  
macro avg       0.88      0.68      0.75      2101  
weighted avg    0.87      0.70      0.76      2101  
samples avg     0.73      0.60      0.62      2101  

In [29]:
# Initialize a dictionary to store accuracy scores
accuracy_scores = {}

# Loop through each column and calculate accuracy score
for col in y_test.columns:
    accuracy = accuracy_score(y_test[col], y_pred[col])
    accuracy_scores[col] = accuracy
    print(f"Accuracy for {col}: {accuracy}")

# Overall accuracy score
overall_accuracy = accuracy_score(y_test.values.flatten(), y_pred.values.flatten())
print(f"\nOverall Accuracy: {overall_accuracy}")

Accuracy for Learning Objects [Slide presentation]: 0.9547413793103449
Accuracy for Learning Objects [Book]: 0.9665948275862069
Accuracy for Learning Objects [Lecture Note]: 0.9633620689655172
Accuracy for Learning Objects [Educational game]: 0.9601293103448276
Accuracy for Learning Objects [Video]: 0.9536637931034483
Accuracy for Learning Objects [Audio-recorded lecture]: 0.9612068965517241
Accuracy for Learning Objects [Animated instruction]: 0.9612068965517241
Accuracy for Learning Objects [Real object model]: 0.9525862068965517
Accuracy for Learning Objects [Mind Map]: 0.9622844827586207
Accuracy for Learning Objects [Multimedia content]: 0.959051724137931
Accuracy for Learning Objects [Interactive Tool]: 0.9558189655172413
Accuracy for Learning Objects [Technology-supported learning include computer-based training systems]: 0.9644396551724138
Accuracy for Learning Objects [Intelligent computer-aided instruction systems]: 0.9665948275862069

Overall Accuracy: 0.9601293103448276


### Make predictions on a new data

In [30]:
svm_model = joblib.load("Model/svm_new_model.joblib")

In [37]:
data = pd.read_csv("Streamlit/merged_withdomVAK.csv")

In [38]:
# predictions = pd.DataFrame({col: classifier.predict(data) for col, classifier in best_estimators.items()})

predictions = pd.DataFrame({col: classifier.predict(data) for col, classifier in svm_model.items()})

In [39]:
predictions.head()

Unnamed: 0,Learning Objects [Slide presentation],Learning Objects [Book],Learning Objects [Lecture Note],Learning Objects [Educational game],Learning Objects [Video],Learning Objects [Audio-recorded lecture],Learning Objects [Animated instruction],Learning Objects [Real object model],Learning Objects [Mind Map],Learning Objects [Multimedia content],Learning Objects [Interactive Tool],Learning Objects [Technology-supported learning include computer-based training systems],Learning Objects [Intelligent computer-aided instruction systems]
0,1,0,1,0,0,0,0,0,0,0,1,0,0
