# SVM Model
- Trained with VAK qeustions
- So far, has the highest accuracy

In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

import joblib
from sklearn.model_selection import GridSearchCV

In [6]:
# Load the CSV file containing responses
df = pd.read_csv('Dataset/encoded_response_withQues.csv')
df.head()

Unnamed: 0,Gender,Level of Study,Household Income,Learning Objects [Slide presentation],Learning Objects [Book],Learning Objects [Lecture Note],Learning Objects [Educational game],Learning Objects [Video],Learning Objects [Audio-recorded lecture],Learning Objects [Animated instruction],...,28. I find it easiest to remember_Things I have done,29. I think I can tell someone is lying because_The vibes I get from them,29. I think I can tell someone is lying because_Their voice changes,29. I think I can tell someone is lying because_They avoid looking at you,30. When I'm meeting with an old friend_I give them a hug or a handshake,"30. When I'm meeting with an old friend_I say ""it's great to hear your voice!""","30. When I'm meeting with an old friend_I say ""it's great to see you!""",Preferred learning mode_Asynchronous Online Learning (On your own time),Preferred learning mode_Face to Face,Preferred learning mode_Synchronous Online Learning (Real Time)
0,2,1,2,0,0,0,0,0,0,0,...,False,False,False,True,False,True,False,False,True,False
1,2,1,2,0,0,0,0,0,0,0,...,False,False,False,True,False,True,False,False,False,True
2,2,1,2,1,0,1,0,0,0,0,...,True,True,False,False,False,False,True,False,True,False
3,2,1,3,1,1,1,0,0,0,0,...,True,True,False,False,True,False,False,False,True,False
4,2,1,3,1,1,1,0,0,0,0,...,True,True,False,False,True,False,False,True,False,False


### Define target variables

In [7]:
# # Target variable: Learning Objects Preference
target = df[[
    'Learning Objects [Slide presentation]',
    'Learning Objects [Book]',
    'Learning Objects [Lecture Note]',
    'Learning Objects [Educational game]',
    'Learning Objects [Video]',
    'Learning Objects [Audio-recorded lecture]',
    'Learning Objects [Animated instruction]',
    'Learning Objects [Real object model]',
    'Learning Objects [Mind Map]',
    'Learning Objects [Multimedia content]',
    'Learning Objects [Interactive Tool]',
    'Learning Objects [Technology-supported learning include computer-based training systems]',
    'Learning Objects [Intelligent computer-aided instruction systems]'
]]

### Split test and train data

In [8]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df.drop(target.columns, axis=1), target, test_size=0.25, random_state=42)

### Train model
- use GridSearchCV to find the best parameters which will give the highest accuracy
- save the model using joblib

In [10]:
# Create a dictionary to hold the best estimators after grid search
best_estimators = {}

# Iterate through each learning object
for col in target.columns:
    # Define the parameter grid
    param_grid = {
        'C': [1],  # Expanded range of C values
        'gamma': [0.1],  # More options for gamma
        'kernel': ['rbf']  # Diverse kernel options
    }
    
    # Instantiate GridSearchCV
    grid_search = GridSearchCV(SVC(random_state=42), param_grid, cv=5, scoring='accuracy')
    
    # Fit the grid search to your data for the current learning object
    grid_search.fit(X_train, y_train[col])
    
    # Get the best parameters and best estimator for the current learning object
    best_params = grid_search.best_params_
    best_estimator = grid_search.best_estimator_
    
    # Save the best estimator (SVM model) to a file
#     joblib.dump(best_estimator, f"svm_model_{col}.joblib")
    
    # Store the best estimator in the dictionary for later use if needed
    best_estimators[col] = best_estimator
    
    # Evaluate the model on the validation set
    y_pred = best_estimator.predict(X_test)
    accuracy = accuracy_score(y_test[col], y_pred)
    print(f"Model Accuracy for {col}: {accuracy}")
    
# Save the trained SVM model to a file
joblib.dump(best_estimators, "Model/svm_model.joblib")

Model Accuracy for Learning Objects [Slide presentation]: 0.7591623036649214
Model Accuracy for Learning Objects [Book]: 0.8612565445026178
Model Accuracy for Learning Objects [Lecture Note]: 0.8010471204188482
Model Accuracy for Learning Objects [Educational game]: 0.8403141361256544
Model Accuracy for Learning Objects [Video]: 0.819371727748691
Model Accuracy for Learning Objects [Audio-recorded lecture]: 0.8612565445026178
Model Accuracy for Learning Objects [Animated instruction]: 0.8115183246073299
Model Accuracy for Learning Objects [Real object model]: 0.8141361256544503
Model Accuracy for Learning Objects [Mind Map]: 0.8507853403141361
Model Accuracy for Learning Objects [Multimedia content]: 0.8089005235602095
Model Accuracy for Learning Objects [Interactive Tool]: 0.8036649214659686
Model Accuracy for Learning Objects [Technology-supported learning include computer-based training systems]: 0.819371727748691
Model Accuracy for Learning Objects [Intelligent computer-aided instr

['Model/svm_model.joblib']

In [11]:
print('best_params:', best_params)

best_params: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}


best_params: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}

### Check model accuracy
- use classification_report

In [16]:
# Import the saved model
svm_model = joblib.load("Model/svm_model_withQues.joblib")

In [19]:
# Make predictions on the test set
y_pred = pd.DataFrame({col: classifier.predict(X_test) for col, classifier in svm_model.items()})

# Classification Report
print("Classification Report:")
print(classification_report(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.76      0.77       200
           1       0.96      0.57      0.72       117
           2       0.77      1.00      0.87       254
           3       0.97      0.57      0.72       137
           4       0.83      0.81      0.82       194
           5       0.98      0.63      0.77       138
           6       0.90      0.56      0.69       143
           7       0.85      0.58      0.69       136
           8       1.00      0.59      0.74       140
           9       0.84      0.66      0.74       156
          10       0.81      0.72      0.77       170
          11       0.85      0.71      0.77       165
          12       0.88      0.61      0.72       151

   micro avg       0.85      0.70      0.77      2101
   macro avg       0.88      0.68      0.75      2101
weighted avg       0.87      0.70      0.76      2101
 samples avg       0.73      0.60      0.62      2101



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


svm_model_withQues = 0.820378 ---> use diff accuracy measurement (avg)   

svm_model:   
micro avg       0.85      0.70      0.77      2101  
macro avg       0.88      0.68      0.75      2101  
weighted avg    0.87      0.70      0.76      2101  
samples avg     0.73      0.60      0.62      2101  

### Make predictions on a new data

In [3]:
svm_model = joblib.load("Model/svm_model_withQues.joblib")

In [15]:
data = pd.read_csv("Streamlit/merged_withdomVAK.csv")

In [16]:
# predictions = pd.DataFrame({col: classifier.predict(data) for col, classifier in best_estimators.items()})

predictions = pd.DataFrame({col: classifier.predict(data) for col, classifier in svm_model.items()})

In [17]:
predictions.head()

Unnamed: 0,Learning Objects [Slide presentation],Learning Objects [Book],Learning Objects [Lecture Note],Learning Objects [Educational game],Learning Objects [Video],Learning Objects [Audio-recorded lecture],Learning Objects [Animated instruction],Learning Objects [Real object model],Learning Objects [Mind Map],Learning Objects [Multimedia content],Learning Objects [Interactive Tool],Learning Objects [Technology-supported learning include computer-based training systems],Learning Objects [Intelligent computer-aided instruction systems]
0,0,0,0,0,0,0,0,0,0,1,1,1,1
