# SVM Model
- Trained with VAK questions
- So far, this model has the highest accuracy

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [2]:
# Read the encoded responses from disk and preview the first rows
responses_path = 'Dataset/encoded_response_withQues.csv'
df = pd.read_csv(responses_path)
df.head()

Unnamed: 0,Gender,Level of Study,Household Income,Learning Objects [Slide presentation],Learning Objects [Book],Learning Objects [Lecture Note],Learning Objects [Educational game],Learning Objects [Video],Learning Objects [Audio-recorded lecture],Learning Objects [Animated instruction],...,28. I find it easiest to remember_Things I have done,29. I think I can tell someone is lying because_The vibes I get from them,29. I think I can tell someone is lying because_Their voice changes,29. I think I can tell someone is lying because_They avoid looking at you,30. When I'm meeting with an old friend_I give them a hug or a handshake,"30. When I'm meeting with an old friend_I say ""it's great to hear your voice!""","30. When I'm meeting with an old friend_I say ""it's great to see you!""",Preferred learning mode_Asynchronous Online Learning (On your own time),Preferred learning mode_Face to Face,Preferred learning mode_Synchronous Online Learning (Real Time)
0,2,1,2,0,0,0,0,0,0,0,...,False,False,False,True,False,True,False,False,True,False
1,2,1,2,0,0,0,0,0,0,0,...,False,False,False,True,False,True,False,False,False,True
2,2,1,2,1,0,1,0,0,0,0,...,True,True,False,False,False,False,True,False,True,False
3,2,1,3,1,1,1,0,0,0,0,...,True,True,False,False,True,False,False,False,True,False
4,2,1,3,1,1,1,0,0,0,0,...,True,True,False,False,True,False,False,True,False,False


In [3]:
# Label columns: one column per learning-object type that the models will predict
learning_object_columns = [
    'Learning Objects [Slide presentation]',
    'Learning Objects [Book]',
    'Learning Objects [Lecture Note]',
    'Learning Objects [Educational game]',
    'Learning Objects [Video]',
    'Learning Objects [Audio-recorded lecture]',
    'Learning Objects [Animated instruction]',
    'Learning Objects [Real object model]',
    'Learning Objects [Mind Map]',
    'Learning Objects [Multimedia content]',
    'Learning Objects [Interactive Tool]',
    'Learning Objects [Technology-supported learning include computer-based training systems]',
    'Learning Objects [Intelligent computer-aided instruction systems]',
]

# Target frame: the learning-object preference columns only
target = df[learning_object_columns]

In [4]:
# Every column that is not a label is used as a feature
features = df.drop(columns=target.columns)

# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.25,
    random_state=42,
)

In [2]:
import joblib
from sklearn.model_selection import GridSearchCV

In [9]:
# Train one RBF-kernel SVM per learning-object column and keep the fitted models.
#
# The grid below holds a single candidate, so GridSearchCV effectively runs a
# 5-fold cross-validation of one fixed configuration and then refits it on the
# full training split. Add more values to the lists to run an actual search.
# It is loop-invariant, so it is defined once outside the loop.
param_grid = {
    'C': [1],
    'gamma': [0.1],
    'kernel': ['rbf'],
}

# Fitted estimator and selected hyper-parameters, keyed by target column
best_estimators = {}
best_params_by_target = {}

for col in target.columns:
    grid_search = GridSearchCV(SVC(random_state=42), param_grid, cv=5, scoring='accuracy')
    grid_search.fit(X_train, y_train[col])

    # `best_params` stays a plain name because a later cell prints it; after
    # the loop it holds the parameters of the LAST target only. Use
    # `best_params_by_target` to inspect every target.
    best_params = grid_search.best_params_
    best_estimator = grid_search.best_estimator_
    best_params_by_target[col] = best_params
    best_estimators[col] = best_estimator

    # Score on the held-out test split (this is not a separate validation set)
    y_pred = best_estimator.predict(X_test)
    accuracy = accuracy_score(y_test[col], y_pred)
    print(f"Model Accuracy for {col}: {accuracy}")

# Persist the whole {column name: fitted SVC} dictionary in a single file
joblib.dump(best_estimators, "Model/svm_model_withQues.joblib")

Model Accuracy for Learning Objects [Slide presentation]: 0.7591623036649214
Model Accuracy for Learning Objects [Book]: 0.8612565445026178
Model Accuracy for Learning Objects [Lecture Note]: 0.8010471204188482
Model Accuracy for Learning Objects [Educational game]: 0.8403141361256544
Model Accuracy for Learning Objects [Video]: 0.819371727748691
Model Accuracy for Learning Objects [Audio-recorded lecture]: 0.8612565445026178
Model Accuracy for Learning Objects [Animated instruction]: 0.8115183246073299
Model Accuracy for Learning Objects [Real object model]: 0.8141361256544503
Model Accuracy for Learning Objects [Mind Map]: 0.8507853403141361
Model Accuracy for Learning Objects [Multimedia content]: 0.8089005235602095
Model Accuracy for Learning Objects [Interactive Tool]: 0.8036649214659686
Model Accuracy for Learning Objects [Technology-supported learning include computer-based training systems]: 0.819371727748691
Model Accuracy for Learning Objects [Intelligent computer-aided instr

['Model/svm_model_withQues.joblib']

In [10]:
# NOTE: `best_params` is reassigned on every pass of the training loop, so this
# shows the parameters selected for the last target column only.
print('best_params:', best_params)

best_params: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}


model 1 = best_params: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}  
model 2 = best_params: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}  
model 3 = best_params: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}

In [8]:
# Predict every learning-object column on the test split with its own fitted model
y_pred = pd.DataFrame({col: classifier.predict(X_test) for col, classifier in best_estimators.items()})

# Per-target accuracy on the held-out test split
accuracy = {col: accuracy_score(y_test[col], y_pred[col]) for col in target.columns}

print("Model Accuracy for Learning Objects:")
sum_acc = 0
for col, acc in accuracy.items():
    print(f"{col}: {acc}")
    sum_acc += acc

# Average over the number of targets actually scored rather than a hard-coded 13,
# so the mean stays correct if label columns are added or removed.
mean_acc = sum_acc / len(accuracy) if accuracy else 0.0
print("Mean accuracy: ", mean_acc)

Model Accuracy for Learning Objects:
Learning Objects [Slide presentation]: 0.7591623036649214
Learning Objects [Book]: 0.8612565445026178
Learning Objects [Lecture Note]: 0.8010471204188482
Learning Objects [Educational game]: 0.8403141361256544
Learning Objects [Video]: 0.819371727748691
Learning Objects [Audio-recorded lecture]: 0.8612565445026178
Learning Objects [Animated instruction]: 0.8115183246073299
Learning Objects [Real object model]: 0.8141361256544503
Learning Objects [Mind Map]: 0.8507853403141361
Learning Objects [Multimedia content]: 0.8089005235602095
Learning Objects [Interactive Tool]: 0.8036649214659686
Learning Objects [Technology-supported learning include computer-based training systems]: 0.819371727748691
Learning Objects [Intelligent computer-aided instruction systems]: 0.8141361256544503
Mean accuracy:  0.8203785743052758


svm_model =  0.64211   
svm_model2 = 0.6433  
svm_model3 = 0.64419  
svm_model_withQues = 0.820378

### Make predictions on new data

In [3]:
# Reload the saved models. Despite the singular name, the file written by the
# training cell holds a dictionary of fitted estimators keyed by target column.
# joblib.load unpickles the file, so only load model files you trust.
saved_models_path = "Model/svm_model_withQues.joblib"
svm_model = joblib.load(saved_models_path)

In [15]:
# New rows to score.
# NOTE(review): presumably this file has the same feature columns as the training data - confirm.
new_data_path = "Streamlit/merged_withdomVAK.csv"
data = pd.read_csv(new_data_path)

In [16]:
# Run each per-column model from the reloaded dictionary on the new rows,
# then assemble the outputs into one frame with a column per learning object.
per_column_predictions = {
    name: model.predict(data)
    for name, model in svm_model.items()
}
predictions = pd.DataFrame(per_column_predictions)

In [17]:
# Preview the first rows of the per-column predictions
predictions.head()

Unnamed: 0,Learning Objects [Slide presentation],Learning Objects [Book],Learning Objects [Lecture Note],Learning Objects [Educational game],Learning Objects [Video],Learning Objects [Audio-recorded lecture],Learning Objects [Animated instruction],Learning Objects [Real object model],Learning Objects [Mind Map],Learning Objects [Multimedia content],Learning Objects [Interactive Tool],Learning Objects [Technology-supported learning include computer-based training systems],Learning Objects [Intelligent computer-aided instruction systems]
0,0,0,0,0,0,0,0,0,0,1,1,1,1
