In [17]:
import pandas as pd
import numpy as np

In [18]:
#Creating SQL connection

In [19]:
import django
from django.conf import settings

import os
import sqlite3

# Set the DJANGO_SETTINGS_MODULE environment variable
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ils.settings')

# Initialize Django
django.setup()

db_path = os.path.join(settings.BASE_DIR, 'db.sqlite3')
conn = sqlite3.connect(db_path)

In [20]:
#User response table

In [21]:
table_name = 'quiz_userresponse'
query = f"SELECT * FROM {table_name}"
df_userresponse = pd.read_sql_query(query, conn)
df_userresponse

Unnamed: 0,id,time_spent,times_option_changed,is_correct,question_id,user_confidence
0,1,4000000,0,1,1,10
1,2,9000000,0,1,2,10
2,3,9000000,1,1,3,10
3,4,6000000,0,1,1,7
4,5,2000000,0,1,2,8
5,6,2000000,0,1,3,10
6,7,13000000,0,1,4,5


In [22]:
from math import floor

df_userresponse = df_userresponse.groupby('question_id').agg({
    'id':'last',
    'times_option_changed': 'sum',
    'is_correct': 'last',
    'user_confidence': lambda x: floor(x.mean()),
    'time_spent': 'sum'
}).reset_index()

df_userresponse = df_userresponse[['id', 'times_option_changed', 'is_correct', 'question_id', 'user_confidence', 'time_spent']]

df_userresponse

Unnamed: 0,id,times_option_changed,is_correct,question_id,user_confidence,time_spent
0,4,0,1,1,8,10000000
1,5,0,1,2,9,11000000
2,6,1,1,3,10,11000000
3,7,0,1,4,5,13000000


In [23]:
#Questions table

In [24]:
table_name = 'quiz_question'
query = f"SELECT * FROM {table_name}"
df_question = pd.read_sql_query(query, conn)
df_question

Unnamed: 0,id,label,cognitive_ability,is_answered
0,1,What is the product of 12 and 9?,Level_1,0
1,2,"If y = 3, what is the value of 4y - 5?",Level_1,0
2,3,What is the perimeter of a square with a side ...,Level_1,0
3,4,"What is the next number in the series: 2, 5, 8...",Level_1,0
4,5,What is 20% of 150?,Level_1,0
5,6,Solve for x in the equation 3x - 7 = 11.,Level_2,0
6,7,"What is the next number in the sequence: 5, 10...",Level_2,0
7,8,"A box contains 4 red, 5 blue, and 6 green ball...",Level_2,0
8,9,What is the simple interest on $2000 at an ann...,Level_2,0
9,10,"If 'CAT' is coded as 'DBU', how is 'DOG' coded...",Level_2,0


In [25]:
question_cognitive_ability = dict(zip(df_question['id'], df_question['cognitive_ability']))
question_cognitive_ability

{1: 'Level_1',
 2: 'Level_1',
 3: 'Level_1',
 4: 'Level_1',
 5: 'Level_1',
 6: 'Level_2',
 7: 'Level_2',
 8: 'Level_2',
 9: 'Level_2',
 10: 'Level_2',
 11: 'Level_3',
 12: 'Level_3',
 13: 'Level_3',
 14: 'Level_3',
 15: 'Level_3'}

In [26]:
df_userresponse['cognitive_ability'] = df_userresponse['question_id'].map(question_cognitive_ability)
df_userresponse

Unnamed: 0,id,times_option_changed,is_correct,question_id,user_confidence,time_spent,cognitive_ability
0,4,0,1,1,8,10000000,Level_1
1,5,0,1,2,9,11000000,Level_1
2,6,1,1,3,10,11000000,Level_1
3,7,0,1,4,5,13000000,Level_1


In [28]:
df_userresponse['time_spent'] = df_userresponse['time_spent'].apply(lambda x: x / 1000000)
cognitive_ability_mapping = {'Level_1': 1, 'Level_2': 2, 'Level_3': 3}
df_userresponse['cognitive_ability'] = df_userresponse['cognitive_ability'].map(cognitive_ability_mapping)
df_userresponse

Unnamed: 0,id,times_option_changed,is_correct,question_id,user_confidence,time_spent,cognitive_ability
0,4,0,1,1,8,1e-05,1
1,5,0,1,2,9,1.1e-05,1
2,6,1,1,3,10,1.1e-05,1
3,7,0,1,4,5,1.3e-05,1


In [29]:
#Model

In [30]:
features = ['time_spent', 'times_option_changed', 'is_correct', 'cognitive_ability', 'user_confidence']
target = 'user_confidence'

In [31]:
from sklearn.model_selection import train_test_split

X = df_userresponse[features]
y = df_userresponse[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [32]:
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [33]:
predictions = model.predict(X_test)
print(y_test)
predictions

1    9
Name: user_confidence, dtype: int64


array([8.12])

In [35]:
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_test, predictions)
print(f'Mean Absolute Error: {mae}')

Mean Absolute Error: 0.8800000000000008


In [38]:
import joblib

joblib.dump(model, 'confidence_predictor_model.pkl')

['confidence_predictor_model.pkl']

In [39]:
conn.close()

In [40]:
import subprocess

# Define the value of A
A = floor(df_userresponse['user_confidence'].mean())  # Example value of A
print(A)
# Run the reset_exam_db management command
subprocess.run(['python', 'manage.py', 'reset_exam_db'])

# Run the select_questions management command
subprocess.run(['python', 'manage.py', 'select_questions', str(A)])

8.0


CompletedProcess(args=['python', 'manage.py', 'select_questions', '8.0'], returncode=1)