# Setup

In [None]:
# Get project folder
!git clone https://github.com/Survedog/MLTermProject.git project

Cloning into 'project'...
remote: Enumerating objects: 966, done.[K
remote: Total 966 (delta 0), reused 0 (delta 0), pack-reused 966[K
Receiving objects: 100% (966/966), 68.92 MiB | 12.42 MiB/s, done.
Resolving deltas: 100% (1/1), done.
Updating files: 100% (959/959), done.


In [None]:
import pandas as pd
import numpy as np
pd.options.display.max_columns=100


# Load Data

In [None]:
answer_correct_data = pd.read_csv('project/data/train_data/train_task_3_4.csv', na_values='?')
answer_metadata = pd.read_csv('project/data/metadata/answer_metadata_task_3_4.csv', na_values='?')
question_metadata = pd.read_csv('project/data/metadata/question_metadata_task_3_4.csv', na_values='?')
student_metadata = pd.read_csv('project/data/metadata/student_metadata_task_3_4.csv', na_values='?')
subject_metadata = pd.read_csv('project/data/metadata/subject_metadata.csv', na_values='?')

In [None]:
answer_correct_data.head()

In [None]:
answer_metadata.head()

In [None]:
question_metadata.head()

In [None]:
student_metadata.head()

In [None]:
subject_metadata.head()

# Calculate feature values to measure question quality

In [None]:
# Integrate and group information for each question 
answer_integrated = pd.merge(answer_correct_data, answer_metadata, 'inner', 'AnswerId')
answer_integrated_group = answer_integrated.groupby('QuestionId')

In [None]:
# Dataframe storing feature values
train_data = pd.DataFrame(columns=['CorrectRate', 'MeanConfidence', 'AnswerVariance'])
train_data.index.name = 'QuestionId'
train_data[['CorrectRate', 'MeanConfidence']] = answer_integrated_group.mean()[['IsCorrect', 'Confidence']]
train_data['AnswerVariance'] = answer_integrated_group.var()['AnswerValue']

  train_data[['CorrectRate', 'MeanConfidence']] = answer_integrated_group.mean()[['IsCorrect', 'Confidence']]
  train_data['AnswerVariance'] = answer_integrated_group.var()['AnswerValue']


In [None]:
# Replace NaN confidence values
train_data.isnull().sum()
print(answer_integrated[answer_integrated['QuestionId']==1].info())

# There is some questions that none of its answers have confidence info.
# -> Will set mean confidence as their confidence.
train_data['MeanConfidence'] = train_data['MeanConfidence'].fillna(train_data['MeanConfidence'].mean())
train_data.isnull().sum()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 14 entries, 6452 to 1291796
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   QuestionId      14 non-null     int64  
 1   UserId          14 non-null     int64  
 2   AnswerId        14 non-null     int64  
 3   IsCorrect       14 non-null     int64  
 4   CorrectAnswer   14 non-null     int64  
 5   AnswerValue     14 non-null     int64  
 6   DateAnswered    14 non-null     object 
 7   Confidence      0 non-null      float64
 8   GroupId         14 non-null     int64  
 9   QuizId          14 non-null     int64  
 10  SchemeOfWorkId  1 non-null      float64
dtypes: float64(2), int64(8), object(1)
memory usage: 1.3+ KB
None


CorrectRate       0
MeanConfidence    0
AnswerVariance    0
dtype: int64

# Preprocess feature values

In [None]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
train_data_scaled = scaler.fit(train_data).transform(train_data)
train_data_scaled = pd.DataFrame(train_data_scaled, index=train_data.index, columns=train_data.columns)

# Measure question quality

In [None]:
question_quality = pd.DataFrame(columns=['QualityMeasure'])
question_quality.index.name = 'QuestionId'

In [None]:
# Get quality measure value
for index in train_data_scaled.index:
    question_quality.loc[index] = \
        (1 - train_data_scaled.at[index, 'CorrectRate']) \
        * train_data_scaled.at[index, 'AnswerVariance'] \
        - train_data_scaled.at[index, 'MeanConfidence']

# Rank each question's quality

In [None]:
question_quality['Rank'] = question_quality['QualityMeasure'].rank(method='first', ascending=False)
question_quality = question_quality.astype(dtype={'QualityMeasure':'float64', 'Rank':'int64'})

In [None]:
# Check if ranking worked correctly
question_quality.describe()

Unnamed: 0,QualityMeasure,Rank
count,948.0,948.0
mean,-0.460858,474.5
std,0.192965,273.808327
min,-0.95679,1.0
25%,-0.613778,237.75
50%,-0.467707,474.5
75%,-0.31021,711.25
max,0.365556,948.0


# Validation

In [None]:
# Load validation data
validation_data = pd.read_csv('project/data/test_data/quality_response_remapped_public.csv', na_values='?')

In [None]:
# Compare each question pair in validation data set
question_quality_compare = []
for index in validation_data.index:
    left_question = validation_data.at[index, 'left']
    right_question = validation_data.at[index, 'right']
    question_quality_compare.append(1 if question_quality['Rank'][left_question] < question_quality['Rank'][right_question] else 2)

In [None]:
# Compare the result with each expert's decision and get score for each
validation_scores = pd.Series([0.0, 0.0, 0.0, 0.0, 0.0])
for index in validation_data.index:
    if question_quality_compare[index] == validation_data['T1_ALR'][index]:
        validation_scores[0] += 1
    if question_quality_compare[index] == validation_data['T2_CL'][index]:
        validation_scores[1] += 1
    if question_quality_compare[index] == validation_data['T3_GF'][index]:
        validation_scores[2] += 1
    if question_quality_compare[index] == validation_data['T4_MQ'][index]:
        validation_scores[3] += 1
    if question_quality_compare[index] == validation_data['T5_NS'][index]:
        validation_scores[4] += 1

for expert in range(5):
    validation_scores[expert] = validation_scores[expert] / len(validation_data)

In [None]:
# Show validation score
print(validation_scores)
print("Max Validation Score: {0}".format(validation_scores.max()))

0    0.68
1    0.72
2    0.80
3    0.68
4    0.60
dtype: float64
Max Validation Score: 0.8


# Get final test score

In [33]:
# Load test data
test_data = pd.read_csv('project/data/test_data/quality_response_remapped_private.csv', na_values='?')

In [34]:
# Compare each question pair in test data set
question_quality_compare = []
for index in test_data.index:
    left_question = test_data.at[index, 'left']
    right_question = test_data.at[index, 'right']
    question_quality_compare.append(1 if question_quality['Rank'][left_question] < question_quality['Rank'][right_question] else 2)

In [35]:
# Compare the result with each expert's decision and get score for each
test_scores = pd.Series([0.0, 0.0, 0.0, 0.0, 0.0])
for index in test_data.index:
    if question_quality_compare[index] == test_data['T1_ALR'][index]:
        test_scores[0] += 1
    if question_quality_compare[index] == test_data['T2_CL'][index]:
        test_scores[1] += 1
    if question_quality_compare[index] == test_data['T3_GF'][index]:
        test_scores[2] += 1
    if question_quality_compare[index] == test_data['T4_MQ'][index]:
        test_scores[3] += 1
    if question_quality_compare[index] == test_data['T5_NS'][index]:
        test_scores[4] += 1

for expert in range(5):
    test_scores[expert] = test_scores[expert] / len(test_data)

In [36]:
# Show validation score
print(test_scores)
print("Max Test Score: {0}".format(test_scores.max()))

0    0.72
1    0.60
2    0.60
3    0.60
4    0.80
dtype: float64
Max Test Score: 0.8
