In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path

# Loading data

In [2]:
from src.load_scripts import load_ipython_item, load_ipython_log

# Root folder of the exported data set; the EduLint output lives in a sub-folder.
data_path = Path('data/')
edulint_path = data_path / 'edulint'

# `item` holds the task definitions (name, instructions, reference solution).
item = load_ipython_item(data_path)
# `log` holds the submissions, `features` the linter message codes.
log, features = load_ipython_log(data_path, edulint_path)

In [3]:
# Naive model: score every submission by the sum of its linter-message vector.
log['num_mistakes'] = log['linter_messages'].map(np.sum)

In [4]:
# Preview the first submissions, including the derived `num_mistakes` column.
log.head()

Unnamed: 0,id,user,item,answer,correct,moves,responseTime,time,linter_messages,num_mistakes
419,9662,2802184,1,"def numbers(n):\n for i in range(1, n+1):\n...",1,-1,185100,2018-09-12 10:48:02,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
421,9664,2802184,58,def hej_rup(n):\n for x in range(n):\n ...,1,-1,138450,2018-09-12 10:50:49,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
424,9667,2802184,56,def reverse_numbers(n):\n for x in range(n)...,1,-1,132250,2018-09-12 10:53:28,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
425,9668,2802184,57,def even_numbers(n):\n y = 2;\n for x in...,1,-1,270350,2018-09-12 10:58:44,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
426,9669,2802184,59,def animals(n):\n for x in range(n):\n ...,1,-1,81400,2018-09-12 11:00:09,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2


In [5]:
# Preview the first task definitions.
item.head()

Unnamed: 0,name,instructions,solution
1,Výpis čísel,"Napište funkci <b>numbers(n)</b>, která vypíše...","def numbers(n):\n for i in range(1, n + 1):..."
2,Fibonacciho posloupnost,"Napište funkci <b>fibonacci(n)</b>, která vypí...",def fibonacci(n):\n current = 1\n next =...
3,Faktoriál,"Napište funkci <b>factorial(n)</b>, která vrát...",def factorial(n):\n value = 1\n for i in...
4,Abeceda,"Napište funkci <b>alphabet(n)</b>, která vypíš...",def alphabet(n):\n for i in range(n):\n ...
5,Zdvojení znaků,"Napište funkci <b>duplication(text)</b>, která...","def duplication(text):\n output = """"\n f..."


In [6]:
import re

def find_example_message_from_message_code(messages, message_code):
    """Return the first double-quoted string in ``messages`` that contains ``message_code``.

    Args:
        messages: Text to search (e.g. the raw linter output).
        message_code: Code to look for; it is matched literally, not as a regex.

    Returns:
        The matching string including its surrounding double quotes, or
        ``"<message_code>_unknown"`` (also quoted) when nothing matches.
    """
    # A quoted token: opening quote, any non-quote characters around the
    # literal code, closing quote.
    quoted_with_code = re.compile('"[^"]*' + re.escape(message_code) + '[^"]*"')
    found = quoted_with_code.search(messages)
    return found.group() if found is not None else f'"{message_code}_unknown"'


# Read the raw EduLint output once and look up an example message text for
# every message code. The text gets its own name (`results_text`) so that it
# does not clash with the `messages` DataFrame built in a later cell:
# re-running this cell must not turn `messages` back into a plain string.
with open(data_path / 'edulint' / 'results.txt') as f:
    results_text = f.read()

# Codes are upper-cased before the lookup; the keys keep the original
# spelling used in `features`.
feature_descriptions = {
    feature_name: find_example_message_from_message_code(results_text, feature_name.upper())
    for feature_name in features
}

In [7]:
# One row per submission (same index as `log`), one column per message code.
message_matrix = np.vstack(log['linter_messages'])
messages = pd.DataFrame(message_matrix, index=log.index, columns=features)

def show_example_for_message_code(code, idx=0):
    """Print a message's description and one submission that triggered it.

    Args:
        code: Message code; must be a key of ``feature_descriptions`` and a
            column of ``messages``.
        idx: Position of the example among the submissions flagged with
            ``code`` (0 is the first flagged submission).
    """
    description = feature_descriptions[code]
    print('Description:', description, '\nExample:')

    # Only submissions where the message fired at least once.
    flagged_submissions = log[messages[code] > 0]
    print(flagged_submissions.iloc[idx]['answer'])

# Exploring linter messages

In [8]:
# Number of submissions in which each message fired at least once.
freq = (messages.to_numpy() > 0).sum(axis=0)

# Pair every frequency with its description through the column name rather
# than relying on `feature_descriptions` happening to share the column order.
descriptions = [feature_descriptions[code] for code in messages.columns]
shares = np.round(freq / freq.sum(), 4)

# Sort ascending by share so the most frequent messages end up on the right.
x, y = zip(*sorted(zip(descriptions, shares), key=lambda pair: pair[1]))
px.bar(x=x, y=y, labels={'x': 'Linter message', 'y': 'Share of flagged (submission, message) pairs'})

# How likely are users to repeat each of the detected mistakes?

In [9]:
# Linter message codes; these are the column names of `messages`.
features

array(['c0103', 'e0102', 'e101', 'e111', 'e117', 'e201', 'e202', 'e203',
       'e211', 'e221', 'e222', 'e225', 'e227', 'e228', 'e231', 'e251',
       'e261', 'e262', 'e265', 'e271', 'e272', 'e275', 'e302', 'e303',
       'e305', 'e501', 'e701', 'e703', 'e741', 'f811', 'f841', 'r1704',
       'r1705', 'r1723', 'r6201', 'r6304', 'r6305', 'r6602', 'r6604',
       'r6606', 'r6611', 'w0101', 'w0104', 'w0127', 'w0622', 'w191',
       'w291', 'w293'], dtype=object)

# How severe is each of the messages?

## Does it correlate with an unsuccessful submission?

In [10]:
from scipy.stats import spearmanr
from sklearn.metrics import f1_score, confusion_matrix

# Spearman correlation between "message did not fire" (1 - flag) and a correct
# submission, computed separately for every message code.
correlations = {}
significant = []
correlation_rows = []
for msg in messages.columns:
    corr, p = spearmanr(1 - messages[msg], log['correct'])
    correlations[feature_descriptions[msg]] = corr
    significant.append(p < 0.05)
    correlation_rows.append(
        {'message': feature_descriptions[msg], 'correlation': corr, 'significant': bool(p < 0.05)}
    )

# Sort description, correlation and significance flag *together*. Sorting only
# x/y and passing the unsorted `significant` list as colour would attach the
# flags to the wrong bars.
correlation_table = pd.DataFrame(
    correlation_rows, columns=['message', 'correlation', 'significant']
).sort_values('correlation')
x, y = tuple(correlation_table['message']), tuple(correlation_table['correlation'])

# Colouring splits the bars into one trace per flag value; pin the category
# order so the bars stay sorted by correlation.
px.bar(
    correlation_table,
    x='message',
    y='correlation',
    color='significant',
    category_orders={'message': list(x)},
    labels={'message': 'Linter message', 'correlation': 'Spearman correlation', 'significant': 'p < 0.05'},
)

In [11]:
import plotly.figure_factory as ff

def plot_confusion_matrix(cm):
    """Show a 2x2 confusion matrix as an annotated Plotly heatmap.

    Args:
        cm: 2x2 array indexed as ``cm[actual, predicted]`` (the layout
            returned by ``sklearn.metrics.confusion_matrix`` for binary
            labels). Indexed with ``cm[row, col]``, so it must support
            tuple indexing.
    """
    cell_names = [
        ['True Negatives', 'False Positives'],
        ['False Negatives', 'True Positives'],
    ]
    # Plotly renders annotation text with HTML-like markup: a line break has
    # to be written as <br>, a plain "\n" is not shown as a new line.
    annotation_text = [
        [f'{cell_names[row][col]}<br>{cm[row, col]}' for col in range(2)]
        for row in range(2)
    ]

    # Pass the labels directly so every cell gets its own text, independent of
    # the order in which the figure factory emits annotations.
    cm_table = ff.create_annotated_heatmap(
        z=cm,
        x=['Predicted 0', 'Predicted 1'],
        y=['Actual 0', 'Actual 1'],
        annotation_text=annotation_text,
        colorscale='Blues',
    )
    cm_table.update_layout(title_text='Confusion Matrix', xaxis=dict(title='Predicted label'), yaxis=dict(title='True label'))

    # Show confusion matrix
    cm_table.show()
    

def inspect_message(code, idx=0):
    """Show an example for a linter message and how well its absence predicts success.

    Prints the message description and one flagged submission, then the F1
    score and confusion matrix of the rule "submission is correct iff the
    message did not fire", evaluated against ``log['correct']``.

    Args:
        code: Message code (a column of ``messages``).
        idx: Which flagged submission to print as the example.
    """
    show_example_for_message_code(code, idx)

    # Binarise explicitly: for 0/1 flags this equals `1 - messages[code]`, but
    # it also stays a valid 0/1 prediction if a cell holds a count above 1.
    predicted_correct = (messages[code] == 0).astype(int)

    print('f1 score:', f1_score(log['correct'], predicted_correct))
    plot_confusion_matrix(confusion_matrix(log['correct'], predicted_correct))

In [20]:
# Inspect message E227 using the flagged submission at position 30 as the example.
inspect_message('e227', 30)

Description: "E227_missing_whitespace_around_bitwise_or_shift_operator_" 
Example:
def powers(n):
    print(n, "na druhou je", n^3)
    print(n, "na treti neni", n)
f1 score: 0.6457550840862178


In [13]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score

# Hold out a third of the submissions for the permutation-importance analysis.
X_train, X_test, y_train, y_test = train_test_split(
    messages,
    log['correct'],
    test_size=0.33,
    random_state=42,
)

# Fit on the full training split; this fitted model is reused in later cells.
clf = GaussianNB()
clf.fit(X_train, y_train)

# 5-fold cross-validated F1 on the training split as a sanity check.
scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='f1')
print(scores)

[0.64859045 0.64981078 0.65121125 0.65030795 0.65119911]


In [14]:
from sklearn.inspection import permutation_importance

# Permutation importance of every message column, measured as the drop in F1
# on the held-out split.
importance = permutation_importance(
    estimator=clf,
    X=X_test,
    y=y_test,
    scoring='f1',
    random_state=0,
)

In [15]:
fig = go.Figure()

# One box per feature, drawn from its per-repeat importance values; features
# whose mean importance does not exceed the threshold are left out.
per_feature = zip(features, importance.importances_mean, importance.importances)
for feature_code, mean_importance, repeat_scores in per_feature:
    if mean_importance > 0.0001:
        box = go.Box(x=repeat_scores, name=feature_descriptions[feature_code])
        fig.add_trace(box)

layout_options = dict(
    title="Permutation Feature Importance for Gaussian Naive Bayes Model Predicting Success of Submission from Linter Messages",
    yaxis_title="Features",
    xaxis_title="Importance",
    showlegend=False,
)
fig.update_layout(**layout_options)
fig.show()