In [None]:
from pathlib import Path
import pandas as pd
from src.load_scripts import load_defects


# Directory holding the ib111 exports read below.
data_path = Path('data/ib111')

# Both raw tables are plain CSV files with pandas' default parsing options.
log, messages = (
    pd.read_csv(data_path / filename)
    for filename in ('log.csv', 'messages.csv')
)
# The defect table is read through the project's own loader.
defects = load_defects('data/defects.csv')

In [None]:
# Lookup table from EduLint code to defect id (the index label of `defects`).
# `explode()` gives every entry of a list-valued cell its own row while repeating the index label;
# when a code occurs more than once, the last occurrence wins.
exploded_codes = defects['EduLint code'].explode()
code_to_id = dict(zip(exploded_codes, exploded_codes.index))

In [None]:
# Drop messages whose EduLint code has no assigned defect and tag the remaining ones with the
# matching defect id.
# The filtered rows go through `.assign`, which returns a new frame, so the new column is never
# written onto a slice of the original `messages` (avoids pandas' SettingWithCopyWarning).
messages = (
    messages
    .loc[messages['code'].isin(code_to_id.keys())]
    .assign(defect_id=lambda kept: kept['code'].map(code_to_id))
)


In [None]:
# Attach the log columns to every message that shares its 'path' (default inner join).
df = pd.merge(messages, log, on='path')

In [None]:
# Print a summary of `df`: its columns, their dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Rebinds `df`: groups `messages` by "path", aggregates each group's "defect" column with
# pd.Series, and turns the "path" group key back into a regular column via reset_index().
# NOTE(review): an earlier cell adds a "defect_id" column to `messages`; confirm that a "defect"
# column really exists in messages.csv, otherwise this selection raises a KeyError.
# NOTE(review): this replaces the merged frame previously stored in `df` — confirm that is intended.
df = messages.groupby("path").agg({"defect": pd.Series}).reset_index()

In [None]:
import math
from pathlib import Path

import dash
import dash_bootstrap_components as dbc
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.subplots as sp
from dash import Input, Output, dcc, html

####################################################################################################
##                                              Data                                              ##
####################################################################################################

# Id of the user whose history is analysed.
USER_ID = 28594603
# Alternative id: USER_ID = 2121562

data_path = Path("data/ipython_old/cache/")


def _read_cached(filename):
    """Read one ';'-separated table from `data_path` (row 0 is the header, column 0 the index)."""
    return pd.read_csv(data_path / filename, sep=";", header=0, index_col=0)


log = _read_cached("log.csv")
log["time"] = pd.to_datetime(log["time"])
items = _read_cached("items.csv")
defects = _read_cached("defects.csv")
defect_log = _read_cached("defect_log.csv")

# Chronologically ordered rows of the selected user; abort when there are none.
user_history = log.loc[log["user"] == USER_ID].sort_values("time")
if len(user_history) == 0:
    raise ValueError("No history for this user.")

# "final" is True on every row whose successor has a different item, and on the very last row.
item_sequence = user_history["item"].values
user_history["final"] = np.append(item_sequence[:-1] != item_sequence[1:], True)

# A gap of more than one hour between consecutive rows opens a new session; the running count of
# such gaps is the (0-based) session id of each row.
long_gap = user_history["time"].diff() > pd.Timedelta(hours=1)
session_ids = long_gap.cumsum().tolist()

In [None]:
####################################################################################################
##                                            Layout                                              ##
####################################################################################################

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# One subplot per session on a near-square grid. Session ids start at 0, so the number of
# sessions is the last id plus one.
session_count = session_ids[-1] + 1
cols = math.floor(math.sqrt(session_count))
rows = math.ceil(session_count / cols)

# `session_ids` is a plain list; comparing a list with a scalar is only element-wise when the
# scalar happens to be a NumPy type, so build the row masks from an explicit array instead.
session_labels = np.asarray(session_ids)

session_figure = sp.make_subplots(rows=rows, cols=cols, shared_yaxes='all')
for session_id in np.unique(session_labels):
    session_data = user_history[session_labels == session_id]
    session_figure.add_trace(
        go.Scatter(
            x=session_data["time"],
            # Length of each answer; `.str.len()` yields NaN for a missing answer, whereas
            # `apply(len)` would raise a TypeError on it.
            y=session_data["answer"].str.len(),
            mode="markers",
            marker=dict(
                size=10,
                # Green for correct rows, red otherwise.
                color=session_data["correct"].apply(lambda x: "green" if x else "red"),
                # A cross marks rows flagged "final", a circle all other rows.
                symbol=session_data["final"].apply(lambda x: "x" if x else "circle"),
            ),
        ),
        # Sessions fill the grid column by column; plotly's row/col are 1-based and are passed as
        # plain ints rather than NumPy integers.
        col=int(session_id // rows) + 1,
        row=int(session_id % rows) + 1,
    )
session_figure.update_layout(
    margin=dict(l=0, r=0, b=0, t=40),
    showlegend=False,
)
# Tick labels show the time of day on the first line and day-month on the second.
session_figure.update_xaxes(
    tickformat="%H:%M<br>%d-%m"
)
session_figure

In [None]:
# Length of every answer in the user's history. `.str.len()` reports NaN for a missing answer,
# whereas `apply(len)` raises a TypeError on it.
user_history['answer'].str.len()