# Read and prepare Dataset

In [83]:
#import necessary libraries
import pandas as pd
import numpy as np
import panel as pn
import hvplot.xarray
import hvplot.pandas
import datetime as dt
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import cycle
import mysql.connector

pn.extension('tabulator')
pn.extension()
from sqlalchemy import create_engine

In [84]:
# Load the example data set into a DataFrame.
# NOTE(review): absolute, machine-specific path - the notebook only runs where this file exists.
data = pd.read_csv(
    'D:/1TU/Wirtschaftsinformatik/Bachelorarbeit/Dashboard/learning_analytics_dashboard/example_data_set.csv'
)

# Interactive wrapper around the freshly loaded frame
data_int = data.interactive()

In [85]:
# Synthetic attributes: each fixed label set is repeated round-robin over the rows.
# The cycle objects stay bound to subject_list / class_list / exercise_list.

# subject per row
subject_list = cycle(['Deutsch', 'Englisch', 'Mathematik', 'Biologie', 'Musik', 'Kunst', 'Sport'])
data['subject'] = [next(subject_list) for _ in range(len(data))]

# class per row
class_list = cycle(['Klasse 1a', 'Klasse 1d', 'Klasse 3b', 'Klasse 4c', 'Klasse 5a'])
data['class'] = [next(class_list) for _ in range(len(data))]

# exercise per row
exercise_list = cycle(['Aufgabe 1', 'Aufgabe 2', 'Aufgabe 3', 'Aufgabe 4', 'Aufgabe 5', 'Aufgabe 6'])
data['exercise'] = [next(exercise_list) for _ in range(len(data))]

# constant column holding the label 'Allgemein' in every row
data['Allgemein'] = 'Allgemein'

# Join the date and time text, strip the commas and let pandas infer the
# timestamp format (no explicit format string is passed).
data['datetime'] = pd.to_datetime(
    (data['date'].astype(str) + ' ' + data['time']).str.replace(',', '')
)

In [86]:
# Distinct subject / class / exercise labels, in order of first appearance.
# NOTE(review): a later cell remaps the class labels (e.g. 'Klasse 5a' -> 'Klasse 8.2'),
# so class_list may no longer match the final column values - confirm.
subject_list, class_list, exercise_list = (
    data[column].unique().tolist() for column in ('subject', 'class', 'exercise')
)

# Distinct student names in ascending order
name_list = sorted(data['name'].unique().tolist())

# Extra leading entries: the 'Allgemein' choice and the 'Keine Auswahl' placeholder
default_exercise = ['Allgemein']
class_list_with_none = ['Keine Auswahl', *class_list]
name_list_with_none = ['Keine Auswahl', *name_list]

In [87]:
# Switch the column headers to their German labels.
# Only headers that match a key exactly are renamed; all others stay as they are.
data = data.rename(
    {
        'name': 'Name',
        'duration': 'Bearbeitungsdauer',
        'number of tries': 'Anzahl Versuche',
        'subject': 'Fach',
        'exercise': 'Aufgabe',
        'class': 'Klasse',
        'score': 'Punktzahl',
        'datetime': 'Startzeit',
    },
    axis='columns',
)

In [88]:
# Blank out every cell equal to 26, in any column.
# NOTE(review): 26 looks like a sentinel value, but its meaning is not visible here - confirm.
data = data.replace(to_replace=26, value=np.nan)

In [89]:
# Reshape the frame for the CSV export written in the next cell.
# NOTE(review): this cell is not re-runnable - the drop below fails once the columns are gone.

# Constant column holding the label 'Alle Fächer' in every row
data['Alle Fächer'] = 'Alle Fächer'
# Collapse the five class labels onto three; a label missing from the dict would become None via dict.get
data['Klasse'] = data['Klasse'].apply({'Klasse 1d': 'Klasse 1a', 'Klasse 4c': 'Klasse 3b', 'Klasse 5a': 'Klasse 8.2', 'Klasse 1a': 'Klasse 1a', 'Klasse 3b': 'Klasse 3b'}.get)
# The raw date/time text is no longer needed (a combined timestamp column exists); 'passed' is dropped as well
data = data.drop(columns = ['date', 'time', 'passed'])
# From here on the duration and start columns carry the 'Quiz ' prefix; downstream cells have to use these names
data = data.rename(columns = {'Bearbeitungsdauer': 'Quiz Bearbeitungsdauer', 'Startzeit': 'Quiz Startzeit'})

# Convert the duration text into a timedelta by splitting on ':' into three integer parts
# (presumably 'H:M:S' - every row must have exactly these three parts, otherwise astype(int) / temp[2] fail)
# https://stackoverflow.com/questions/53543061/convert-string-to-timedelta-in-pandas
temp = data['Quiz Bearbeitungsdauer'].str.split(':', expand = True).astype(int)
new_column = pd.to_timedelta(temp[0], unit = 'h') + pd.to_timedelta(temp[1], unit = 'm') + pd.to_timedelta(temp[2], unit = 's')
data['Quiz Bearbeitungsdauer'] = new_column
# Submission time = start time + working time
data['Quiz Abgabezeit'] = data['Quiz Startzeit'] + data['Quiz Bearbeitungsdauer']

# Constant maxima, plus a copy of the score column under a second header
data['Maximale Punktzahl'] = 100
data['Maximale Note in %'] = 100
data['Beste Punktzahl in %'] = data['Punktzahl']

In [93]:
# Write the prepared frame next to the notebook, without the row index
data.to_csv(path_or_buf='extension_for_moodle_data.csv', index=False)


***
# Functionality: Create buttons and menus

## Button to choose exercise

In [8]:
# Vertical radio group: the 'Allgemein' entry first, then one button per exercise
exercise_button = pn.widgets.RadioButtonGroup(
    name='exercise_button',
    options=[*default_exercise, *exercise_list],
    button_type='success',
    orientation='vertical',
)

exercise_button

## Button to choose subject

In [9]:
# Radio group with one button per subject
subject_button = pn.widgets.RadioButtonGroup(
    name='Fach auswählen',
    options=subject_list,
    button_type='success',
)

## Menu to choose class and student

In [10]:
# Drop-down of classes; the first entry is the 'Keine Auswahl' placeholder
class_selection = pn.widgets.Select(
    name='Klasse auswählen',
    options=class_list_with_none,
)
class_selection

In [11]:
# Drop-down of students; the first entry is the 'Keine Auswahl' placeholder
student_selection = pn.widgets.Select(
    name='Schüler*in auswählen',
    options=name_list_with_none,
)
student_selection

## Menu to choose date range

In [12]:
# Range picker preset from 1 Jan 2023 (midnight) up to the moment this cell runs
values = (dt.datetime(2023, 1, 1), dt.datetime.now())
datetime_range_picker = pn.widgets.DatetimeRangePicker(
    name='Zeitraum auswählen',
    value=values,
)
datetime_range_picker

In [13]:
# Lower bound of the time filter, preset to 1 Jan 2023, 00:00
start_date_picker = pn.widgets.DatetimePicker(
    name='Startdatum auswählen',
    value=dt.datetime(2023, 1, 1),
)
start_date_picker

In [14]:
# Upper bound of the time filter, preset to 31 Dec 2023, 00:00
end_date_picker = pn.widgets.DatetimePicker(
    name='Enddatum auswählen',
    value=dt.datetime(2023, 12, 31),
)
end_date_picker

# Helper functions

## helper functions to process data based on conditions (e.g. all data for exercise 1)

In [15]:
# Columns that are kept in every filtered view (for example 'Klasse'):
# all headers of `data` that do not contain the text 'Aufgabe'.
essential_cols = data.columns[~data.columns.str.contains('Aufgabe')]

# get attributes across all exercises (for example score for each exercise)
def get_specific_attribute(col_name, df):
    """Return the columns of ``df`` matching ``col_name`` plus the essential columns.

    Parameters
    ----------
    col_name : str
        Regular expression that is searched for in the column headers.
    df : pandas.DataFrame
        Frame to take the columns from.

    Returns
    -------
    pandas.DataFrame
        The matching columns first, followed by those of the module-level
        ``essential_cols`` that exist in ``df``. A column that is in both
        groups appears twice.
    """
    parts = [df.filter(regex=col_name), df.filter(items=essential_cols)]
    return pd.concat(parts, axis=1)

In [16]:
# filter dataset for only one specific exercise based on the exercise_button
def get_exercise(exercise, df):
    """Return the columns belonging to one exercise plus the essential columns.

    Parameters
    ----------
    exercise : str
        Exercise label such as ``'Aufgabe 1'``; it is used as a regular
        expression to select column headers.
    df : pandas.DataFrame
        Frame to take the columns from.

    Returns
    -------
    pandas.DataFrame
        The matching columns followed by those of the module-level
        ``essential_cols`` that exist in ``df``. The text ``exercise + ' '``
        is removed from the headers, so ``'Aufgabe 1 Punktzahl'`` becomes
        ``'Punktzahl'``.
    """
    exercise_part = df.filter(regex=exercise)
    shared_part = df.filter(items=essential_cols)
    combined = pd.concat([exercise_part, shared_part], axis=1)

    # strip the exercise label (and the blank after it) from the headers
    prefix = exercise + ' '
    combined.columns = combined.columns.str.replace(prefix, '')
    return combined

In [17]:
# filter dataset by class
def get_class(class_name, df):
    """Return the rows of ``df`` whose ``'class'`` column equals ``class_name``.

    NOTE(review): the notebook's ``data`` frame has ``'class'`` renamed to
    ``'Klasse'`` by the time this is defined, so ``df`` must still carry the
    original header - confirm against the callers.
    """
    in_class = df['class'] == class_name
    return df.loc[in_class]

In [18]:
# filter dataset by subject
def get_subject(subject_name, df):
    """Return the rows of ``df`` whose ``'subject'`` column equals ``subject_name``.

    This helper used to be defined under the name ``get_class`` and thereby
    replaced the class filter of the same name; it now has its own name so
    that both filters are available.

    Parameters
    ----------
    subject_name : str
        Subject label to keep, e.g. ``'Deutsch'``.
    df : pandas.DataFrame
        Frame with a ``'subject'`` column.

    Returns
    -------
    pandas.DataFrame
        The matching rows with their original index.

    NOTE(review): the notebook's ``data`` frame has ``'subject'`` renamed to
    ``'Fach'`` by the time this is defined, so ``df`` must still carry the
    original header - confirm against the callers.
    """
    return df.loc[df['subject'] == subject_name]

In [19]:
# filter dataset by date
def get_date(date, df):
    """Return the rows of ``df`` whose date column equals ``date``.

    The date column is found by name: a column called exactly ``'date'`` is
    preferred, otherwise the first column whose header contains ``'date'``
    is used. (Previously the filtered DataFrame itself was used as the column
    key, which cannot work.)

    Parameters
    ----------
    date : object
        Value to compare the date column against.
    df : pandas.DataFrame
        Frame to filter.

    Returns
    -------
    pandas.DataFrame
        The matching rows with their original index.

    Raises
    ------
    KeyError
        If no column header contains ``'date'``.
    """
    candidates = df.filter(regex='date').columns
    if len(candidates) == 0:
        raise KeyError("no column name containing 'date' found")
    col_name = 'date' if 'date' in candidates else candidates[0]
    return df.loc[df[col_name] == date]

In [20]:
# filter dataset by time
def get_time(time, df):
    """Return the rows of ``df`` whose time column equals ``time``.

    The time column is found by name: a column called exactly ``'time'`` is
    preferred, otherwise the first column whose header contains ``'time'``
    is used. (Previously the filtered DataFrame itself was used as the column
    key, which cannot work.)

    Parameters
    ----------
    time : object
        Value to compare the time column against.
    df : pandas.DataFrame
        Frame to filter.

    Returns
    -------
    pandas.DataFrame
        The matching rows with their original index.

    Raises
    ------
    KeyError
        If no column header contains ``'time'``.
    """
    candidates = df.filter(regex='time').columns
    if len(candidates) == 0:
        raise KeyError("no column name containing 'time' found")
    col_name = 'time' if 'time' in candidates else candidates[0]
    return df.loc[df[col_name] == time]

In [21]:
# convert date and time of datetime_range_picker to date and time of dataset
# English month abbreviations, index 0 = January
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

def convert_date(date):
    """Format both ends of ``datetime_range_picker`` as ``'Mon D, YYYY'`` text.

    Parameters
    ----------
    date : object
        Not used; kept so that existing calls keep working. The values are
        always read from the module-level ``datetime_range_picker``.

    Returns
    -------
    tuple of str
        ``(start_date, end_date)``, e.g. ``('May 3, 2023', 'Dec 24, 2023')``.
    """
    start, end = datetime_range_picker.value

    def _as_text(moment):
        # month numbers run 1..12, list indices 0..11 - a direct lookup also covers December
        return months[moment.month - 1] + ' ' + str(moment.day) + ', ' + str(moment.year)

    return _as_text(start), _as_text(end)

def convert_time(time):
    """Return the time-of-day parts of both ends of ``datetime_range_picker``.

    The ``time`` argument is not used; the values are always read from the
    module-level ``datetime_range_picker``.

    Returns
    -------
    tuple
        ``(start_time, end_time)`` as returned by each end's ``.time()``.
    """
    range_start, range_end = datetime_range_picker.value
    return range_start.time(), range_end.time()

# plots

## exercise

### count

In [22]:
# Attach to every row the number of rows that share its score
data['Anzahl'] = data.groupby(['Punktzahl'])['Klasse'].transform('count')
count_int = data.interactive()

# Reactive pipeline: filter by the widget selections, then count rows per score.
# The start-time column is called 'Quiz Startzeit' at this point (it is renamed
# from 'Startzeit' in the data-preparation section), so that name is used here.
# NOTE(review): with both drop-downs left on 'Keine Auswahl' the last condition
# matches no row - confirm that an empty result is the intended default.
count_pipeline = (
    count_int[
        (count_int['Fach'] == subject_button) &
        ((count_int['Quiz Startzeit'] >= start_date_picker) & (count_int['Quiz Startzeit'] <= end_date_picker)) &
        ((count_int['Aufgabe'] == exercise_button) | (count_int['Allgemein'] == exercise_button)) &
        ((count_int['Klasse'] == class_selection) | (count_int['Name'] == student_selection))
    ]
    .groupby(['Punktzahl'])['Anzahl'].count()
    .to_frame()
    .reset_index()
    .sort_values(by='Punktzahl')
    .reset_index(drop=True)
)

In [23]:
# Scatter plot of the per-score counts, kept as the pipeline's output for the layout
count_plot = count_pipeline.hvplot.scatter(
    x='Punktzahl',
    y='Anzahl',
    title='Anzahl der Punktzahl',
    xlabel='Punktzahl',
    ylabel='Anzahl Schüler*innen',
).output()

### mean

In [24]:
# Per-exercise averages of score and number of tries, attached to every row
# NOTE(review): the headers are spelled 'Durschschnitt' (sic); they are shown in the table - confirm before changing.
data['Durschschnitt Punktzahl'] = data.groupby(['Aufgabe'])['Punktzahl'].transform('mean')
data['Durschschnitt Fehlversuche'] = data.groupby(['Aufgabe'])['Anzahl Versuche'].transform('mean')
mean_int = data.interactive()

# Reactive pipeline: filter by the widget selections, then reduce to one row
# per exercise with both averages.
# The start-time column is called 'Quiz Startzeit' at this point (it is renamed
# from 'Startzeit' in the data-preparation section), so that name is used here.
mean_pipeline = (
    mean_int[
        (mean_int['Fach'] == subject_button) &
        ((mean_int['Quiz Startzeit'] >= start_date_picker) & (mean_int['Quiz Startzeit'] <= end_date_picker)) &
        ((mean_int['Aufgabe'] == exercise_button) | (mean_int['Allgemein'] == exercise_button)) &
        ((mean_int['Klasse'] == class_selection) | (mean_int['Name'] == student_selection))
    ]
    .groupby(['Aufgabe', 'Durschschnitt Fehlversuche'])['Durschschnitt Punktzahl'].mean()
    .to_frame()
    .reset_index()
    .reset_index(drop=True)
)

In [25]:
# Paginated Tabulator table of the averages, kept as the pipeline's output for the layout
mean_table = mean_pipeline.pipe(
    pn.widgets.Tabulator,
    pagination='remote',
    page_size=10,
    theme='fast',
    hidden_columns=['index'],
).output()

# Create Dashboard

In [26]:
# Dashboard layout based on Panel's FastListTemplate:
# selection widgets in the sidebar, filters and results in the main area.
template = pn.template.FastListTemplate(
    title='Mein Dashboard',
    sidebar=[
        pn.pane.Markdown("# Auswahl"),
        class_selection,
        student_selection,
        exercise_button,
        pn.pane.Markdown("## Einstellungen"),
    ],
    main=[
        # first row: subject buttons and the two date pickers
        pn.Row(
            pn.Column(subject_button),
            pn.Column(start_date_picker, height=400),
            pn.Column(end_date_picker),
        ),
        # second row: score-count plot next to the table of averages
        pn.Row(
            pn.Column(count_plot),
            pn.Column(mean_table),
        ),
    ],
    accent='#88d8b0',
)

# Launch a local server for the dashboard
template.show()

# Alternative: enable the line below and run
#   panel serve --port 53369 learning_analytics_dashboard.ipynb
# in a terminal, then click on the printed link.
#template.servable();

Launching server at http://localhost:50131


<panel.io.server.Server at 0x25f842936d0>