# Read and prepare Dataset

In [1]:
#import necessary libraries
import pandas as pd
import numpy as np
import panel as pn
import hvplot.xarray
import hvplot.pandas
import datetime as dt
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import cycle

# Initialise Panel for the notebook; 'tabulator' is requested because a
# pn.widgets.Tabulator table is built further down.
pn.extension('tabulator')
# NOTE(review): this second call requests no additional extension and looks
# redundant after the call above — confirm before removing.
pn.extension()

In [2]:
# Load the example data set into a DataFrame.
# NOTE(review): absolute path on one specific machine — the notebook cannot be
# re-run elsewhere until this points at a local copy of the CSV.
csv_path = 'D:/1TU/Wirtschaftsinformatik/Bachelorarbeit/Dashboard/learning_analytics_dashboard/example_data_set.csv'
data = pd.read_csv(csv_path)

# hvplot's interactive wrapper around the freshly loaded frame
data_int = data.interactive()

In [3]:
# Disabled snippet (was kept inside a bare string literal, so it never ran):
# add failed columns
# data['exercise 1 failed'] = np.where(data['exercise 1 passed'] == 0, 1, 0)
# data['exercise 2 failed'] = np.where(data['exercise 2 passed'] == 0, 1, 0)
# data['exercise 3 failed'] = np.where(data['exercise 3 passed'] == 0, 1, 0)
# data['exercise 4 failed'] = np.where(data['exercise 4 passed'] == 0, 1, 0)
# data['exercise 5 failed'] = np.where(data['exercise 5 passed'] == 0, 1, 0)
# data['exercise 6 failed'] = np.where(data['exercise 6 passed'] == 0, 1, 0)

row_count = len(data)

# subject column: the subject names are handed out round-robin, one per row
subject_list = cycle(['Deutsch', 'Englisch', 'Mathematik', 'Biologie', 'Musik', 'Kunst', 'Sport'])
data['subject'] = [label for _, label in zip(range(row_count), subject_list)]

# class column: same round-robin scheme with the class names
class_list = cycle(['Klasse 1a', 'Klasse 1d', 'Klasse 3b', 'Klasse 4c', 'Klasse 5a'])
data['class'] = [label for _, label in zip(range(row_count), class_list)]

# exercise column: same round-robin scheme with the exercise names
exercise_list = cycle(['Aufgabe 1', 'Aufgabe 2', 'Aufgabe 3', 'Aufgabe 4', 'Aufgabe 5', 'Aufgabe 6'])
data['exercise'] = [label for _, label in zip(range(row_count), exercise_list)]

# constant 'Allgemein' column
data['Allgemein'] = 'Allgemein'

# datetime column: join date and time as text, drop the commas, then parse.
# No explicit format is passed, so pandas infers it.
datetime_text = (data['date'].astype(str) + ' ' + data['time']).str.replace(',', '')
data['datetime'] = pd.to_datetime(datetime_text)

In [4]:
# Distinct values of the categorical columns, in order of first appearance
subject_list, class_list, exercise_list = (
    data[column].unique().tolist() for column in ('subject', 'class', 'exercise')
)

# Distinct student names in ascending order
name_list = sorted(data['name'].unique().tolist())

# Extra entries that the widgets offer in addition to the data values
default_exercise = ['Allgemein']
class_list_with_none = ['Keine Auswahl', *class_list]
name_list_with_none = ['Keine Auswahl', *name_list]

In [5]:
# Swap 'exercise' for 'Aufgabe' in every column header; the new headers are
# also kept under the name make_german.
data.columns = make_german = data.columns.str.replace('exercise', 'Aufgabe')

***
# Functionality: Create buttons and menus

## Button to choose exercise

In [6]:
# Vertical radio-button group for the exercise; the default_exercise entries
# are listed ahead of the exercises found in the data.
exercise_button = pn.widgets.RadioButtonGroup(
    name='exercise_button',
    options=[*default_exercise, *exercise_list],
    button_type='success',
    orientation='vertical',
)

exercise_button

## Button to choose subject

In [7]:
# Radio-button group with one button per subject found in the data
subject_button = pn.widgets.RadioButtonGroup(
    name='Fach auswählen',
    options=subject_list,
    button_type='success',
)

## Menu to choose class and student

In [8]:
# Drop-down for the class; 'Keine Auswahl' is the first option
class_selection = pn.widgets.Select(
    name='Klasse auswählen',
    options=class_list_with_none,
)
class_selection

In [9]:
# Drop-down for the student; 'Keine Auswahl' is the first option
student_selection = pn.widgets.Select(
    name='Schüler*in auswählen',
    options=name_list_with_none,
)
student_selection

## Menu to choose date range

In [10]:
# Range picker preset from the start of 2023 up to the moment this cell runs
range_start = dt.datetime(2023, 1, 1)
values = (range_start, datetime.now())
datetime_range_picker = pn.widgets.DatetimeRangePicker(
    name='Zeitraum auswählen',
    value=values,
)
datetime_range_picker

In [11]:
# Lower bound of the date filter, preset to midnight on 1 January 2023
start_date_picker = pn.widgets.DatetimePicker(
    name='Startdatum auswählen',
    value=dt.datetime(2023, 1, 1),
)
start_date_picker

In [12]:
# Upper bound of the date filter, preset to midnight on 31 December 2023
end_date_picker = pn.widgets.DatetimePicker(
    name='Enddatum auswählen',
    value=dt.datetime(2023, 12, 31),
)
end_date_picker

# Helper functions

## helper functions to process data based on conditions (e.g. all data for exercise 1)

In [13]:
# Columns carried along in every derived frame (for example 'class'):
# all headers that do not contain 'Aufgabe'
is_exercise_col = data.columns.str.contains('Aufgabe')
essential_cols = data.columns[~is_exercise_col]

# get attributes across all exercises (for example score for each exercise)
def get_specific_attribute(col_name, df):
    """Return the columns of ``df`` matching ``col_name`` next to the essential columns.

    ``col_name`` is applied as a regular expression to the column labels.
    The essential columns are taken from the module-level ``essential_cols``
    and are placed after the matching columns.
    """
    return pd.concat(
        [df.filter(regex=col_name), df.filter(essential_cols)],
        axis=1,
    )

In [14]:
# filter dataset for only one specific exercise based on the exercise_button
def get_exercise(exercise, df):
    """Return the columns of ``df`` for one exercise plus the essential columns.

    ``exercise`` is applied as a regular expression to the column labels.
    In the result, the text ``exercise`` followed by a space is removed from
    the column labels (e.g. the 'Aufgabe 1 ' prefix).
    """
    selected = pd.concat(
        [df.filter(regex=exercise), df.filter(essential_cols)],
        axis=1,
    )

    # strip "<exercise> " from the headers of the selected frame
    selected.columns = selected.columns.str.replace(exercise + ' ', '')
    return selected

In [15]:
# filter dataset by class
def get_class(class_name, df):
    """Return the rows of ``df`` whose 'class' column equals ``class_name``."""
    in_class = df['class'] == class_name
    return df.loc[in_class]

In [16]:
# filter dataset by subject
def get_subject(subject_name, df):
    """Return the rows of ``df`` whose 'subject' column equals ``subject_name``.

    This helper used to be defined under the name ``get_class``, which
    silently replaced the class filter of the same name defined just before
    it. It now has its own name so both filters remain available.
    """
    return df.loc[df['subject'] == subject_name]

In [17]:
# filter dataset by date
def get_date(date, df):
    """Return the rows of ``df`` whose date column equals ``date``.

    The column is resolved by label: a column named exactly 'date' is
    preferred; otherwise the first column whose label contains 'date' is used.
    (Previously the result of ``df.filter(regex='date')`` — a DataFrame that
    also includes 'datetime' — was used as if it were a column label.)

    Raises:
        KeyError: if no column label contains 'date'.
    """
    candidates = [col for col in df.columns if 'date' in str(col)]
    if not candidates:
        raise KeyError("no column containing 'date' found")
    col_name = 'date' if 'date' in candidates else candidates[0]
    return df.loc[df[col_name] == date]

In [18]:
# filter dataset by time
def get_time(time, df):
    """Return the rows of ``df`` whose time column equals ``time``.

    The column is resolved by label: a column named exactly 'time' is
    preferred; otherwise the first column whose label contains 'time' is used.
    (Previously the result of ``df.filter(regex='time')`` — a DataFrame that
    also includes 'datetime' — was used as if it were a column label.)

    Raises:
        KeyError: if no column label contains 'time'.
    """
    candidates = [col for col in df.columns if 'time' in str(col)]
    if not candidates:
        raise KeyError("no column containing 'time' found")
    col_name = 'time' if 'time' in candidates else candidates[0]
    return df.loc[df[col_name] == time]

In [19]:
# convert date and time of datetime_range_picker to date and time of dataset
# Month abbreviations, January first. 'May' replaces the former 'Mai' so that
# all twelve entries use the same English spelling as the dataset's dates
# (e.g. "Aug 29, 2023").
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

def convert_date(date, picker_value=None):
    """Format the start and end of a datetime range like the dataset's 'date' column.

    Args:
        date: accepted for backward compatibility; not used.
        picker_value: optional ``(start, end)`` pair of datetimes. When left
            as None, the current value of ``datetime_range_picker`` is used.

    Returns:
        Tuple ``(start_date, end_date)`` of strings such as 'Aug 29, 2023'.
    """
    if picker_value is None:
        picker_value = datetime_range_picker.value
    start, end = picker_value

    def as_dataset_date(moment):
        # month numbers are 1-based, the list is 0-based; direct indexing also
        # covers December, which the former 0..11 search loop never matched
        return months[moment.month - 1] + ' ' + str(moment.day) + ', ' + str(moment.year)

    return as_dataset_date(start), as_dataset_date(end)

def convert_time(time):
    """Return the time-of-day parts of the range picker's current value.

    Reads ``datetime_range_picker.value`` as a ``(start, end)`` pair and
    returns ``(start.time(), end.time())``. The ``time`` argument is accepted
    but not used.
    """
    range_start, range_end = datetime_range_picker.value
    return range_start.time(), range_end.time()

# plots

## exercise

### count

In [20]:
# Per row: number of rows in the whole dataset that share this row's score
# (counted over the non-null 'class' entries).
data['count'] = data.groupby(['score'])['class'].transform('count')
# Interactive wrapper so that the widgets below can be used inside the filter
count_int = data.interactive()

# Widget-driven pipeline: filter the rows, then count the remaining rows per score.
count_pipeline = (
    count_int[
        # selected subject
        (count_int['subject'] == subject_button) &
        # between the start and end date pickers (inclusive)
        ((count_int['datetime'] >= start_date_picker) & (count_int['datetime'] <= end_date_picker)) &
        # selected exercise; the constant 'Allgemein' column lets the 'Allgemein' button match every row
        ((count_int['Aufgabe'] == exercise_button) | (count_int['Allgemein'] == exercise_button)) &
        # NOTE(review): both selectors also offer 'Keine Auswahl'; if both are left on it,
        # this clause presumably matches no row — confirm that this is intended.
        ((count_int['class'] == class_selection) | (count_int['name'] == student_selection))
    ]
    # counts the filtered rows per score; the precomputed 'count' values themselves are not used
    .groupby(['score'])['count'].count()
    .to_frame()
    .reset_index()
    .sort_values(by='score')
    .reset_index(drop=True)
)

In [21]:
# Scatter plot of the per-score row counts produced by count_pipeline
count_plot = count_pipeline.hvplot.scatter(
    x='score',
    y='count',
    title='Anzahl der Punktzahl',
    xlabel='Punktzahl',
    ylabel='Anzahl Schüler*innen',
).output()

### mean

In [22]:
# Per-exercise means, computed over the whole dataset and repeated on every row
# of that exercise: mean score and mean number of tries.
data['Durschschnitt Punktzahl'] = data.groupby(['Aufgabe'])['score'].transform('mean')
data['Durschschnitt Fehlversuche'] = data.groupby(['Aufgabe'])['number of tries'].transform('mean')
#data['Durschschnitt Dauer'] = data.groupby(['Aufgabe'])['duration'].transform('mean')
# Interactive wrapper so that the widgets below can be used inside the filter
mean_int = data.interactive()

# Widget-driven pipeline: filter the rows, then list the mean columns per exercise.
mean_pipeline = (
    mean_int[
        # selected subject
        (mean_int['subject'] == subject_button) &
        # between the start and end date pickers (inclusive)
        ((mean_int['datetime'] >= start_date_picker) & (mean_int['datetime'] <= end_date_picker)) &
        # selected exercise; the constant 'Allgemein' column lets the 'Allgemein' button match every row
        ((mean_int['Aufgabe'] == exercise_button) | (mean_int['Allgemein'] == exercise_button)) &
        # NOTE(review): both selectors also offer 'Keine Auswahl'; if both are left on it,
        # this clause presumably matches no row — confirm that this is intended.
        ((mean_int['class'] == class_selection) | (mean_int['name'] == student_selection))
    ]
    # NOTE(review): the averaged column was computed above, before any filtering, so the values
    # shown do not appear to react to the subject/date/class/student selection — confirm.
    .groupby(['Aufgabe', 'Durschschnitt Fehlversuche'])['Durschschnitt Punktzahl'].mean()
    .to_frame()
    .reset_index()
    # NOTE(review): a second reset right after reset_index() looks redundant — confirm.
    .reset_index(drop=True)
)

In [23]:
# Show the frame including the derived columns added above
# (the notebook renders a truncated preview of it).
data

Unnamed: 0,name,date,time,score,passed,duration,number of tries,subject,class,Aufgabe,Allgemein,datetime,count,Durschschnitt Punktzahl,Durschschnitt Fehlversuche
0,Risa Kramer,"Aug 29, 2023",21:29:20,26,1,0:37:10,1,Deutsch,Klasse 1a,Aufgabe 1,Allgemein,2023-08-29 21:29:20,10,54.12,1.88
1,Jelani Patel,"Aug 22, 2023",19:44:39,52,1,0:11:40,3,Englisch,Klasse 1d,Aufgabe 2,Allgemein,2023-08-22 19:44:39,5,50.84,1.84
2,Vaughan Craig,"Aug 12, 2023",16:40:11,25,0,0:44:00,3,Mathematik,Klasse 3b,Aufgabe 3,Allgemein,2023-08-12 16:40:11,5,55.24,2.36
3,Robin Cline,"Aug 6, 2023",5:50:36,21,1,0:50:09,2,Biologie,Klasse 4c,Aufgabe 4,Allgemein,2023-08-06 05:50:36,15,51.40,2.64
4,Wyoming Thomas,"Aug 19, 2023",14:10:22,24,1,1:10:34,2,Musik,Klasse 5a,Aufgabe 5,Allgemein,2023-08-19 14:10:22,15,41.32,1.84
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
745,Ryder Wilkinson,"Aug 2, 2023",2:51:26,60,1,1:04:52,2,Biologie,Klasse 1a,Aufgabe 2,Allgemein,2023-08-02 02:51:26,10,50.84,1.84
746,Zeus Bass,"Aug 10, 2023",7:50:14,73,1,1:00:43,4,Musik,Klasse 1d,Aufgabe 3,Allgemein,2023-08-10 07:50:14,10,55.24,2.36
747,Reese Sellers,"Aug 8, 2023",16:21:40,58,1,1:06:16,3,Kunst,Klasse 3b,Aufgabe 4,Allgemein,2023-08-08 16:21:40,10,51.40,2.64
748,Ezra Lewis,"Aug 6, 2023",11:47:49,79,1,1:07:07,1,Sport,Klasse 4c,Aufgabe 5,Allgemein,2023-08-06 11:47:49,10,41.32,1.84


In [24]:
# Disabled alternative: show the means as a scatter plot
#mean_table = mean_pipeline.hvplot.scatter(x='exercise', y='mean', title='Anzahl der Punktzahl', xlabel = 'Punktzahl', ylabel = 'Anzahl Schüler*innen').output()

# Render the pipeline result as a Tabulator table with ten rows per page
mean_table = mean_pipeline.pipe(
    pn.widgets.Tabulator,
    pagination='remote',
    page_size=10,
    theme='fast',
    hidden_columns=['index'],
).output()
mean_table

# Create Dashboard

In [31]:
# Dashboard layout based on Panel's FastListTemplate

# Sidebar: headings plus the class, student and exercise selectors
sidebar_items = [
    pn.pane.Markdown("# Auswahl"),
    class_selection,
    student_selection,
    exercise_button,
    pn.pane.Markdown("## Einstellungen"),
]

# Main area, first row: subject buttons and the two date pickers
filter_row = pn.Row(
    pn.Column(subject_button),
    pn.Column(start_date_picker, height=400),
    pn.Column(end_date_picker),
)

# Main area, second row: score-count plot next to the table of means
result_row = pn.Row(
    pn.Column(count_plot),
    pn.Column(mean_table),
)
# Disabled third row (Matplotlib panes for exercise_plot):
#pn.Row(pn.Column(pn.pane.Matplotlib(exercise_plot[0])),
#       pn.Column(pn.pane.Matplotlib(exercise_plot[1])))

template = pn.template.FastListTemplate(
    title='Mein Dashboard',
    sidebar=sidebar_items,
    main=[filter_row, result_row],
    accent='#88d8b0',
)

template.show()

#template.servable();
#command: panel serve learning_analytics_dashboard.ipynb in terminal then click on link

Launching server at http://localhost:50935


<panel.io.server.Server at 0x23cc40ded00>

# example

In [26]:
# Scratch example, disabled by wrapping it in a string literal: the cell only
# evaluates (and echoes) that string; none of the code inside it runs.
'''import pandas as pd
import hvplot.pandas
from datetime import datetime

# example dataframe
dates = [datetime(2023, 8, 2, 15, 30, 0),
         datetime(2023, 8, 3, 15, 30, 0),
         datetime(2023, 8, 8, 15, 30, 0),
         datetime(2023, 8, 8, 15, 30, 0),
         datetime(2023, 8, 16, 15, 30, 0), 
         datetime(2023, 8, 25, 15, 30, 0)]

df = pd.DataFrame(dates, columns = ['date'])
df['count'] = df.groupby(['date'])['date'].transform('count')

# datetime range picker
values = (dt.datetime(2023, 7, 1, 0, 0), datetime.now())
datetime_range_picker = pn.widgets.DatetimeRangePicker(name='Zeitraum auswählen', value=values)'''

"import pandas as pd\nimport hvplot.pandas\nfrom datetime import datetime\n\n# example dataframe\ndates = [datetime(2023, 8, 2, 15, 30, 0),\n         datetime(2023, 8, 3, 15, 30, 0),\n         datetime(2023, 8, 8, 15, 30, 0),\n         datetime(2023, 8, 8, 15, 30, 0),\n         datetime(2023, 8, 16, 15, 30, 0), \n         datetime(2023, 8, 25, 15, 30, 0)]\n\ndf = pd.DataFrame(dates, columns = ['date'])\ndf['count'] = df.groupby(['date'])['date'].transform('count')\n\n# datetime range picker\nvalues = (dt.datetime(2023, 7, 1, 0, 0), datetime.now())\ndatetime_range_picker = pn.widgets.DatetimeRangePicker(name='Zeitraum auswählen', value=values)"

In [27]:
# Scratch example, disabled by wrapping it in a string literal: the cell only
# evaluates (and echoes) that string; none of the code inside it runs.
'''# interactive
inter = df.interactive()

date_pipeline = (
    inter[
        ((datetime_range_picker.value[0] <= inter['date']) & (datetime_range_picker.value[1] >= inter['date']))
    ]
    .groupby(['date']).count()
    .reset_index()
    .sort_values(by='date')
    .reset_index(drop=True)
)
'''

"# interactive\ninter = df.interactive()\n\ndate_pipeline = (\n    inter[\n        ((datetime_range_picker.value[0] <= inter['date']) & (datetime_range_picker.value[1] >= inter['date']))\n    ]\n    .groupby(['date']).count()\n    .reset_index()\n    .sort_values(by='date')\n    .reset_index(drop=True)\n)\n"

In [28]:
#date_plot = date_pipeline.hvplot(x='date', y='count')
#date_plot

In [29]:
# Scratch example, disabled by wrapping it in a string literal: the cell only
# evaluates (and echoes) that string; none of the code inside it runs.
'''# example dashboard
template = pn.template.FastListTemplate(
    title='Example Dashboard', 
    main=[pn.Row(pn.Column(datetime_range_picker)),
         pn.Row(pn.Column(date_plot))],
)

template.show()'''

"# example dashboard\ntemplate = pn.template.FastListTemplate(\n    title='Example Dashboard', \n    main=[pn.Row(pn.Column(datetime_range_picker)),\n         pn.Row(pn.Column(date_plot))],\n)\n\ntemplate.show()"