In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import ipywidgets as widgets
import glob
import os

# If you want to widen the page
# you can modify *width* to the one you prefer

# `IPython.display` is the public import path; `IPython.core.display` is a
# deprecated location for `display`.
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

In [None]:
# Get files
# glob returns matches in arbitrary order: sort them so that the widget list
# and its default selection are the same on every run. The directory name is
# escaped so that glob metacharacters in it (e.g. "[" or "]") are taken
# literally.
files = sorted(glob.glob(os.path.join(glob.escape(os.getcwd()), "*.xlsx")))
if not files:
    # Fail with an explicit message instead of the IndexError that
    # `raw_files[0]` would raise below.
    raise FileNotFoundError("No .xlsx file found in " + os.getcwd())

# Bare file names (both '/' and '\\' separators are stripped), in the same
# order as `files`, so that an index in one list is valid in the other.
raw_files = [f.split('/')[-1].split('\\')[-1] for f in files]

# Selection list used by the cells below to pick the practical to analyse.
practical_nb = widgets.Select(
    options=raw_files,
    value=raw_files[0],
    rows=10,
    description='practical:',
    disabled=False
)

def _exercise_columns(header):
    """Build one unique column name per column from the two header rows.

    For each column the name is taken from row 0, or from row 1 when the
    row 0 cell is empty. Names already used get "_" appended until they are
    unique. Columns that are still unnamed are called
    "<name of the column on their left>_to_drop". The first column is always
    named 'login'.
    """
    cols = []
    for i in range(len(header.loc[0])):
        # Use the second row when the first-row cell is empty.
        row = 1 if str(header.loc[0, i]) == 'nan' else 0
        exercise = str(header.loc[row, i])
        while exercise in cols and exercise != 'nan':
            exercise += "_"
        cols.append(exercise)

    # Grouping columns two by two: an unnamed column is paired with the
    # named column that precedes it.
    for i in range(len(cols)):
        if cols[i] == 'nan':
            cols[i] = cols[i - 1] + "_to_drop"

    cols[0] = 'login'  # Just in case someone forgets...
    return cols


def _clean_sheet(sheet, cols):
    """Turn one raw grade sheet into a frame with one column per exercise."""
    max_grade = 2  # upper bound applied to each exercise grade

    sheet = sheet.drop([0, 1, 2])  # the first three rows are header rows
    sheet.columns = cols

    graded = cols[4:-2]
    # Anything that is not a number becomes NaN, then 0 in the fillna below.
    sheet[graded] = sheet[graded].apply(pd.to_numeric, errors='coerce')
    sheet = sheet.set_index('login')
    # Assign the result back: an in-place fillna on a column selection acts
    # on a temporary and is not guaranteed to modify the frame.
    sheet['corrector'] = sheet['corrector'].ffill()
    sheet = sheet.fillna(0)

    for i in range(4, len(cols) - 3, 2):
        # Merge each pair of columns into its first column, and actually
        # store the clamped value (guards against typos in grades).
        sheet[cols[i]] = (sheet[cols[i]] + sheet[cols[i + 1]]).clip(upper=max_grade)

    sheet = sheet.drop(columns=cols[5:-2:2])
    # (len(cols) - 6) is the number of graded columns, i.e. max_grade points
    # per pair, so `total` is a percentage of the maximum.
    sheet['total'] = sheet[cols[4:-2:2]].sum(axis=1) / (len(cols) - 6) * 100
    return sheet


def read_practical(file):
    """Read the grades of one practical from an .xlsx workbook.

    The first sheet of the workbook is ignored. The column names are built
    from the first two rows of the sheet named 'promo'. Every sheet from the
    third one on is then cleaned and stacked; when 'CTRL' appears in `file`
    the second sheet is processed as well.

    Parameters
    ----------
    file : str
        Path of the workbook.
        NOTE(review): 'CTRL' is searched in the whole path, directory
        included - confirm this is intended.

    Returns
    -------
    pandas.DataFrame
        One row per login (index), the non-graded columns, one column per
        exercise (capped at 2) and a 'total' column in percent. Empty when
        the workbook has no sheet to process.

    Raises
    ------
    KeyError
        If the workbook has no 'promo' sheet after the first one, or if a
        sheet lacks the 'corrector' column or the three header rows.
    """
    # The context manager closes the workbook handle once all sheets are read.
    with pd.ExcelFile(file) as workbook:
        sheet_names = list(workbook.sheet_names)
        sheets = {name: workbook.parse(name, header=None)
                  for name in sheet_names[1:]}

    # Getting all the different exercises
    cols = _exercise_columns(sheets['promo'])

    # Data cleaning for every sheet in the .xlsx
    first_sheet = 1 if 'CTRL' in file else 2
    cleaned = [_clean_sheet(sheets[name], cols)
               for name in sheet_names[first_sheet:]]

    if not cleaned:
        return pd.DataFrame()
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(cleaned)

# One cleaned grade table per workbook, in the same order as `files`.
practicals = list(map(read_practical, files))

In [None]:
# Show the selection list; the cells below read `practical_nb.value` to pick
# the practical they work on, so re-run them after changing the selection.
display(practical_nb)

In [None]:
# Normalization

# Position of the practical chosen in the widget (same order in `files`,
# `raw_files` and `practicals`).
selected_idx = raw_files.index(practical_nb.value)
print(files[selected_idx])

# Mean of every numeric column per group. numeric_only=True is required
# because the table also holds text columns (e.g. 'corrector'), on which
# recent pandas versions raise instead of silently skipping them.
stats_promo = practicals[selected_idx].groupby(['gr']).mean(numeric_only=True)
stats_promo.loc[:, stats_promo.columns != 'total'] *= 50 # Everything is in % now

In [None]:
# Overview of exercise success for each class

# One unit of width per column of `stats_promo`, fixed height.
fig, ax = plt.subplots(figsize=(len(stats_promo.columns), 8))
sns.heatmap(stats_promo, vmin=0, vmax=100, annot=True, cmap='Greens', ax=ax)

In [None]:
# Density estimate of the per-group mean of 'total'.
fig, ax = plt.subplots(figsize=(6, 4))
sns.kdeplot(stats_promo['total'], ax=ax)

In [None]:
# Grade density of each exercise for the selected practical
p = practicals[raw_files.index(practical_nb.value)]

# `read_practical` moves 'login' to the index, so only three non-graded
# columns precede the exercises: they start at position 3 (starting at 4
# skipped the first exercise). The last three columns are the two trailing
# non-graded columns and 'total'.
p[p.columns[3: -3]].plot(kind='density', figsize=(8, (len(p.columns) / 1.5)), subplots=True, xlim=(0, 2), ylim=(0, 1))
plt.show()