### Import Libs

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import sqlite3
import ast

### Df Params

In [20]:
# Notebook switches read by the loading cell below.
# af_by_name: when True, each AF entry read from the database is replaced by
#             its value in the lookup dictionary.
af_by_name = True
# get_email: when True, the e-mail column is also queried and placed first.
get_email = False

### Getting df from db

In [21]:
# Build `df`: one row per questionnaire answer, holding the five "Mise" values
# followed by the 57 positional AF entries (optionally preceded by the e-mail).
N_MISES = 5   # number of 'Mise N' columns
N_AF = 57     # number of AF position columns (labelled 1..57)

columns = [f'Mise {n}' for n in range(1, N_MISES + 1)] + list(range(1, N_AF + 1))
query_columns = 'Mises, AF'
if get_email:
    columns = ['Email'] + columns
    query_columns = query_columns + ', email'

# NOTE(review): unpickling can execute arbitrary code -- only load a trusted file.
AF_dict = pd.read_pickle('Dicionario.pkl')

# query_columns is assembled only from the literals above, so interpolating it
# into the statement does not expose the query to external input.
cmd = f"SELECT {query_columns} from questionnaire_s8 ORDER BY ID ASC"

conn = sqlite3.connect("banco_de_dados.db")
try:
    cursor = conn.cursor()
    cursor.execute(cmd)
    query_result = cursor.fetchall()
finally:
    # Release the database handle even if the query fails.
    conn.close()

# Collect plain lists and build the frame once: DataFrame.append was removed in
# pandas 2.0, and growing a frame row by row is quadratic anyway.
rows = []
for record in query_result:
    # Both columns store a Python list literal as text.
    mises = ast.literal_eval(record[0])
    disciplinas = ast.literal_eval(record[1])
    if af_by_name:
        disciplinas = [AF_dict[code] for code in disciplinas]

    # Pad short answers with '-' so every row has N_AF AF entries.
    disciplinas = disciplinas + ['-'] * (N_AF - len(disciplinas))

    row = mises + disciplinas
    if get_email:
        row = [record[2]] + row

    # Keep the strictness of the former per-row Series construction: a row
    # whose length does not match the column list is an error, not silently
    # padded or truncated.
    if len(row) != len(columns):
        raise ValueError(
            f'Row has {len(row)} values but {len(columns)} columns are expected'
        )
    rows.append(row)

# dtype=object keeps the mixed int/str cells as plain Python objects.
df = pd.DataFrame(rows, columns=columns, dtype=object)

### Creating Visualization

#### Organizing data

In [64]:
# dict_AF maps every value found in the AF columns (labelled 1..57) to the list
# of column labels, i.e. positions, at which it appears across all rows of df.
dict_AF = {}
af_labels = list(range(1, 58))

for row_label in df.index:
    ranked = df.loc[row_label, af_labels]

    for place in range(1, ranked.shape[0] + 1):
        # Create the list on first sight of a value, then record the position.
        dict_AF.setdefault(ranked.loc[place], []).append(place)

In [65]:
# dict_mises maps each 'Mise N' column label to the list of values found in
# that column, in row order.
dict_mises = {}
mise_labels = [f'Mise {n}' for n in range(1, 6)]

for row_label in df.index:
    row_mises = df.loc[row_label, mise_labels]

    for slot in range(1, row_mises.shape[0] + 1):
        label = f'Mise {slot}'
        value = row_mises.loc[label]
        dict_mises.setdefault(label, []).append(value)


In [89]:
# Rebuild dict_AF with its entries in descending key order (Z -> A for string keys).
# NOTE(review): the previous comment said "alphabetical order", but the code
# reverses the sort -- confirm which order is intended.
dict_AF = dict(sorted(dict_AF.items(), reverse=True))

#### Creating plot

In [None]:
# Write one histogram page per AF into 'histogramas.pdf', showing how many
# times the AF was placed at each position 1..57.
with PdfPages('histogramas.pdf') as pdf:
    for subject, classifications in dict_AF.items():
        if subject == '-':  # '-' is the padding placeholder, not a real AF
            continue

        fig, ax = plt.subplots(figsize=(17, 10))
        # Integer bin edges 1..58 with align='left' centre each bar on its position.
        ax.hist(classifications, bins=range(1, 59), align='left', rwidth=.8)

        # Counts are whole numbers, so replace the automatic y ticks with
        # unit steps up to the current top tick.
        loc = ax.get_yticks()
        ax.set_yticks(np.arange(0, max(loc), step=1))
        ax.grid(axis='y', alpha=0.5)
        ax.set_xticks(np.arange(1, 58, step=1))

        ax.set_title(subject, fontsize=20)
        ax.set_xlabel('Position', fontsize=20)
        # The label was previously mojibake ('QuantitÃ©'); restored to the
        # intended accented text.
        ax.set_ylabel('Quantité de personnes', fontsize=20)

        pdf.savefig(fig)
        # Close each figure once saved; otherwise one open figure per AF
        # accumulates in memory for the whole loop.
        plt.close(fig)