## Setup

In [61]:
from os import listdir, makedirs
from os.path import join, isdir, splitext
from unidecode import unidecode

from jinja2 import Environment, FileSystemLoader, select_autoescape
import pandas as pd

In [84]:
# Directory handed to the Jinja FileSystemLoader when JINJA_ENV is built.
TEMPLATES_DIR = '../../templates'
# Input of the grades pages: scanned for one sub-directory per course, named by its integer ID.
CONTENT_DIR = '../../data/4_content'
# Output directory passed to render_courses for the grades ("notas") pages.
GRADES_RESULT_DIR = '../../website/notas'

# CSV loaded into ID_DF; its `id`, `curso` and `unidade` columns are used by the lookup helpers.
ID_TABLE_PATH = '../../data/3_manually_processed/vagas/2022.csv'
# Input of the essay pages: read through the `enem` and `fuvest` sub-directories of CSV files.
ESSAYS_DIR = '../../data/3_manually_processed/redacoes'
# Output directory passed to render_essays for the essay ("redacoes") pages.
ESSAYS_RESULT_DIR = '../../website/redacoes'

## Support functions

In [63]:
def clean_string(string):
    """Keep only the alphanumeric characters of `string` and transliterate them.

    Every character for which `str.isalnum()` is false (spaces, punctuation,
    dashes, ...) is dropped; the remaining characters are joined and passed
    through `unidecode`.
    """
    kept_chars = "".join(filter(str.isalnum, string))
    return unidecode(kept_chars)

In [64]:
def get_course_name(course_id, id_df):
    """Return the `curso` value of the first row of `id_df` whose `id` is `course_id`.

    An `IndexError` propagates from `.iloc[0]` when no row matches.
    """
    matches_course = id_df['id'] == course_id
    return id_df.loc[matches_course, 'curso'].iloc[0]

In [65]:
def get_course_institute(course_id, id_df):
    """Return the `unidade` value of the first row of `id_df` whose `id` is `course_id`.

    An `IndexError` propagates from `.iloc[0]` when no row matches.
    """
    matches_course = id_df['id'] == course_id
    return id_df.loc[matches_course, 'unidade'].iloc[0]

In [66]:
def get_course_name_institute(course_id, id_df):
    """Return the `(name, institute)` pair of `course_id`, both looked up in `id_df`."""
    return (
        get_course_name(course_id, id_df),
        get_course_institute(course_id, id_df),
    )

In [67]:
def create_render_save_path(result_dir=None, course_id=None,
                            new_dir=None, filename=None,
                            id_df=None):
    """Build the path of a rendered HTML file and make sure its directory exists.

    The directory is `result_dir/<course_id>` (used only when both values are
    given), optionally extended by `new_dir`. The file name is `filename`, or
    the course name looked up in `id_df` when `filename` is None, passed
    through `clean_string` and suffixed with `.html`.

    Parameters
    ----------
    result_dir : str, optional
        Root output directory.
    course_id : int, optional
        Course identifier; becomes a sub-directory of `result_dir` and is the
        lookup key used when the file name comes from `id_df`.
    new_dir : str, optional
        Extra sub-directory appended to the save directory.
    filename : str, optional
        File name without extension.
    id_df : pandas.DataFrame, optional
        ID table used to resolve the course name when `filename` is None.

    Returns
    -------
    str
        `<save_dir>/<cleaned filename>.html`.

    Raises
    ------
    ValueError
        If neither `filename` nor `id_df` is provided, so no file name can
        be determined.
    """
    save_dir = ''
    if result_dir is not None and course_id is not None:
        save_dir = join(result_dir, str(course_id))
    if new_dir is not None:
        save_dir = join(save_dir, new_dir)
    # Create the directory for every non-empty save location, not only when
    # `new_dir` is given; otherwise the returned path may point into a folder
    # that does not exist yet and the subsequent write fails.
    if save_dir:
        makedirs(save_dir, exist_ok=True)

    if filename is None:
        if id_df is None:
            raise ValueError('Either `filename` or `id_df` must be provided.')
        filename = get_course_name(course_id, id_df)

    return join(save_dir, clean_string(filename) + '.html')

In [68]:
def save_website(website, path):
    """Write the rendered page `website` (a string) to `path`, replacing any existing file.

    The file is always encoded as UTF-8 so that accented characters in the
    pages do not depend on the platform's default locale encoding.
    """
    with open(path, mode='w', encoding='utf-8') as f:
        f.write(website)

## Rendering

In [69]:
# Shared Jinja environment; templates are resolved relative to TEMPLATES_DIR.
# NOTE(review): with its default arguments select_autoescape() appears to enable
# escaping only for template names ending in .html/.htm/.xml, while the templates
# loaded in this notebook end in `.jinja` — confirm autoescaping is meant to be off.
JINJA_ENV = Environment(
    autoescape=select_autoescape(),
    loader=FileSystemLoader(TEMPLATES_DIR),
)

### Grades

In [70]:
# Load the course ID table; convert_dtypes() switches the columns to pandas'
# nullable dtypes (the Int64/string dtypes printed by this cell).
ID_DF = pd.read_csv(ID_TABLE_PATH).convert_dtypes()
print(ID_DF.dtypes)
ID_DF

id              Int64
unidade        string
area           string
curso          string
fuvest_ac       Int64
fuvest_ep       Int64
fuvest_ppi      Int64
fuvest          Int64
enem_ac         Int64
enem_ep_l1      Int64
enem_ep_l3      Int64
enem_ep         Int64
enem_ppi_l2     Int64
enem_ppi_l4     Int64
enem_ppi        Int64
enem            Int64
dtype: object


Unnamed: 0,id,unidade,area,curso,fuvest_ac,fuvest_ep,fuvest_ppi,fuvest,enem_ac,enem_ep_l1,enem_ep_l3,enem_ep,enem_ppi_l2,enem_ppi_l4,enem_ppi,enem
0,0,EACH,E,Bacharelado em Sistemas de Informação,28,10,4,42,2,0,8,8,0,8,8,18
1,1,EACH,E,Bacharelado em Sistemas de Informação,56,21,7,84,4,0,16,16,0,16,16,36
2,2,EACH,E,Licenciatura em Ciências da Natureza,30,9,3,42,0,0,9,9,0,9,9,18
3,3,EACH,B,Bacharelado em Educação Física e Saúde,20,14,8,42,10,4,0,4,4,0,4,18
4,4,EACH,B,Bacharelado em Biotecnologia,30,11,1,42,0,0,7,7,0,11,11,18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172,171,IQ,E,Química – Bacharelado e Licenciatura – Integra...,22,12,8,42,8,2,4,6,2,2,4,18
173,172,IQ,E,Química – Bacharelado e Licenciatura – Noturno...,22,12,8,42,8,2,4,6,2,2,4,18
174,173,IQSC,E,Química Bacharelado (Hab. Fundamental e Hab. T...,24,12,6,42,6,3,3,6,3,3,6,18
175,174,IRI,H,Bacharelado em Relações Internacionais,14,5,2,21,1,3,1,4,3,1,4,9


In [71]:
# Sanity check: look up the name of the course whose ID is 0.
get_course_name(0, ID_DF)

'Bacharelado em Sistemas de Informação'

#### Courses

In [72]:
def read_course_templates(jinja_env, fuvest_filename='notas.html.jinja'):
    """Load the templates used to render the course pages.

    Returns a 1-tuple holding the Fuvest template fetched from `jinja_env`,
    so callers unpack it with a trailing comma.
    """
    # TODO enem_course_template = jinja_env.get_template('curso_enem.html.jinja')
    fuvest_template = jinja_env.get_template(fuvest_filename)
    return (fuvest_template,)
    

In [73]:
# Print the name of every course that has a directory inside CONTENT_DIR.
for course_id_str in listdir(CONTENT_DIR):
    if not isdir(join(CONTENT_DIR, course_id_str)):
        continue  # plain files are not course directories
    course_id = int(course_id_str)
    course_name = get_course_name(course_id, ID_DF)
    print(course_name)

Pedagogia
Farmácia
Bach. Com. Social: Editoração
Ciências dos Alimentos - Bach.
Psicologia - Bach. e Psicólogo
Licenciatura em Educomunicação
Física – Bacharelado
Bacharelado em Ciências Atuariais
Bacharelado em Finanças e Negócios
Engenharia Elétrica (Ênfase em Eletrônica)
Engenharia de Materiais
Odontologia
Física – Bacharelado
Engenharia Mecatrônica
Bacharelado em Relações Públicas
Arquitetura e Urbanismo
Medicina
Direito
Engenharia Agronômica
Medicina Veterinária
Direito
Engenharia Aeronáutica
Bacharelado em Ciências Físicas e Biomoleculares -Ênfase Tecnológica
Fonoaudiologia
História - Bach./Lic.
Bacharelado em Ciências Econômicas
Engenharia Elétrica (Ênfase em Sistemas de Energia e Automação)
Bacharelado em Física Médica
Engenharia Florestal
Direito
Letras – Bach./Lic.
Matemática - Licenciatura
Engenharia Naval
Filosofia - Bach./Lic.
Nutrição
Bacharelado em Ciências Econômicas
Matemática Aplicada - Bacharelado
Bacharelado em Biotecnologia
Arquitetura e Urbanismo
Bacharelado em Ci

In [74]:
# For every course directory, report where its Fuvest content lives.
# Any other entry inside a course directory aborts the cell.
for course_id_str in listdir(CONTENT_DIR):
    course_dir = join(CONTENT_DIR, course_id_str)
    if not isdir(course_dir):
        continue
    course_id = int(course_id_str)
    course_name = get_course_name(course_id, ID_DF)
    for exam in listdir(course_dir):
        if exam == 'enem':
            raise NotImplementedError("Only Fuvest grades can be parsed at the moment, not Enem's")
        if exam != 'fuvest':
            raise NotImplementedError(f'Unknown exam type inside directory {course_dir}.')
        fuvest_dir = join(course_dir, 'fuvest')
        # One call, same text as three separate prints: title, path, blank lines.
        print(f'{course_name} (ID {course_id_str})', fuvest_dir, '\n', sep='\n')

Pedagogia (ID 91)
../../data/4_content/91/fuvest


Farmácia (ID 76)
../../data/4_content/76/fuvest


Bach. Com. Social: Editoração (ID 24)
../../data/4_content/24/fuvest


Ciências dos Alimentos - Bach. (ID 67)
../../data/4_content/67/fuvest


Psicologia - Bach. e Psicólogo (ID 97)
../../data/4_content/97/fuvest


Licenciatura em Educomunicação (ID 30)
../../data/4_content/30/fuvest


Física – Bacharelado (ID 156)
../../data/4_content/156/fuvest


Bacharelado em Ciências Atuariais (ID 82)
../../data/4_content/82/fuvest


Bacharelado em Finanças e Negócios (ID 90)
../../data/4_content/90/fuvest


Engenharia Elétrica (Ênfase em Eletrônica) (ID 44)
../../data/4_content/44/fuvest


Engenharia de Materiais (ID 36)
../../data/4_content/36/fuvest


Odontologia (ID 130)
../../data/4_content/130/fuvest


Física – Bacharelado (ID 152)
../../data/4_content/152/fuvest


Engenharia Mecatrônica (ID 50)
../../data/4_content/50/fuvest


Bacharelado em Relações Públicas (ID 27)
../../data/4_content/27/

In [75]:
# Relies on notebook state: `fuvest_dir` holds whatever the loop in the previous cell assigned last.
fuvest_dir

'../../data/4_content/42/fuvest'

In [76]:
def read_file_content(filepath):
    """Return the whole text content of the file at `filepath`.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the platform's default locale encoding.
    """
    with open(filepath, mode='r', encoding='utf-8') as f:
        return f.read()

In [77]:
def parse_fuvest_content(path):
    """Read the Fuvest content stored in the directory `path`.

    Returns a 3-tuple:
    - the text of `latest_metrics.html`;
    - the text of `metric_evolution.html`;
    - a dict mapping each sub-directory name of `path` (a year) to a dict
      `{file name without extension: file text}` for the files it contains.

    Plain files directly inside `path` are ignored by the year scan.
    """
    latest_metrics_plotly_div = read_file_content(join(path, 'latest_metrics.html'))
    metric_evolution_plotly_div = read_file_content(join(path, 'metric_evolution.html'))

    grade_dict = {}
    for entry in listdir(path):
        entry_dir = join(path, entry)
        if not isdir(entry_dir):
            continue
        grade_dict[entry] = {
            splitext(quota_filename)[0]: read_file_content(join(entry_dir, quota_filename))
            for quota_filename in listdir(entry_dir)
        }

    return latest_metrics_plotly_div, metric_evolution_plotly_div, grade_dict

#### Exam selection

In [78]:
def render_selection(jinja_env, result_dir, course_id,
                     course_name=None, id_df=None,
                     fuvest=False, enem=False,
                     fuvest_url=None, enem_url=None,
                     template_filename='vestibulares.html.jinja',
                     render_filename='vestibulares'):
    """Render and save the exam-selection page of one course.

    `course_name` is looked up in `id_df` when it is not given. The page is
    rendered from `template_filename` and written to the path returned by
    `create_render_save_path` for `result_dir`, `course_id` and
    `render_filename`.
    """
    if course_name is None:
        course_name = get_course_name(course_id, id_df)

    context = {
        'active_link': 'Notas',
        'root_path': '../../',
        'course_name': course_name,
        'render_fuvest': fuvest,
        'render_enem': enem,
        'fuvest_url': fuvest_url,
        'enem_url': enem_url,
        'button_text': 'Acesse as notas',
    }
    page_html = jinja_env.get_template(template_filename).render(**context)
    page_path = create_render_save_path(result_dir=result_dir,
                                        course_id=course_id,
                                        filename=render_filename)
    save_website(page_html, page_path)

#### Course selection

In [79]:
def render_course_selection(jinja_env, result_dir,
                            all_courses_dict=None,
                            template_filename='cursos.html.jinja',
                            render_filename='cursos'):
    """Render the course index page and save it as `<result_dir>/<render_filename>.html`.

    `all_courses_dict` is forwarded unchanged to the template.
    """
    page_html = jinja_env.get_template(template_filename).render(
        all_courses_dict=all_courses_dict,
        root_path='../',
        active_link='Notas')
    save_website(page_html, join(result_dir, render_filename+'.html'))

#### Final rendering

In [85]:
def render_courses(content_dir, jinja_env, id_df, result_dir):
    """Render the grade pages of every course plus the course index page.

    Each sub-directory of `content_dir` is named after a course ID. For every
    course, its Fuvest page (when a `fuvest` directory exists) and its
    exam-selection page are written below `result_dir/<course_id>`. The course
    index is written once, after all courses were processed.

    Parameters
    ----------
    content_dir : str
        Directory holding one sub-directory per course ID.
    jinja_env : jinja2.Environment
        Environment the templates are loaded from.
    id_df : pandas.DataFrame
        ID table used to resolve course names and institutes.
    result_dir : str
        Root output directory of the grades pages.

    Raises
    ------
    NotImplementedError
        If a course directory contains an `enem` entry.
    ValueError
        If a course directory contains any other unknown entry, or if a
        sub-directory name of `content_dir` is not an integer.
    """
    fuvest_template, = read_course_templates(jinja_env)
    all_courses_dict = {}

    for course_id_str in listdir(content_dir):
        course_dir = join(content_dir, course_id_str)
        if not isdir(course_dir):
            continue

        course_id = int(course_id_str)
        course_name, course_institute = get_course_name_institute(course_id, id_df)
        # Values are relative URLs, hence '/' instead of the OS path separator.
        all_courses_dict[f'{course_institute} - {course_name}'] = f'{course_id_str}/vestibulares.html'

        # Reset for every course: otherwise a course without Fuvest content
        # would inherit the flag from a previously processed course.
        render_fuvest = False
        render_enem = False

        for exam in listdir(course_dir):
            if exam == 'fuvest':
                render_fuvest = True
                fuvest_dir = join(course_dir, 'fuvest')
                latest_metrics, metric_evolution, grade_dict = parse_fuvest_content(fuvest_dir)
                fuvest_website = fuvest_template.render(
                    active_link='Notas',
                    root_path='../../../',
                    course_name=course_name,
                    latest_metrics_plotly_div=latest_metrics,
                    metric_evolution_plotly_div=metric_evolution,
                    grade_dict=grade_dict)
                fuvest_path = create_render_save_path(
                    result_dir=result_dir,
                    course_id=course_id,
                    new_dir='fuvest',
                    filename=course_name,
                )
                save_website(fuvest_website, fuvest_path)
            elif exam == 'enem':
                raise NotImplementedError(f'Only Fuvest results can be parsed at the moment, but directory {course_dir} contained Enem results.')
            else:
                raise ValueError(f'Unknown exam type inside directory {course_dir}.')

        # The selection page is written straight into the course directory,
        # which nothing has created yet when the course has no exam content.
        makedirs(join(result_dir, str(course_id)), exist_ok=True)

        # Build the links directly instead of through create_render_save_path,
        # which would create stray `fuvest`/`enem` directories in the current
        # working directory as a side effect.
        page_filename = clean_string(course_name) + '.html'
        render_selection(jinja_env, result_dir, course_id, course_name,
                         fuvest=render_fuvest, enem=render_enem,
                         fuvest_url=f'fuvest/{page_filename}',
                         enem_url=f'enem/{page_filename}')

    # Render the index once with the complete course list rather than
    # rewriting it after every single course.
    if all_courses_dict:
        render_course_selection(jinja_env, result_dir,
                                all_courses_dict=all_courses_dict)

In [107]:
# Generate all grades pages from CONTENT_DIR into GRADES_RESULT_DIR.
render_courses(CONTENT_DIR, JINJA_ENV, ID_DF, GRADES_RESULT_DIR)

In [106]:
# !rm -rf ../../website/notas/*

### Essays

#### Lists

In [87]:
def join_essay_files(exam, essays_dir):
    """Concatenate every CSV file of `essays_dir/<exam>` into one DataFrame.

    Each file is read with `pd.read_csv` and passed through `convert_dtypes()`.
    The frames are stacked row-wise in `listdir` order and each keeps its own
    row index, so index labels may repeat in the result.
    """
    exam_dir = join(essays_dir, exam)
    frames = [
        pd.read_csv(join(exam_dir, csv_name)).convert_dtypes()
        for csv_name in listdir(exam_dir)
    ]
    return pd.concat(frames, axis=0)

In [88]:
# Load all Enem essay tables into one frame and preview it.
enem_essays = join_essay_files('enem', ESSAYS_DIR)
enem_essays

Unnamed: 0,ano,nota,c1,c2,c3,c4,c5,drive_id,url
0,2018,920,180,180,200,180,180,1_i2icBmoym3I6tVwZiiBngcKrA6LbaZO,https://drive.google.com/file/d/1_i2icBmoym3I6...
1,2021,760,160,120,140,160,180,1QOdWS15kBgfka8HrXcjiXdHoUOV9Keu1,https://drive.google.com/file/d/1QOdWS15kBgfka...
2,2021,920,140,200,200,200,180,1__sggpQa_zAn_iacA9_YhhY4KZ7JtnTL,https://drive.google.com/file/d/1__sggpQa_zAn_...
3,2021,980,180,200,200,200,200,17FC_gqSN42JAGxEGPZkbw_y3GfOgKFNm,https://drive.google.com/file/d/17FC_gqSN42JAG...
0,2020,920,160,200,160,200,200,1CA5BuMVUlOGedcrzcyfSHRhuU2ksJCOk,https://drive.google.com/file/d/1CA5BuMVUlOGed...
...,...,...,...,...,...,...,...,...,...
8,2021,940,160,200,180,200,200,1U9HdVZr11EtPXX0Y1Z8cVsSPKLV24_W_,https://drive.google.com/file/d/1U9HdVZr11EtPX...
9,2021,940,160,200,180,200,200,19acd7zWYTzFQME3otPJheoPu01He4Ocn,https://drive.google.com/file/d/19acd7zWYTzFQM...
10,2020,660,140,120,160,120,120,1rZWD5yrcsR35tK3bzpZY-sCgjP074VoB,https://drive.google.com/file/d/1rZWD5yrcsR35t...
11,2021,960,160,200,200,200,200,1hokIHwjrd4cIfhchfARGYg0o0K2mDgea,https://drive.google.com/file/d/1hokIHwjrd4cIf...


In [89]:
# Load all Fuvest essay tables into one frame and preview it.
fuvest_essays = join_essay_files('fuvest', ESSAYS_DIR)
fuvest_essays

Unnamed: 0,ano,nota,drive_id,url
0,2018,36.5,1ThLLNWnUHgGq55ElFVUFVysSem8o83HY,https://drive.google.com/file/d/1ThLLNWnUHgGq5...
1,2018,33.0,1J6c8CNdx6Yy1ye0gK8DLo1Oo3yUL3DLK,https://drive.google.com/file/d/1J6c8CNdx6Yy1y...
2,2020,38.5,1pJ5_z-00dpX4mbsPSzUsLX4pKFuWE434,https://drive.google.com/file/d/1pJ5_z-00dpX4m...
3,2020,42.0,1p2WISpPRTiRxxb8uzyyf3Qbkd2OgkaiS,https://drive.google.com/file/d/1p2WISpPRTiRxx...
4,2020,43.0,1y0lhK2DInBrNr29fNcmR1jUo3SZKhjZU,https://drive.google.com/file/d/1y0lhK2DInBrNr...
...,...,...,...,...
289,2021,35.0,1thKrNuHXwK2HIOFNh-Q8dVZI9XPQuR3J,https://drive.google.com/file/d/1thKrNuHXwK2HI...
290,2021,47.0,19HoxizBATvv4J5_2cROZHGwtlH33Ip98,https://drive.google.com/file/d/19HoxizBATvv4J...
291,2021,39.5,13THXqRzCQowVPwormQEy0uMtGuZZ5e9e,https://drive.google.com/file/d/13THXqRzCQowVP...
292,2021,38.0,1E1P5VKDYPb3hTIatGLg37bB86TW0HO0V,https://drive.google.com/file/d/1E1P5VKDYPb3hT...


In [90]:
def build_years_dict(essays_df):
    """Group essays by year: `{ano: {nota: essay row as a dict}}`.

    Rows are grouped on the `ano` column; within a year every row is stored
    under its `nota` value, so a later row with the same grade replaces an
    earlier one.

    NOTE(review): because of that, only one essay per (year, grade) pair
    survives — confirm this is what the essay template expects.
    """
    return {
        year: {
            essay_series['nota']: essay_series.to_dict()
            for _, essay_series in year_essays.iterrows()
        }
        for year, year_essays in essays_df.groupby('ano')
    }

In [91]:
# Preview the nested {year: {grade: essay}} mapping built from the Enem essays.
build_years_dict(enem_essays)

{2016: {820: {'ano': 2016,
   'nota': 820,
   'c1': 160,
   'c2': 160,
   'c3': 160,
   'c4': 180,
   'c5': 160,
   'drive_id': '1z8-2IDtPjCJx6PFc0_y0iMQczx5N33dN',
   'url': 'https://drive.google.com/file/d/1z8-2IDtPjCJx6PFc0_y0iMQczx5N33dN/view'}},
 2018: {920: {'ano': 2018,
   'nota': 920,
   'c1': <NA>,
   'c2': <NA>,
   'c3': <NA>,
   'c4': <NA>,
   'c5': <NA>,
   'drive_id': '1AyoH4ZogqtuI64BSZ6HCaRqRE2iePT_V',
   'url': 'https://drive.google.com/file/d/1AyoH4ZogqtuI64BSZ6HCaRqRE2iePT_V/view'},
  940: {'ano': 2018,
   'nota': 940,
   'c1': 180,
   'c2': 200,
   'c3': 180,
   'c4': 180,
   'c5': 200,
   'drive_id': '1TUqOsxw6tigBab-UlxT1BanTA-54saq_',
   'url': 'https://drive.google.com/file/d/1TUqOsxw6tigBab-UlxT1BanTA-54saq_/view'},
  960: {'ano': 2018,
   'nota': 960,
   'c1': 180,
   'c2': 200,
   'c3': 180,
   'c4': 200,
   'c5': 200,
   'drive_id': '1oV7r3jxg7-KqX7wAF5YyD_hL24v01Rzu',
   'url': 'https://drive.google.com/file/d/1oV7r3jxg7-KqX7wAF5YyD_hL24v01Rzu/view'},
  980:

In [92]:
# Prototype: render the Enem essay page into the current working directory.
ESSAY_TEMPLATE = JINJA_ENV.get_template('redacoes.html.jinja')
# Written explicitly as UTF-8 so the accented text does not depend on the
# platform's default locale encoding.
with open('enem.html', 'w', encoding='utf-8') as f:
    f.write(ESSAY_TEMPLATE.render(
        years_dict=build_years_dict(enem_essays),
        exam='Enem',
        criteria=['c1', 'c2', 'c3', 'c4', 'c5'],
        active_link='Redações',
        root_path='../'))

In [93]:
# Prototype: render the Fuvest essay page into the current working directory.
# No `criteria` are passed for this exam.
ESSAY_TEMPLATE = JINJA_ENV.get_template('redacoes.html.jinja')
# Written explicitly as UTF-8 so the accented text does not depend on the
# platform's default locale encoding.
with open('fuvest.html', 'w', encoding='utf-8') as f:
    f.write(ESSAY_TEMPLATE.render(
        years_dict=build_years_dict(fuvest_essays),
        exam='Fuvest',
        active_link='Redações',
        root_path='../'))

#### Exam selection

In [94]:
# Prototype: render the exam-selection page for the essays section and save
# it next to the notebook.
SELECTION_TEMPLATE = JINJA_ENV.get_template('vestibulares.html.jinja')
selection_website = SELECTION_TEMPLATE.render(
    render_fuvest=True,
    fuvest_url='fuvest.html',
    render_enem=True,
    enem_url='enem.html',
    button_text='Acesse as redações',
    active_link='Redações',
    root_path='../',
)
save_website(selection_website, 'example_selection.html')

#### Final rendering

In [95]:
def render_essays(jinja_env, essays_dir, result_dir,
                  essay_template_filename='redacoes.html.jinja',
                  selection_template_filename='vestibulares.html.jinja'):
    """Render the essay pages of both exams plus their exam-selection page.

    For `enem` and `fuvest`, the CSV files below `essays_dir/<exam>` are
    joined, grouped by year and rendered to `<result_dir>/<exam>.html`. The
    selection page linking to both is written to
    `<result_dir>/vestibulares.html`.

    Parameters
    ----------
    jinja_env : jinja2.Environment
        Environment the templates are loaded from.
    essays_dir : str
        Directory holding one sub-directory of CSV files per exam.
    result_dir : str
        Output directory; created when it does not exist yet.
    essay_template_filename : str
        Template of the per-exam essay page.
    selection_template_filename : str
        Template of the exam-selection page.
    """
    # Only the Enem tables carry the per-criterion columns c1..c5.
    criteria_by_exam = {'enem': ['c1', 'c2', 'c3', 'c4', 'c5']}

    makedirs(result_dir, exist_ok=True)
    essay_template = jinja_env.get_template(essay_template_filename)

    for exam in ['enem', 'fuvest']:
        exam_essays = join_essay_files(exam, essays_dir)
        years_dict = build_years_dict(exam_essays)
        # A plain list (or None): the previous trailing comma wrapped the list
        # in a 1-tuple, so the template never saw the individual criteria.
        criteria = criteria_by_exam.get(exam)

        essay_website = essay_template.render(
            years_dict=years_dict,
            # NOTE(review): the prototype cells pass 'Enem'/'Fuvest' (capitalized)
            # here — confirm which spelling the template expects.
            exam=exam,
            criteria=criteria,
            active_link='Redações',
            root_path='../')
        save_website(essay_website, join(result_dir, exam+'.html'))

    selection_template = jinja_env.get_template(selection_template_filename)
    selection_website = selection_template.render(
        active_link='Redações',
        root_path='../',
        render_fuvest=True,
        render_enem=True,
        fuvest_url='fuvest.html',
        enem_url='enem.html',
        button_text='Acesse as redações')
    save_website(selection_website, join(result_dir, 'vestibulares.html'))

In [108]:
# Generate all essay pages from ESSAYS_DIR into ESSAYS_RESULT_DIR.
render_essays(JINJA_ENV, ESSAYS_DIR, ESSAYS_RESULT_DIR)

In [105]:
# !rm -rf ../../website/redacoes/*