# In [None]:
from pathlib import Path
import pandas as pd
from fontTools.ttLib.tables.otTraverse import dfs_base_table

import src.DataConsolidation.preprocessing as pp
import src.DataConsolidation.function_utils as fu
import src.DataExploration.data_description as de
import src.DataConsolidation.cleaning as cl
import src.DataConsolidation.data_integration as di

# Input / output locations, expressed relative to the working directory.
PATH_JSON = Path("../Datasets/JSON_files/")
PATH_PICKLE = Path("../Datasets/xAPI_logs/PICKLE_files/")
PATH_PREPROC = Path("../Datasets/xAPI_logs/PREPROCESSED_files/")

# Numeric identifiers of the courses iterated over by the pipeline loop.
COURSES = list(range(1, 13))

# One-letter course labels ('A' through 'M').
# NOTE(review): there are 13 labels here but only 12 ids in COURSES, and this
# list is not referenced in the visible code -- confirm which one is intended.
COURSES_NAMES = list("ABCDEFGHIJKLM")

# Per-course pipeline: preprocessing -> integration -> selection -> cleaning,
# then the resulting dataframe is pickled under PATH_PREPROC.
#
# The output directory is created once up front so the final to_pickle() call
# cannot fail on a missing folder (e.g. on a fresh checkout).
PATH_PREPROC.mkdir(parents=True, exist_ok=True)

for course in COURSES:
    # ----------------------------
    # DATA PREPROCESSING
    # ----------------------------
    # Convert the raw JSON logs of this course. The pickle read back on the
    # next lines is expected to be produced by this call under PATH_PICKLE.
    pp.preprocess_json_files(course=course, path_json=PATH_JSON, path_pickle=PATH_PICKLE)
    print('Course: ', course)
    df = pd.read_pickle(PATH_PICKLE / f"cours_{course}.pkl")

    # Dataframe-level preprocessing. Every step takes the dataframe and
    # returns the updated one; the order is kept unchanged because later
    # steps may depend on columns produced by earlier ones.
    df = pp.filter_and_rename_columns(df)
    df = pp.merge_duplicates_missing_values(df)
    df = pp.add_component_event_name(df)
    df = pp.extract_ids(df)
    df = pp.add_course_area(df)
    df = pp.redefine_component(df)
    df = pp.redefine_event_name(df)
    df = pp.add_activity_status(df)

    # User roles: build the role table, drop the users flagged as potential
    # fake students, then attach the roles to the log dataframe.
    role_table = pp.get_role_table(df, course)
    # Optional diagnostic (disabled): describe the roles before filtering.
    # de.preprocessing_description(role_table, df, course)
    students_to_remove = pp.detect_potential_fake_students(df, course)
    role_table = pp.remove_fake_students(role_table, students_to_remove)
    df = pp.assign_roles(role_table, df, course)

    # ----------------------------
    # DATA INTEGRATION
    # ----------------------------
    # Build the group table from the logs and attach the groups to them.
    group_table = di.get_group_table(df, course)
    df = di.assign_groups(group_table, df, course)

    # ----------------------------
    # DATA SELECTION
    # ----------------------------
    # Keep only the rows inside the semester window (timestamp-based filter).
    df = pp.filter_semester_data(df, course)

    # ----------------------------
    # DATA CLEANING
    # ----------------------------
    # Remove the events that carry no value for the analysis.
    df = cl.clean_events(df)

    # Optional exploration (disabled): actions performed on course activities.
    # de.activities_description(df, course)

    # Save the dataset for the analysis. PATH_PREPROC is the module-level
    # constant; it no longer needs to be rebuilt on every iteration.
    df.to_pickle(PATH_PREPROC / f"cours_{course}.pkl")
