In [None]:
import os
import json
import pandas as pd
from zoomin.database.db_access import get_var_names
from zoomin.data.comparison import compare_data

In [None]:
# Output folder for the comparison report: three directory levels above the
# current working directory, then reports/02_data_comparison.
cwd = os.getcwd()
report_path = os.path.join(cwd, *([".."] * 3), "reports", "02_data_comparison")

In [None]:
# Small string fragments used when assembling LaTeX source in f-strings.
nl = "\n"          # plain newline character
latex_nl = "\\\\"  # two backslashes, i.e. the LaTeX line-break command
esc_nl = "\\"      # one backslash; prepended to LaTeX command names (e.g. chapter, section)

In [None]:
# Names of the variables to compare; the report loop below iterates over these.
# NOTE(review): presumably read from the zoomin database (helper lives in
# zoomin.database.db_access) - confirm the return type and ordering there.
var_names = get_var_names()

In [None]:
# Substring substitutions applied to the LaTeX emitted by pandas: switch the
# tabular environment to tabularx and give it a width of \textwidth.
replacements = {
    'begin{tabular}': 'begin{tabularx}{\\textwidth}',
    'end{tabular}': 'end{tabularx}',
}


def get_latex_table(df):
    """Render ``df`` as LaTeX source for a centred ``table*`` float.

    The index is hidden, horizontal rules are enabled, and the float is
    placed with the ``h`` specifier. Every column uses the ``X`` column type
    with vertical rules in between, and the ``tabular`` environment in the
    generated source is rewritten to ``tabularx`` spanning ``\\textwidth``
    (see ``replacements``).

    Parameters
    ----------
    df : pandas.DataFrame
        Table to render.

    Returns
    -------
    str
        The LaTeX source of the table.
    """
    # One "X" per column, each followed by a vertical rule: "| X | X | ... |"
    extra_columns = len(df.columns) - 1
    col_format = "| X |" + " X |" * extra_columns

    # Let pandas produce the LaTeX, without the index column
    styler = df.style.hide(axis="index")
    table = styler.to_latex(
        column_format=col_format,
        position_float="centering",
        hrules=True,
        environment="table*",
        position="h",
    )

    # Swap tabular for tabularx and add width=textwidth
    for old, new in replacements.items():
        table = table.replace(old, new)

    return table

In [None]:
# Accumulates one record per successfully compared variable, with the keys
# "var_name", "var_source" and "var_quality_level"; turned into a DataFrame
# and written to CSV at the end of the notebook.
var_source_quality_dict_list = []

In [None]:
# Generate one LaTeX fragment (<i>.tex) per variable and record which source
# is selected for it.

# Start from an empty list inside this cell so that re-running the cell does
# not append duplicate records to those of a previous run.
var_source_quality_dict_list = []

# Folder receiving the per-variable fragments. Only this leaf folder is
# created when missing; a wrong report_path still fails loudly instead of
# silently creating a stray directory tree.
sub_tex_dir = os.path.join(report_path, "sub_tex_files")
if not os.path.isdir(sub_tex_dir):
    os.mkdir(sub_tex_dir)

for i, var_name in enumerate(var_names):
    print(var_name)
    try:
        # NOTE(review): compare_data is presumably what raises ValueError when
        # a variable has no data - confirm. A ValueError raised while
        # rendering the tables is reported with the same message below.
        comparison_df, details_df_list = compare_data(var_name)

        # Assemble the whole fragment in memory before touching the disk, so
        # a failure while rendering never leaves a truncated .tex file behind.
        tex_parts = [
            # Chapter
            f"{esc_nl}chapter{{Variable: {var_name}}}{nl}",
            # Section: Overview table
            f"{esc_nl}section{{Overview}}{nl}",
            get_latex_table(comparison_df),
            nl,
            # Section: Details
            f"{esc_nl}section{{Details}}{nl}",
        ]
        tex_parts.extend(get_latex_table(details_df) for details_df in details_df_list)

        # Explicit encoding: variable names and table cells may contain
        # non-ASCII characters, and the platform default encoding varies.
        with open(os.path.join(sub_tex_dir, f'{i}.tex'), "w", encoding="utf-8") as f:
            f.write("".join(tex_parts))

        # NOTE: for now just choosing the first one as the best
        # and assigning quality= "good"
        var_source_quality_dict_list.append({"var_name": var_name,
                                             "var_source": comparison_df.iloc[0]["Source"],
                                             "var_quality_level": "good"})

    except ValueError:
        print(f"No data found for {var_name}.")

In [None]:
# One row per variable that was compared successfully; the columns come from
# the record keys ("var_name", "var_source", "var_quality_level").
quality_df = pd.DataFrame(var_source_quality_dict_list)

In [None]:
# Persist the selected source / quality level per variable next to the report,
# without writing the DataFrame index as a column.
quality_df.to_csv(os.path.join(report_path, "quality_df.csv"), index=False)