In [None]:
import os
import pandas as pd
from src.all_in_one import *
# Date window bounds (ISO date strings) handed to dataframize_csv as its
# lower and upper thresholds in the aggregation cell.
down_threshold, upper_threshold = "2022-12-31", "2024-01-01"

In [None]:
def dataframize_csv(file_path, file_sheet, configuration_file, lower_threshold=None, upper_threshold=None):
    """
    Count the date entries in a CSV file that fall inside a date window.

    The CSV is loaded, ``dtype_trans`` is applied to it using the dataset
    configuration, and then every date column listed in ``Dataset.dvars`` is
    filtered independently against the two thresholds. The non-null values
    left in each column are counted and the per-column counts are summed.

    Parameters:
    - file_path: Path to the CSV file (anything ``pd.read_csv`` accepts).
    - file_sheet: Sheet name or identifier passed to ``Dataset``.
    - configuration_file: Configuration file passed to ``Dataset``.
    - lower_threshold: Lower date threshold, forwarded to ``filter_df_by_date``
      with ``comparison_type='a'``.
    - upper_threshold: Upper date threshold, forwarded to ``filter_df_by_date``
      with ``comparison_type='b'``.

    Returns:
    The total number of non-null date values, summed over all date columns,
    that survive both filters (``0`` when the dataset has no date columns).
    Note that this is a count, not a DataFrame.
    """
    df_dataset = Dataset(configuration_file, file_sheet)
    df = pd.read_csv(file_path)
    # dtype_trans is called for its effect on ``df``; its return value is not used.
    dtype_trans(df, df_dataset)

    # NOTE(review): earlier revisions also built a frame filtered on all date
    # columns at once and then discarded it. That dead computation is dropped
    # here on the assumption that filter_df_by_date does not mutate its input
    # -- confirm against src.all_in_one.

    sessions_num = 0
    for col in df_dataset.dvars:
        # presumably 'a' keeps dates after the threshold and 'b' keeps dates
        # before it -- verify in filter_df_by_date.
        after_lower = filter_df_by_date(df, [col], lower_threshold, comparison_type='a')
        in_window = filter_df_by_date(after_lower, [col], upper_threshold, comparison_type='b')
        # Series.count() ignores missing values, so only populated dates are tallied.
        sessions_num += in_window[col].count()

    return sessions_num
    

In [None]:
# (csv_path, sheet) pairs, one per source file; the aggregation loop walks
# them in this order and passes each pair to dataframize_csv.
paths_sheets = [
    (scr_path, scr_sheet),
    (int_path, int_sheet),
    (gc_path, gc_sheet),
    (ic_path, ic_sheet),
    (psfua_path, psfua_sheet),
    (pei_path, pei_sheet),
    (td_path, td_sheet),
    (cws_path, cws_sheet),
    (trw_path, trw_sheet),
    (aw_path, aw_sheet),
    (psfs_path, psfs_sheet),
    (ptint_path, ptint_sheet),
    (gpt_path, gpt_sheet),
    (ipt_path, ipt_sheet),
    (ptfua_path, ptfua_sheet),
]

In [None]:
# Running total of counted sessions across all CSV files; starts at zero
# before the aggregation loop adds each file's count.
total_sessions_num = 0

In [None]:
# Reset the accumulator in the same cell that fills it, so re-running this
# cell on its own cannot add a second pass on top of a previous total.
total_sessions_num = 0

for path, sheet in paths_sheets:
    # Each call returns the number of in-window date entries for one CSV file.
    session_num = dataframize_csv(path, sheet, config_file, lower_threshold=down_threshold, upper_threshold=upper_threshold)
    total_sessions_num += session_num

print(total_sessions_num)