In [None]:
import numpy as np
import pandas as pd
import warnings

In [None]:
from Scripts.data_reader import *
from Scripts.parse_funcs import *
from Scripts.myfunctions import *

In [None]:
# Render floats with one fixed decimal instead of scientific notation.
# To restore the pandas default, run the commented line below.
pd.options.display.float_format = '{:.1f}'.format
# pd.reset_option('display.float_format')

# Never truncate the text shown inside a cell (no maximum column width).
pd.options.display.max_colwidth = None

# Silence the UserWarning that openpyxl emits (data-validation notice).
warnings.filterwarnings(action="ignore", category=UserWarning, module="openpyxl")

In [None]:
# Locations of the inputs, relative to the notebook's working directory.
path_to_ingredients_file = "../Config/ingredients.yaml"    # YAML configuration file
path_to_excels_files_directory = "../Data/Raw/1023"        # directory holding the raw Excel workbooks

In [None]:
# Build the configuration reader on top of the ingredients YAML file.
features = YAMLDataReader(path_to_ingredients_file)
# Ask the reader for the Excel passwords associated with the raw-data directory
# (presumably one entry per workbook — confirm against YAMLDataReader).
passwords = features.get_excel_passwords_by_directory(path_to_excels_files_directory)

In [None]:
# Identifiers used throughout the notebook:
#   sheet   - key of the worksheet picked from every decrypted workbook
#   dataset - dataset key passed to the YAML reader lookups below
#   sp      - name of the column inserted in first position of each cleaned sheet
sheet, dataset, sp = 'TD', 'td', 'tdsp'

# Variable names the reader returns for this dataset: all of them, the
# level-0 ones, and the ones declared with each of the two dtypes below.
columns_names = features.get_variable_names_by_dataset(dataset)
basic_columns = features.get_variable_names_by_dataset_and_level(dataset, 0)
date_columns = features.get_variable_names_by_dataset_and_type(dataset, 'datetime64[ns]')
num_columns = features.get_variable_names_by_dataset_and_type(dataset, 'Int64')

In [None]:
# Build the decryptor over the raw-data directory and its passwords.
decryptor = ExcelDecryptor(path_to_excels_files_directory, passwords)
# Read the encrypted workbooks. The result is indexed below as
# tracking_tools[<file name>][<sheet name>].
tracking_tools = decryptor.read_encrypted_excels()

In [None]:
# Presumably shows which workbooks/sheets were read (helper not visible here).
# NOTE(review): there are no call parentheses. If print_data_structure is a
# regular method rather than a property, this line only displays the bound
# method and never runs it — confirm against ExcelDecryptor.
decryptor.print_data_structure

# Process TD Sheets

In [None]:
def prepare_td_sheet(raw_sheet, source_label):
    """Return a cleaned copy of one raw TD worksheet, tagged with its source label.

    The same four steps were previously copy-pasted once per workbook:
      1. rename the columns to `columns_names`;
      2. discard the first row (presumably a secondary header/description
         row in the workbook — TODO confirm);
      3. drop the rows in which every column of `basic_columns` is missing;
      4. insert the `sp` column in first position, filled with `source_label`.

    Parameters
    ----------
    raw_sheet : pandas.DataFrame
        Worksheet as stored in `tracking_tools`; it is not modified.
    source_label : str
        Value written to the `sp` column of every remaining row.

    Returns
    -------
    pandas.DataFrame
        A new frame; the row index is reset before the empty rows are dropped.
    """
    # Work on a copy: renaming columns directly on the frame held in
    # `tracking_tools` made the cell fail when it was re-run on its own.
    prepared = raw_sheet.copy()
    prepared.columns = columns_names
    prepared = (
        prepared.iloc[1:]            # positional, so an empty sheet does not raise
        .reset_index(drop=True)
        .dropna(subset=basic_columns, how='all')
    )
    prepared.insert(0, sp, source_label)
    return prepared


# One cleaned frame per workbook; the variable names are kept because the
# cells below refer to them.
# NOTE(review): the first key uses '-v04' while the other four use '_v04' —
# confirm that it matches the real file name.
yq = prepare_td_sheet(tracking_tools['tt_psc_YQ-v04.xlsx'][sheet], 'YQ')
sa = prepare_td_sheet(tracking_tools['tt_psc_SA_v04.xlsx'][sheet], 'SA')
ij = prepare_td_sheet(tracking_tools['tt_psc_IJ_v04.xlsx'][sheet], 'IJ')
la = prepare_td_sheet(tracking_tools['tt_psc_LA_v04.xlsx'][sheet], 'LA')
mt = prepare_td_sheet(tracking_tools['tt_psc_MT_v04.xlsx'][sheet], 'MT')

In [None]:
# Total number of rows across the five per-workbook frames; the frame built
# by concatenating them should have exactly this many rows.
td_rows = sum(len(frame) for frame in (mt, yq, sa, ij, la))
td_rows

In [None]:
# Stack the five per-workbook frames into a single one; ignore_index=True
# discards their individual indexes and renumbers the rows from 0.
therapeutic_documentation = pd.concat([yq, sa, ij, la, mt], ignore_index=True)

In [None]:
# Display the combined frame.
# NOTE(review): the rendered output may expose personal data if the sheets
# contain any — consider clearing outputs before sharing the notebook.
therapeutic_documentation

In [None]:
# Export the combined TD frame (all workbooks) to CSV, without the row index.
# The target directory must already exist; to_csv does not create it.
therapeutic_documentation.to_csv(r'../Data/Processed/1023/pstd_1023.csv', index=False)

# TD Dataframe

In [None]:
def _strip_if_str(value):
    """Trim surrounding whitespace from strings; return any other value unchanged."""
    return value.strip() if isinstance(value, str) else value


# Reload the combined TD export from disk.
td_data = pd.read_csv(r'../Data/Processed/1023/pstd_1023.csv')
# Strip every string cell. Series.map is applied column by column instead of
# DataFrame.applymap, which is deprecated since pandas 2.1 (renamed to
# DataFrame.map); this form behaves the same on old and new pandas versions.
td_data = td_data.apply(lambda column: column.map(_strip_if_str))

In [None]:
# (rows, columns) of the reloaded frame.
td_data.shape

In [None]:
# Column dtypes and non-null counts of the reloaded frame.
td_data.info()

In [None]:
# Display the reloaded frame.
td_data

# TD Analysis Version

In [None]:
# Load the analysis version of the TD data and restore the column dtypes,
# which a CSV file does not preserve.
td = pd.read_csv(r'010/0111_ps_td_092301.csv')
# Columns declared as 'datetime64[ns]' for this dataset are parsed as dates.
td[date_columns] = td[date_columns].apply(pd.to_datetime)
# Columns declared as 'Int64' are cast to the nullable integer dtype.
# `num_columns` is the list built for that type in the configuration cell;
# the name used here before, `int_columns`, is not assigned anywhere in this
# notebook and raised a NameError on a fresh kernel.
td[num_columns] = td[num_columns].astype('Int64')

In [None]:
# Column dtypes and non-null counts after the conversions.
td.info()

In [None]:
# Display the analysis frame.
# NOTE(review): td carries 'firstname' and 'lastname' columns (they are
# dropped only for the basic export) — clear this output before sharing.
td

In [None]:
# TD basic sessions: validation and binary indicator.
# Make sure the 'tdcc' column exists (created at position 6, filled with NaN)
# before it is assigned below, so that its position in the frame is fixed.
if 'tdcc' not in td.columns:
    td.insert(6, 'tdcc', np.nan)
    
# Fill 'tdcc' with the result of compare_date_columns for 'cslt' and 'csnt'.
# The helper is not defined in this notebook (presumably it comes from the
# Scripts star imports), so its exact semantics are not visible here.
td['tdcc'] = compare_date_columns(td, 'cslt', 'csnt')

In [None]:
# Encode the TD pathway columns: 'Yes' becomes 1 and 'No' becomes 0, then the
# columns are cast to the nullable integer dtype.
# NOTE(review): `td_pathways` is not assigned in this notebook — presumably it
# is provided by one of the Scripts star imports; confirm.
yes_no_codes = {'Yes': 1, 'No': 0}
td[td_pathways] = td[td_pathways].replace(yes_no_codes).astype('Int64')

In [None]:
# Make sure the 'tdis' column exists (created at position 14, filled with
# NaN) before it is assigned below, so that its position in the frame is fixed.
if 'tdis' not in td.columns:
    td.insert(14, 'tdis', np.nan)

# Fill 'tdis' with the result of create_binary_pattern for 'is1' and 'is3'.
# The helper is not defined in this notebook (presumably it comes from the
# Scripts star imports); whether the two names delimit a range of columns or
# are used on their own is not visible here — confirm.
td['tdis'] = create_binary_pattern(td, 'is1', 'is3')

In [None]:
# No duplicated TD rows were found.

In [None]:
# Export the complete TD analysis frame (all columns) to CSV, without the
# row index. The target directory must already exist.
td.to_csv(r'010/0111_ps_td_092302.csv', index=False)

In [None]:
# Basic TD analysis frame: the complete frame without the name and note
# columns. drop() already returns a new frame, so td itself is left untouched.
basic_td = td.drop(columns=['firstname', 'lastname', 'note'])
# NOTE(review): this export goes to '011/' while the complete export goes to
# '010/' — confirm the directory is intended.
basic_td.to_csv(r'011/0111_ps_td_092310.csv', index=False)

### TD DATA PREPARED AND READY