In [None]:
import numpy as np
import pandas as pd
import warnings
from my_analysis_toolkit import read_encrypted_excels, indexing_groups, repeated_client_indicator
from my_static_data import tt_passwords, ptgc_columns, ptgc_basic_columns, ptgc_date_columns, ptgc_int_columns

In [None]:
# Render floats with one fixed decimal instead of scientific notation.
# To restore the default, run: pd.reset_option('display.float_format')
pd.options.display.float_format = '{:.1f}'.format

# Show full cell contents; None removes the column-width limit.
pd.options.display.max_colwidth = None

# Ignore every UserWarning raised from openpyxl modules
# (e.g. the data-validation warning emitted while reading workbooks).
warnings.filterwarnings(action="ignore", category=UserWarning, module="openpyxl")

In [None]:
# Load the password-protected tracking-tool workbooks found under '000'.
# The result is indexed below as tracking_tools[<workbook filename>][<sheet name>].
# NOTE(review): the exact signature/return type of read_encrypted_excels is defined in
# my_analysis_toolkit and is not visible here — confirm.
passwords = tt_passwords
tracking_tools = read_encrypted_excels(r'000', passwords) 

In [None]:
# Settings shared by the sheet-processing cells below.
# NOTE(review): the sheet is called 'GPT' while every other name here uses 'ptgc' —
# confirm this matches the worksheet name in the workbooks.
sheet = 'GPT'  # worksheet read from each workbook
sp = 'ptgcsp'  # column inserted at position 0 to tag each row's source ('HR' / 'HJ')
columns_names = ptgc_columns  # assigned as the column labels of each sheet
basic_columns = ptgc_basic_columns  # rows where all of these are missing get dropped
date_columns = ptgc_date_columns  # converted with pd.to_datetime after each reload
int_columns = ptgc_int_columns  # cast to nullable 'Int64' after each reload

# Process PTGC Sheets

In [None]:
# Select the `sheet` worksheet from the HR workbook entry of tracking_tools.
hr = tracking_tools['tt_pt_HR_v04.xlsx'][sheet]

In [None]:
hr

In [None]:
# Apply the shared PTGC column labels to the HR sheet.
hr.columns = columns_names
# Discard the first row (presumably a leftover header row — confirm), renumber the
# rows, then remove rows in which every basic column is missing.
hr = (
    hr.drop(hr.index[0])
      .reset_index(drop=True)
      .dropna(subset=basic_columns, how='all')
)
# Tag every remaining row with its source.
hr.insert(0, sp, 'HR')

In [None]:
# Select the `sheet` worksheet from the HJ workbook entry of tracking_tools.
hj = tracking_tools['tt_pt_HJ_v04.xlsx'][sheet]

In [None]:
# Apply the shared PTGC column labels to the HJ sheet.
hj.columns = columns_names
# Discard the first row of *hj* (presumably a leftover header row — confirm) and
# renumber the rows. The label must come from hj itself: the previous code used
# hr.index[0], which drops the wrong HJ row whenever HR's first remaining label is
# not 0 and raises IndexError when HR is empty.
hj = hj.drop(hj.index[0]).reset_index(drop=True)
# Remove rows in which every basic column is missing.
hj = hj.dropna(subset=basic_columns, how='all')
# Tag every remaining row with its source.
hj.insert(0, sp, 'HJ')

In [None]:
# Total number of rows across the two cleaned sheets (shown as the cell output).
ptgc_rows = len(hr) + len(hj)
ptgc_rows

In [None]:
# Stack the HR rows on top of the HJ rows under a fresh 0..n-1 index.
ptgc_data = pd.concat(objs=(hr, hj), axis=0, ignore_index=True)
ptgc_data

In [None]:
# Persist the combined sheet data as CSV without the row index.
# The next section reloads this exact file.
ptgc_data.to_csv(r'001/1100_pt_gc_092300.csv', index=False)

# PTGC Dataframe

In [None]:
# Reload the combined CSV written by the sheet-processing section.
ptgc_df = pd.read_csv(r'001/1100_pt_gc_092300.csv')
# Strip surrounding whitespace from every string cell; other values pass through
# untouched. Done column by column with Series.map because DataFrame.applymap is
# deprecated since pandas 2.1, while Series.map works on old and new versions alike.
ptgc_df = ptgc_df.apply(
    lambda column: column.map(
        lambda value: value.strip() if isinstance(value, str) else value
    )
)
# Nullable integers keep missing values without falling back to float.
ptgc_df[int_columns] = ptgc_df[int_columns].astype('Int64')
# Parse each date column into datetime values.
ptgc_df[date_columns] = ptgc_df[date_columns].apply(pd.to_datetime)

In [None]:
ptgc_df.shape

In [None]:
ptgc_df.info()

In [None]:
ptgc_df

In [None]:
# ptnt_duplicates = ptgc_df[ptgc_df['fcid'].duplicated(keep=False)]
# ptnt_duplicates

In [None]:
# Persist the stripped and typed dataframe as CSV without the row index.
# The analysis section reloads this exact file.
ptgc_df.to_csv(r'010/1101_pt_gc_092301.csv', index=False)

# PTGC Analysis Version

In [None]:
# Reload the cleaned CSV; dtypes are lost in the CSV round trip, so restore them.
ptgc = pd.read_csv(r'010/1101_pt_gc_092301.csv')
# Date columns back to datetime values.
ptgc[date_columns] = ptgc[date_columns].apply(lambda column: pd.to_datetime(column))
# Integer columns back to the nullable 'Int64' dtype.
ptgc = ptgc.astype({name: 'Int64' for name in int_columns})

In [None]:
ptgc

In [None]:
# Create the 'ptgcndx' column at position 6 if it is not there yet, so that
# re-running this cell does not attempt a second insert.
if 'ptgcndx' not in ptgc:
    ptgc.insert(loc=6, column='ptgcndx', value=np.nan)

# Fill the column with the values computed by indexing_groups from the date columns.
ptgc['ptgcndx'] = indexing_groups(ptgc, date_columns)

#### - HJ-ptgci: 89 = !ptgc1: 2023-01-30 (recorded, incorrect) | $ptgc1: 2023-01-25 (corrected)

In [None]:
# Manual correction: overwrite the first PT GC session date ('ptgc1') on the
# row(s) whose rid is 'R0486'.
# NOTE(review): this runs after 'ptgcndx' was computed from the date columns —
# confirm the index does not need to be recomputed after this change.
ptgc.loc[ptgc['rid'].eq('R0486'), 'ptgc1'] = pd.Timestamp('2023-01-25')

In [None]:
ptgc

### No Duplicated Clients

In [None]:
# ptgc[ptgc['rid'].duplicated(keep=False)]

In [None]:
# Encode the 'ptgcst' status labels as integer codes (three levels, not binary).
# Values not listed in the mapping are left as they are.
ptgcst_codes = {'ACTIVE': 1, 'DONE': 0, 'Drop Out': -1}
ptgc['ptgcst'] = ptgc['ptgcst'].replace(ptgcst_codes)

In [None]:
# PTGC Analysis Complete Dataframes
# Writes the full analysis dataframe (all columns, including the name and note
# columns removed from the basic version) as CSV without the row index.
ptgc.to_csv(r'010/1101_pt_gc_092302.csv', index=False)

In [None]:
# PTGC Analysis Basic Dataframes: the working copy for analysis, without the
# name and free-text note columns. drop() returns a new frame, so ptgc itself
# is left unchanged.
basic_ptgc = ptgc.drop(columns=['firstname', 'lastname', 'note'])
basic_ptgc.to_csv(r'011/1101_pt_gc_092310.csv', index=False)

In [None]:
pd.read_csv(r'011/1101_pt_gc_092310.csv')

### PTGC DATA IS PREPARED AND READY