In [None]:
import numpy as np
import pandas as pd
import warnings
from my_analysis_toolkit import read_encrypted_excels, indexing_groups, generate_binary_sequence, repeated_client_indicator
from my_static_data import tt_passwords, group_counseling_columns, group_counseling_date_columns, group_counseling_basic_date_columns, group_counseling_basic_columns

In [None]:
# Avoid representing large numbers in scientific form. To reset, use the commented line.
pd.set_option('display.float_format', '{:.1f}'.format)
# pd.reset_option('display.float_format')

# Display maximum column width:
pd.set_option('display.max_colwidth', None)

# Suppress openpyxl data validation warning
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")

In [None]:
# Static code variables shared by the cells below.

# Sheet looked up in every workbook, and the name of the column that holds the
# per-workbook tag ('YQ', 'SA', ...).
sheet, sp = 'GC', 'gcsp'

# Columns cast to the nullable Int64 dtype after each CSV reload.
int_columns = ['gcspi', 'fcid', 'gcindx', 'gct']

# Passwords handed to the encrypted-workbook reader.
passwords = tt_passwords

# Column-name lists taken from my_static_data.
columns_names = group_counseling_columns
basic_columns = group_counseling_basic_columns
all_date_columns = group_counseling_date_columns
date_columns = group_counseling_basic_date_columns

In [None]:
# Read the encrypted tracking-tool workbooks (path '000') using the configured passwords.
# The result is indexed below by workbook file name and then by sheet name.
tracking_tools = read_encrypted_excels(r'000', passwords)

# Process Group Counseling Sheets

In [None]:
# Raw group counseling sheet from the YQ workbook.
# NOTE(review): this key is spelled 'YQ-v04' (hyphen) while the other workbooks use
# '_v04' (underscore) — confirm it matches the actual file name.
yq = tracking_tools['tt_psc_YQ-v04.xlsx'][sheet]

Open question: record no. 26 — all fields are NaN except for the note.

In [None]:
# Build the cleaned YQ frame from a copy of the raw sheet on every run. Working on a
# copy keeps the loaded workbook untouched and makes this cell safe to re-execute
# (re-running on an already processed frame fails because of the inserted column).
yq = tracking_tools['tt_psc_YQ-v04.xlsx'][sheet].copy()
yq.columns = columns_names
yq = (
    yq.iloc[1:]  # discard the first row of the sheet
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')  # drop rows with no value in any basic column
)
yq.insert(0, sp, 'YQ')  # tag every row with its source workbook, as the first column

In [None]:
# Raw group counseling sheet from the SA workbook.
sa = tracking_tools['tt_psc_SA_v04.xlsx'][sheet]

In [None]:
# Build the cleaned SA frame from a copy of the raw sheet on every run. Working on a
# copy keeps the loaded workbook untouched and makes this cell safe to re-execute
# (re-running on an already processed frame fails because of the inserted column).
sa = tracking_tools['tt_psc_SA_v04.xlsx'][sheet].copy()
sa.columns = columns_names
sa = (
    sa.iloc[1:]  # discard the first row of the sheet
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')  # drop rows with no value in any basic column
)
sa.insert(0, sp, 'SA')  # tag every row with its source workbook, as the first column

In [None]:
# Raw group counseling sheet from the IJ workbook.
ij = tracking_tools['tt_psc_IJ_v04.xlsx'][sheet]

In [None]:
# Build the cleaned IJ frame from a copy of the raw sheet on every run. Working on a
# copy keeps the loaded workbook untouched and makes this cell safe to re-execute
# (re-running on an already processed frame fails because of the inserted column).
ij = tracking_tools['tt_psc_IJ_v04.xlsx'][sheet].copy()
ij.columns = columns_names
ij = (
    ij.iloc[1:]  # discard the first row of the sheet
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')  # drop rows with no value in any basic column
)
ij.insert(0, sp, 'IJ')  # tag every row with its source workbook, as the first column

In [None]:
# Raw group counseling sheet from the LA workbook.
la = tracking_tools['tt_psc_LA_v04.xlsx'][sheet]

In [None]:
# Build the cleaned LA frame from a copy of the raw sheet on every run. Working on a
# copy keeps the loaded workbook untouched and makes this cell safe to re-execute
# (re-running on an already processed frame fails because of the inserted column).
la = tracking_tools['tt_psc_LA_v04.xlsx'][sheet].copy()
la.columns = columns_names
la = (
    la.iloc[1:]  # discard the first row of the sheet
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')  # drop rows with no value in any basic column
)
la.insert(0, sp, 'LA')  # tag every row with its source workbook, as the first column

In [None]:
# Raw group counseling sheet from the MT workbook.
mt = tracking_tools['tt_psc_MT_v04.xlsx'][sheet]

In [None]:
# Build the cleaned MT frame from a copy of the raw sheet on every run. Working on a
# copy keeps the loaded workbook untouched and makes this cell safe to re-execute
# (re-running on an already processed frame fails because of the inserted column).
mt = tracking_tools['tt_psc_MT_v04.xlsx'][sheet].copy()
mt.columns = columns_names
mt = (
    mt.iloc[1:]  # discard the first row of the sheet
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')  # drop rows with no value in any basic column
)
mt.insert(0, sp, 'MT')  # tag every row with its source workbook, as the first column

In [None]:
# Total number of rows across the five cleaned sheets.
gc_rows = sum(site_frame.shape[0] for site_frame in (mt, yq, sa, ij, la))
gc_rows

In [None]:
# Stack the five cleaned sheets into one frame; ignore_index=True renumbers the rows
# from 0 instead of keeping each sheet's own index.
group_counseling = pd.concat([yq, sa, ij, la, mt], ignore_index=True)

In [None]:
# Display the combined frame for visual inspection.
# NOTE(review): the data appears to include 'firstname'/'lastname' columns (they are
# dropped before the basic export at the end of the notebook) — clear this output
# before sharing the notebook.
group_counseling

In [None]:
# All group counseling csv file: the combined frame of all workbooks, written without
# the row index.
group_counseling.to_csv(r'001/0010_ps_gc_092300.csv', index=False)

# Group Counseling Dataframe

In [None]:
# Reload the combined export and parse every date column.
gc = pd.read_csv(r'001/0010_ps_gc_092300.csv')
gc[all_date_columns] = gc[all_date_columns].apply(pd.to_datetime)

# Strip surrounding whitespace from every string cell; non-string cells pass through
# unchanged. DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map, so use .map when this pandas version has it and fall back to
# .applymap otherwise.
_elementwise = gc.map if hasattr(pd.DataFrame, 'map') else gc.applymap
gc = _elementwise(lambda x: x.strip() if isinstance(x, str) else x)

In [None]:
# Cast the integer columns to the nullable Int64 dtype (integers that can hold missing values).
gc = gc.astype({int_col: 'Int64' for int_col in int_columns})

In [None]:
# (rows, columns) of the cleaned frame.
gc.shape

In [None]:
# Per-column dtype and non-null count, to check the date and Int64 conversions.
gc.info()

In [None]:
# Display the cleaned frame for visual inspection.
gc

In [None]:
# First cleaned csv file: dates parsed, strings stripped, integer columns cast to
# Int64; written without the row index.
gc.to_csv(r'010/0010_ps_gc_092301.csv', index=False)

# Group Counseling Analysis Version

In [None]:
# Reload the first cleaned export and convert the date and integer columns again
# after the CSV round trip.
gcl = pd.read_csv(r'010/0010_ps_gc_092301.csv')
gcl[all_date_columns] = gcl[all_date_columns].apply(pd.to_datetime)
gcl = gcl.astype({int_col: 'Int64' for int_col in int_columns})

In [None]:
# Remove the record(s) that were added by mistake: rows tagged 'SA' with fcid 8110050030.
mistaken_rows = (gcl['gcsp'] == 'SA') & (gcl['fcid'] == 8110050030)
R0187 = gcl.loc[mistaken_rows].index
gcl = gcl.drop(index=R0187)

In [None]:
# Per-column dtype and non-null count after removing the mistaken record(s).
gcl.info()

In [None]:
# Display the analysis frame for visual inspection.
gcl

In [None]:
# Encode PT need as integer codes: 'No PT' -> 0, 'GPT' -> 1, 'IPT' -> 2,
# stored as nullable Int64.
ptn_codes = {'No PT': 0, 'GPT': 1, 'IPT': 2}
gcl['ptn'] = gcl['ptn'].replace(ptn_codes).astype('Int64')

In [None]:
# Index the GC groups: indexing_groups gets a copy of the frame plus the basic date
# columns, and its result becomes the 'gcindx' column.
gcl = gcl.assign(gcindx=indexing_groups(gcl.copy(), group_counseling_basic_date_columns))

In [None]:
# Show every row whose fcid occurs more than once (keep=False flags all occurrences).
gcl.loc[gcl['fcid'].duplicated(keep=False)]

In [None]:
# Flag repeated clients with repeated_client_indicator (called with 'fcid' and 'gcindx').
# The placeholder column is created at position 6 only when it is missing, so
# re-running the cell does not fail on an existing 'dupind' column.
dup_column = 'dupind'
if dup_column not in gcl.columns:
    gcl.insert(loc=6, column=dup_column, value=np.nan)
gcl[dup_column] = repeated_client_indicator(gcl.copy(), 'fcid', 'gcindx')

In [None]:
# Group Counseling Analysis Complete Dataframe: the full analysis frame (all columns),
# written without the row index.
gcl.to_csv(r'010/0010_ps_gc_092302.csv', index=False)

In [None]:
# Basic dataframe: the analysis frame without the name and free-text note columns,
# exported without the row index.
basic_gcl = gcl.drop(columns=['firstname', 'lastname', 'note'])
basic_gcl.to_csv(r'011/0010_ps_gc_092310.csv', index=False)