In [None]:
import numpy as np
import pandas as pd
import warnings
from my_analysis_toolkit import read_encrypted_excels
from my_static_data import tt_passwords, ptnt_columns, ptnt_basic_columns, ptnt_date_columns, ptnt_int_columns

In [None]:
# Render floats with one fixed decimal instead of scientific notation.
# To undo, run the commented line below.
pd.options.display.float_format = '{:.1f}'.format
# pd.reset_option('display.float_format')

# Never truncate cell contents when a frame is displayed (no column-width limit).
pd.options.display.max_colwidth = None

# Silence UserWarning messages raised from openpyxl modules
# (the data-validation notice emitted while reading workbooks).
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    module="openpyxl",
)

In [None]:
# Load the tracking-tool workbooks through the project helper.
# Later cells index the result as tracking_tools[<workbook filename>][<sheet name>].
# NOTE(review): r'000' is presumably the folder holding the workbooks and tt_passwords
# the per-workbook passwords -- confirm against read_encrypted_excels.
passwords = tt_passwords
tracking_tools = read_encrypted_excels(r'000', passwords)

In [None]:
# Sheet read from every workbook, and the name of the column that later cells
# insert to hold each workbook's code ('HR' / 'HJ').
sheet, sp = 'PT Int', 'ptntsp'

# Short aliases for the PTNT column lists imported from my_static_data.
columns_names, basic_columns, date_columns, int_columns = (
    ptnt_columns,
    ptnt_basic_columns,
    ptnt_date_columns,
    ptnt_int_columns,
)

# Process PTNT Sheets

In [None]:
# Raw HR sheet exactly as loaded: the frame for `sheet` inside the HR workbook.
hr = tracking_tools['tt_pt_HR_v04.xlsx'][sheet]

In [None]:
# Show the HR sheet as loaded, before columns are renamed or rows are filtered.
hr

In [None]:
# Build the cleaned HR frame straight from the cached raw sheet so this cell can be
# re-run on its own. The previous in-place version failed on a second run (the
# inserted `sp` column made the column-name assignment a length mismatch) and also
# renamed the cached sheet inside `tracking_tools`.
hr = (
    tracking_tools['tt_pt_HR_v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)          # renamed copy; the cached sheet keeps its own headers
    .iloc[1:]                                 # discard the first row by position
                                              # (presumably a secondary header row -- TODO confirm)
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')  # drop rows where every basic column is empty
)
# Tag every remaining row with its source code in a new leading column.
hr.insert(0, sp, 'HR')

In [None]:
# Raw HJ sheet exactly as loaded: the frame for `sheet` inside the HJ workbook.
hj = tracking_tools['tt_pt_HJ_v04.xlsx'][sheet]

In [None]:
# Build the cleaned HJ frame straight from the cached raw sheet so this cell can be
# re-run on its own.
# The first row is removed by position. The earlier version dropped the label
# `hr.index[0]` taken from the HR frame, whose index is not reset after its dropna,
# so the wrong HJ row could be removed (or a KeyError/IndexError raised).
hj = (
    tracking_tools['tt_pt_HJ_v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)          # renamed copy; the cached sheet keeps its own headers
    .iloc[1:]                                 # discard the first row by position
                                              # (presumably a secondary header row -- TODO confirm)
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')  # drop rows where every basic column is empty
)
# Tag every remaining row with its source code in a new leading column.
hj.insert(0, sp, 'HJ')

In [None]:
# Combined row count of the two cleaned frames, for comparison with the merged frame.
ptnt_rows = sum(len(frame) for frame in (hr, hj))
ptnt_rows

In [None]:
# Stack HR on top of HJ; ignore_index=True renumbers the rows 0..n-1, discarding
# the gaps left in each frame's index by the earlier dropna.
ptnt_data = pd.concat([hr, hj], ignore_index=True)
ptnt_data

In [None]:
# Persist the combined HR + HJ frame; index=False keeps the row index out of the file.
ptnt_data.to_csv(r'001/1011_pt_nt_092300.csv', index=False)

# PTNT Dataframe

In [None]:
# Reload the combined extract written by the previous section.
ptnt_df = pd.read_csv(r'001/1011_pt_nt_092300.csv')

# Trim leading/trailing whitespace from every string cell; non-string values
# (numbers, NaN) pass through unchanged.
# Column-wise Series.map is used instead of DataFrame.applymap, which is deprecated
# since pandas 2.1 (renamed to DataFrame.map); Series.map behaves the same on old
# and new pandas versions.
ptnt_df = ptnt_df.apply(
    lambda column: column.map(
        lambda value: value.strip() if isinstance(value, str) else value
    )
)

In [None]:
# (rows, columns) of the reloaded frame.
ptnt_df.shape

In [None]:
# Per-column dtypes and non-null counts as inferred by read_csv, i.e. before the
# explicit integer/date casts applied in a later cell.
ptnt_df.info()

In [None]:
# Visual check of the reloaded, whitespace-trimmed frame.
ptnt_df

In [None]:
# Cast the integer columns to pandas' nullable Int64 so missing values stay missing.
for _col in int_columns:
    ptnt_df[_col] = ptnt_df[_col].astype('Int64')

# Parse each date column into datetime values.
for _col in date_columns:
    ptnt_df[_col] = pd.to_datetime(ptnt_df[_col])

In [None]:
# Every row whose fcid occurs more than once; keep=False flags all occurrences,
# not just the repeats after the first.
ptnt_duplicates = ptnt_df.loc[lambda frame: frame['fcid'].duplicated(keep=False)]
ptnt_duplicates

In [None]:
# Save the typed frame without its row index. The next section reads this file back
# and re-applies the date/integer casts.
ptnt_df.to_csv(r'010/1011_pt_nt_092301.csv', index=False)

# PTNT Analysis Version

In [None]:
# Reload the frame saved at the end of the previous section as the analysis working copy.
ptnt = pd.read_csv(r'010/1011_pt_nt_092301.csv')

In [None]:
# Re-apply the column types after the reload: dates first, then nullable integers.
for _col in date_columns:
    ptnt[_col] = pd.to_datetime(ptnt[_col])

for _col in int_columns:
    ptnt[_col] = ptnt[_col].astype('Int64')

ptnt

In [None]:
# Select rows where ptnt1 and ptnt2 are both missing while ptntre has a value.
ptntre_condition = ptnt[['ptnt1', 'ptnt2']].isna().all(axis=1) & ptnt['ptntre'].notna()
ptntre = ptnt.loc[ptntre_condition]
ptntre

In [None]:
# PTNT Analysis Complete Dataframes
# Write the full analysis frame and its ptntre-only subset, both without the row index.
for _frame, _csv_path in (
    (ptnt, r'010/1011_pt_nt_092302.csv'),
    (ptntre, r'010/1100_pt_re_092301.csv'),
):
    _frame.to_csv(_csv_path, index=False)

In [None]:
# PTNT Analysis Basic Dataframes (working extracts for analysis, without the
# columns the analysis does not need).
# Both extracts drop the name, ptntpsc and note columns. The nt extract also drops
# ptntre, and the re extract also drops ptnt1 / ptnt2.
basic_ptnt = ptnt.copy().drop(
    columns=['firstname', 'lastname', 'ptntre', 'ptntpsc', 'note']
)
basic_ptntre = ptntre.copy().drop(
    columns=['firstname', 'lastname', 'ptnt1', 'ptnt2', 'ptntpsc', 'note']
)

# Persist both extracts without the row index.
basic_ptnt.to_csv(r'011/1011_pt_nt_092310.csv', index=False)
basic_ptntre.to_csv(r'011/1100_pt_re_092310.csv', index=False)

In [None]:
# pd.read_csv(r'011/1011_pt_nt_092310.csv')
# pd.read_csv(r'011/1100_pt_re_092310.csv') 

### PTNT data is split into two dataframes, nt and re.

### PTNT DATA IS PREPARED AND READY