In [None]:
import numpy as np
import pandas as pd
import warnings
from my_analysis_toolkit import read_encrypted_excels, indexing_groups
from my_static_data import tt_passwords, pei_columns, pei_date_columns, pei_basic_columns, pei_int_columns

In [None]:
# Notebook-wide display settings.
# Floats are printed with one fixed decimal so large numbers are not shown in
# scientific notation. To undo: pd.reset_option('display.float_format')
pd.options.display.float_format = '{:.1f}'.format

# No limit on the width of the text shown inside a column.
pd.options.display.max_colwidth = None

# Silence UserWarning raised from modules whose name starts with "openpyxl"
# (intended for its data validation warning).
warnings.filterwarnings(action="ignore", category=UserWarning, module="openpyxl")

In [None]:
# Static code variables.
# `sheet` is the worksheet taken from every workbook; `sp` is the name of the
# column that later cells insert to tag each row with its source code.
sheet, sp = 'PEI', 'pesp'

# Short local aliases for the PEI column lists and the workbook passwords
# imported from my_static_data.
columns_names = pei_columns
basic_columns = pei_basic_columns
date_columns = pei_date_columns
int_columns = pei_int_columns
passwords = tt_passwords

In [None]:
# Load the tracking-tool workbooks with the supplied passwords.
# Later cells index the result by workbook file name and then by sheet name.
# NOTE(review): '000' is presumably the folder holding the workbooks — confirm
# against read_encrypted_excels.
tracking_tools = read_encrypted_excels(r'000', passwords)

# Process PEI Sheets

In [None]:
# Take the sheet named by `sheet` out of the YQ workbook.
# NOTE(review): this key is spelled 'YQ-v04' (hyphen) while the other four
# workbooks use '_v04' (underscore) — confirm it matches the real file name.
yq = tracking_tools['tt_psc_YQ-v04.xlsx'][sheet]

In [None]:
# Show the YQ sheet as loaded, before the next cell renames and trims it.
yq

In [None]:
# Build the cleaned YQ frame directly from the loaded workbook.
# Working from `tracking_tools` (instead of renaming `yq` in place and then
# rebinding it) leaves the cached raw sheet untouched and lets this cell be
# re-run on its own without a column-length mismatch.
yq = (
    tracking_tools['tt_psc_YQ-v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)           # apply the standard PEI column names
    .iloc[1:]                                   # discard the first row; safe on an empty sheet
    .dropna(subset=basic_columns, how='all')    # drop rows where every basic column is empty
    .reset_index(drop=True)                     # contiguous 0..n-1 index after both removals
)
# Tag every row with its source code in a leading column.
yq.insert(0, sp, 'YQ')

In [None]:
# Take the sheet named by `sheet` out of the SA workbook.
sa = tracking_tools['tt_psc_SA_v04.xlsx'][sheet]

In [None]:
# Build the cleaned SA frame directly from the loaded workbook.
# Working from `tracking_tools` (instead of renaming `sa` in place and then
# rebinding it) leaves the cached raw sheet untouched and lets this cell be
# re-run on its own without a column-length mismatch.
sa = (
    tracking_tools['tt_psc_SA_v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)           # apply the standard PEI column names
    .iloc[1:]                                   # discard the first row; safe on an empty sheet
    .dropna(subset=basic_columns, how='all')    # drop rows where every basic column is empty
    .reset_index(drop=True)                     # contiguous 0..n-1 index after both removals
)
# Tag every row with its source code in a leading column.
sa.insert(0, sp, 'SA')

In [None]:
# Take the sheet named by `sheet` out of the IJ workbook.
ij = tracking_tools['tt_psc_IJ_v04.xlsx'][sheet]

In [None]:
# Build the cleaned IJ frame directly from the loaded workbook.
# Working from `tracking_tools` (instead of renaming `ij` in place and then
# rebinding it) leaves the cached raw sheet untouched and lets this cell be
# re-run on its own without a column-length mismatch.
ij = (
    tracking_tools['tt_psc_IJ_v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)           # apply the standard PEI column names
    .iloc[1:]                                   # discard the first row; safe on an empty sheet
    .dropna(subset=basic_columns, how='all')    # drop rows where every basic column is empty
    .reset_index(drop=True)                     # contiguous 0..n-1 index after both removals
)
# Tag every row with its source code in a leading column.
ij.insert(0, sp, 'IJ')

In [None]:
# Take the sheet named by `sheet` out of the LA workbook.
la = tracking_tools['tt_psc_LA_v04.xlsx'][sheet]

In [None]:
# Build the cleaned LA frame directly from the loaded workbook.
# Working from `tracking_tools` (instead of renaming `la` in place and then
# rebinding it) leaves the cached raw sheet untouched and lets this cell be
# re-run on its own without a column-length mismatch.
la = (
    tracking_tools['tt_psc_LA_v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)           # apply the standard PEI column names
    .iloc[1:]                                   # discard the first row; safe on an empty sheet
    .dropna(subset=basic_columns, how='all')    # drop rows where every basic column is empty
    .reset_index(drop=True)                     # contiguous 0..n-1 index after both removals
)
# Tag every row with its source code in a leading column.
la.insert(0, sp, 'LA')

In [None]:
# Take the sheet named by `sheet` out of the MT workbook.
mt = tracking_tools['tt_psc_MT_v04.xlsx'][sheet]

In [None]:
# Build the cleaned MT frame directly from the loaded workbook.
# Working from `tracking_tools` (instead of renaming `mt` in place and then
# rebinding it) leaves the cached raw sheet untouched and lets this cell be
# re-run on its own without a column-length mismatch.
mt = (
    tracking_tools['tt_psc_MT_v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)           # apply the standard PEI column names
    .iloc[1:]                                   # discard the first row; safe on an empty sheet
    .dropna(subset=basic_columns, how='all')    # drop rows where every basic column is empty
    .reset_index(drop=True)                     # contiguous 0..n-1 index after both removals
)
# Tag every row with its source code in a leading column.
mt.insert(0, sp, 'MT')

In [None]:
# Total number of rows across the five cleaned source frames.
pe_rows = sum(len(frame) for frame in (mt, yq, sa, ij, la))
pe_rows

In [None]:
# Stack the five cleaned frames into a single PEI table; ignore_index=True
# gives the result a fresh 0..n-1 row index.
pei = pd.concat([yq, sa, ij, la, mt], ignore_index=True)

In [None]:
# Show the combined PEI table.
pei

In [None]:
# All PEI csv file: save the combined table without the row index.
# NOTE(review): the workbooks were opened with passwords, while this export is
# a plain CSV — confirm that storing it unencrypted is intended.
pei.to_csv(r'001/0101_ps_pe_092300.csv', index=False)

# PEI Dataframe

In [None]:
def _strip_text(value):
    """Return ``value`` without surrounding whitespace if it is a string, else return it unchanged."""
    return value.strip() if isinstance(value, str) else value


# Reload the combined export and normalise it.
pe = pd.read_csv(r'001/0101_ps_pe_092300.csv')

# Trim string cells column by column with Series.map: DataFrame.applymap is
# deprecated since pandas 2.1, while Series.map works on old and new versions.
# Stripping runs before the date conversion so stray spaces around date text
# cannot interfere with parsing.
pe = pe.apply(lambda column: column.map(_strip_text))

# Parse the date columns and store the integer columns as nullable Int64.
pe[date_columns] = pe[date_columns].apply(pd.to_datetime)
pe[int_columns] = pe[int_columns].astype('Int64')

In [None]:
# Row and column counts of the reloaded frame.
pe.shape

In [None]:
# Column dtypes and non-null counts, to check the date and Int64 conversions above.
pe.info()

In [None]:
# First cleaned csv file: save the normalised frame without the row index.
pe.to_csv(r'010/0101_ps_pe_092301.csv', index=False)

# PEI Analysis Version

In [None]:
# Reload the first cleaned export as the analysis frame. The date and nullable
# integer dtypes are applied again after reading the CSV.
post_earthquake = pd.read_csv(r'010/0101_ps_pe_092301.csv')
for date_col in date_columns:
    post_earthquake[date_col] = pd.to_datetime(post_earthquake[date_col])
post_earthquake = post_earthquake.astype({int_col: 'Int64' for int_col in int_columns})

In [None]:
# Column dtypes and non-null counts of the analysis frame.
post_earthquake.info()

In [None]:
# Show the analysis frame.
post_earthquake

In [None]:
# Typo correction: values of 'GTZ' in the 'peloc' column become 'GZT'.
post_earthquake['peloc'] = post_earthquake['peloc'].replace('GTZ', 'GZT')

In [None]:
# Indexing PEI groups.
# Create the 'peindx' column at position 7 only when it is absent, so the cell
# can be run again without inserting a duplicate column.
peindx_missing = 'peindx' not in post_earthquake.columns
if peindx_missing:
    post_earthquake.insert(loc=7, column='peindx', value=np.nan)

# The first three PEI date columns are handed to indexing_groups.
group_date_columns = pei_date_columns[:3]
post_earthquake['peindx'] = indexing_groups(post_earthquake, group_date_columns)

In [None]:
# PEI case status ('pest'), derived from the follow-up date 'pef':
#    1 -> 'pef' is present and later than 'pe3' (service complete)
#   -1 -> 'pef' is present and on or before 'pe3' (error in follow-up date)
#    0 -> every other row (not completed)
if 'pest' not in post_earthquake.columns:
    post_earthquake.insert(loc=7, column='pest', value=0)

has_followup = post_earthquake['pef'].notna()
followup_after_pe3 = post_earthquake['pef'] > post_earthquake['pe3']
followup_not_after_pe3 = post_earthquake['pef'] <= post_earthquake['pe3']

pei_status = [
    has_followup & followup_after_pe3,
    has_followup & followup_not_after_pe3,
]
pei_status_values = [1, -1]
post_earthquake['pest'] = np.select(pei_status, pei_status_values, default=0)

In [None]:
# Duplicate check: no duplicated PEI rows were found.

In [None]:
# PEI Analysis Complete Dataframe: save the full analysis frame, including the
# 'peindx' and 'pest' columns, without the row index.
post_earthquake.to_csv(r'010/0101_ps_pe_092302.csv', index=False)

In [None]:
# PEI Analysis Basic Dataframe: an independent copy of the analysis frame
# without the name and note columns, saved without the row index.
excluded_columns = ['firstname', 'lastname', 'note']
basic_pe = post_earthquake.copy().drop(columns=excluded_columns)
basic_pe.to_csv(r'011/0101_ps_pe_092310.csv', index=False)

### PEI DATA PREPARED AND READY