In [1]:
import numpy as np
import pandas as pd
import warnings
from my_analysis_toolkit import read_encrypted_excels, indexing_groups
from my_static_data import tt_passwords, trw_columns, trw_date_columns, trw_basic_columns, trw_int_columns

In [2]:
# Show floats with one fixed decimal instead of scientific notation.
# To restore the pandas default, run the commented line below.
pd.options.display.float_format = '{:.1f}'.format
# pd.reset_option('display.float_format')

# Never truncate the text shown inside a column.
pd.options.display.max_colwidth = None

# Silence the UserWarning that openpyxl raises about data validation.
warnings.filterwarnings(action="ignore", category=UserWarning, module="openpyxl")

In [3]:
# Static Code Variables
# Sheet read from every workbook, and the name of the column that labels
# which workbook each row came from.
sheet, sp = 'TRW', 'trsp'

# Local aliases for the TRW settings imported from my_static_data.
passwords = tt_passwords
columns_names, basic_columns = trw_columns, trw_basic_columns
date_columns, int_columns = trw_date_columns, trw_int_columns

In [None]:
# Load the tracking workbooks from the '000' location using the stored passwords.
# The result is indexed first by workbook file name and then by sheet name
# (see how it is used in the cells below).
# NOTE(review): read_encrypted_excels is a project helper; presumably it opens every
# workbook found under '000' - confirm against my_analysis_toolkit.
tracking_tools = read_encrypted_excels(r'000', passwords)

# Process TRW Sheets

In [None]:
# Raw TRW sheet of the YQ workbook.
# NOTE(review): this file name uses 'YQ-v04' (hyphen) while the other four workbooks
# use '_v04' (underscore) - confirm it matches the actual file name.
yq = tracking_tools['tt_psc_YQ-v04.xlsx'][sheet]

In [None]:
yq

In [None]:
# Build the cleaned YQ frame directly from the loaded workbook so this cell can be
# re-run on its own. The previous version renamed and extended `yq` in place, so a
# second run failed with a column-length mismatch once the extra column was inserted,
# and it also renamed the columns of the frame held inside `tracking_tools`.
yq = (
    tracking_tools['tt_psc_YQ-v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)           # apply the standard TRW column names
    .iloc[1:]                                  # discard the first row of the sheet
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')   # drop rows empty in every basic column
)
# Label every row with the workbook it came from (first column, named by `sp`).
yq.insert(0, sp, 'YQ')

In [None]:
# Raw TRW sheet of the SA workbook.
sa = tracking_tools['tt_psc_SA_v04.xlsx'][sheet]

In [None]:
# Build the cleaned SA frame directly from the loaded workbook so this cell can be
# re-run on its own. The previous version renamed and extended `sa` in place, so a
# second run failed with a column-length mismatch once the extra column was inserted,
# and it also renamed the columns of the frame held inside `tracking_tools`.
sa = (
    tracking_tools['tt_psc_SA_v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)           # apply the standard TRW column names
    .iloc[1:]                                  # discard the first row of the sheet
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')   # drop rows empty in every basic column
)
# Label every row with the workbook it came from (first column, named by `sp`).
sa.insert(0, sp, 'SA')

In [None]:
# Raw TRW sheet of the IJ workbook.
ij = tracking_tools['tt_psc_IJ_v04.xlsx'][sheet]

In [None]:
# Build the cleaned IJ frame directly from the loaded workbook so this cell can be
# re-run on its own. The previous version renamed and extended `ij` in place, so a
# second run failed with a column-length mismatch once the extra column was inserted,
# and it also renamed the columns of the frame held inside `tracking_tools`.
ij = (
    tracking_tools['tt_psc_IJ_v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)           # apply the standard TRW column names
    .iloc[1:]                                  # discard the first row of the sheet
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')   # drop rows empty in every basic column
)
# Label every row with the workbook it came from (first column, named by `sp`).
ij.insert(0, sp, 'IJ')

In [None]:
# Raw TRW sheet of the LA workbook.
la = tracking_tools['tt_psc_LA_v04.xlsx'][sheet]

In [None]:
# Build the cleaned LA frame directly from the loaded workbook so this cell can be
# re-run on its own. The previous version renamed and extended `la` in place, so a
# second run failed with a column-length mismatch once the extra column was inserted,
# and it also renamed the columns of the frame held inside `tracking_tools`.
la = (
    tracking_tools['tt_psc_LA_v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)           # apply the standard TRW column names
    .iloc[1:]                                  # discard the first row of the sheet
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')   # drop rows empty in every basic column
)
# Label every row with the workbook it came from (first column, named by `sp`).
la.insert(0, sp, 'LA')

In [None]:
# Raw TRW sheet of the MT workbook.
mt = tracking_tools['tt_psc_MT_v04.xlsx'][sheet]

In [None]:
# Build the cleaned MT frame directly from the loaded workbook so this cell can be
# re-run on its own. The previous version renamed and extended `mt` in place, so a
# second run failed with a column-length mismatch once the extra column was inserted,
# and it also renamed the columns of the frame held inside `tracking_tools`.
mt = (
    tracking_tools['tt_psc_MT_v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)           # apply the standard TRW column names
    .iloc[1:]                                  # discard the first row of the sheet
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')   # drop rows empty in every basic column
)
# Label every row with the workbook it came from (first column, named by `sp`).
mt.insert(0, sp, 'MT')

In [None]:
# Total row count across the five cleaned workbook frames, shown as the cell output.
pe_rows = sum(len(frame) for frame in (mt, yq, sa, ij, la))
pe_rows

In [None]:
# Stack the five per-workbook frames into a single table with a fresh 0..n-1 index.
tr_workshop = pd.concat([yq, sa, ij, la, mt], ignore_index=True)

In [None]:
tr_workshop

In [None]:
# Export the combined TRW table (all five workbooks) to CSV, without the index column.
tr_workshop.to_csv(r'001/0110_ps_tr_092300.csv', index=False)

#  TRW Dataframe

In [4]:
# Reload the combined export and normalise its dtypes.
trw = pd.read_csv(r'001/0110_ps_tr_092300.csv')
# Parse each date column into datetime64.
trw[date_columns] = trw[date_columns].apply(pd.to_datetime)
# Trim surrounding whitespace from text cells and leave every other value untouched.
# A column-wise Series.map is used instead of DataFrame.applymap, which is deprecated
# since pandas 2.1; Series.map behaves the same on older and newer pandas versions.
trw = trw.apply(
    lambda column: column.map(lambda cell: cell.strip() if isinstance(cell, str) else cell)
)
# Nullable integers keep whole-number columns integral even when values are missing.
trw[int_columns] = trw[int_columns].astype('Int64')

In [5]:
trw.shape

(80, 12)

In [6]:
trw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80 entries, 0 to 79
Data columns (total 12 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   trsp       80 non-null     object        
 1   trspi      80 non-null     Int64         
 2   rid        80 non-null     object        
 3   fcid       14 non-null     Int64         
 4   firstname  80 non-null     object        
 5   lastname   80 non-null     object        
 6   tr1        70 non-null     datetime64[ns]
 7   tr2        64 non-null     datetime64[ns]
 8   trt        80 non-null     Int64         
 9   fac1       34 non-null     object        
 10  fac2       34 non-null     object        
 11  note       6 non-null      object        
dtypes: Int64(3), datetime64[ns](2), object(7)
memory usage: 7.9+ KB


In [7]:
# Save the first cleaned version of the TRW table to CSV, without the index column.
trw.to_csv(r'010/0110_ps_tr_092301.csv', index=False)

# TRW Analysis Version

In [8]:
# Reload the cleaned CSV and restore the dtypes that CSV storage does not keep:
# datetime64 for the date columns and nullable Int64 for the integer columns.
tr = pd.read_csv(r'010/0110_ps_tr_092301.csv')
parsed_dates = {name: pd.to_datetime(tr[name]) for name in date_columns}
nullable_ints = {name: 'Int64' for name in int_columns}
tr = tr.assign(**parsed_dates).astype(nullable_ints)

In [9]:
tr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80 entries, 0 to 79
Data columns (total 12 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   trsp       80 non-null     object        
 1   trspi      80 non-null     Int64         
 2   rid        80 non-null     object        
 3   fcid       14 non-null     Int64         
 4   firstname  80 non-null     object        
 5   lastname   80 non-null     object        
 6   tr1        70 non-null     datetime64[ns]
 7   tr2        64 non-null     datetime64[ns]
 8   trt        80 non-null     Int64         
 9   fac1       34 non-null     object        
 10  fac2       34 non-null     object        
 11  note       6 non-null      object        
dtypes: Int64(3), datetime64[ns](2), object(7)
memory usage: 7.9+ KB


In [14]:
tr

Unnamed: 0,trsp,trspi,rid,fcid,firstname,lastname,trindx,tr1,tr2,trt,fac1,fac2,note
0,YQ,1,R0003,8110020015,عبير,خطيب,1,2021-10-25,2021-11-01,2,,,
1,YQ,2,R0005,8110020021,فاطمة,العلي,1,2021-10-25,2021-11-01,2,,,
2,YQ,3,R0029,8110020036,ميساء,الجمعة,1,2021-10-25,2021-11-01,2,,,
3,YQ,4,R0089,8110060012,حنان,جزار,1,2021-10-25,2021-11-01,2,,,
4,YQ,5,R0202,,وفاء,مشاعل,2,2022-03-23,2022-03-28,2,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,IJ,28,R0327,8110020095,شريفة,احمد ابراهيم,8,2023-08-24,2023-08-25,2,Israa,Hiba,تم ضمها للورشة لان موعد الجلسات بعيد
74,IJ,29,R0326,,اسماء,عزيزة,8,2023-08-24,2023-08-25,2,Israa,Hiba,
75,IJ,30,R0631,,ليلى,شاهين,8,NaT,2023-08-25,1,Israa,Hiba,
76,IJ,31,R0354,,الاء,الجلم,8,2023-08-24,2023-08-25,2,Israa,Hiba,


In [11]:
# Indexing PEI groups
# Reserve the 'trindx' column at position 6 on the first run only, so that re-running
# this cell does not try to insert the same column twice.
needs_index_column = 'trindx' not in tr.columns
if needs_index_column:
    tr.insert(loc=6, column='trindx', value=np.nan)

# Fill the column with the group index computed by the project helper.
tr['trindx'] = indexing_groups(tr, trw_date_columns)

In [13]:
# Remove clients who did not attend: their rows carry a group index (trindx) of 0.
absent_rows = tr.loc[tr['trindx'] == 0].index
tr = tr.drop(index=absent_rows)

In [17]:
# No duplicated rows were found in the TRW data.

In [18]:
# TRW Analysis Complete Dataframe: save the indexed table, with all columns, to CSV
# without the index column.
tr.to_csv(r'010/0110_ps_tr_092302.csv', index=False)

In [19]:
# TRW Analysis Basic Dataframe
# Same rows as `tr`, minus the name, facilitator and note columns; `tr` itself is
# left unchanged because drop() returns a new frame.
omitted_columns = ['firstname', 'lastname', 'fac1', 'fac2', 'note']
basic_tr = tr.drop(columns=omitted_columns)
basic_tr.to_csv(r'011/0110_ps_tr_092310.csv', index=False)

### TRW DATA PREPARED AND READY