# Pre-processing Notebook 3 for 90 days clients data 
<br>
<em> Word counts: In GettingStarted.ipynb (master branch), each of the 32 buckets has a number of associated words. The counts of these words were extracted from the logs in order to maintain data anonymization while still providing useful insights from the logs, e.g. Addiction, Bar, Biometrics, Brawl, etc. This notebook computes the daily aggregated sum of each individual word count for each day.</em>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import scipy as sci
import scipy.special as scisp
import scipy.stats as scist
import datetime, copy, imp, sys
sys.path.append('../../lib')
from tqdm.auto import tqdm, trange
from tqdm.notebook import tqdm
# Register tqdm's pandas integration, which provides the `progress_apply`
# method used on the groupby objects later in this notebook. Note that `tqdm`
# here is the one from tqdm.notebook: the second `import tqdm` above rebinds
# the name first imported from tqdm.auto.
tqdm.pandas()
# Switch matplotlib to interactive mode.
plt.ion()

In [None]:
# Load the filtered 90-days client table from HDF5. The path is relative, so
# the file is looked up in the current working directory.
validClientsFirst90DaysDf2 = pd.read_hdf('validClientsFirst90DaysDf2.h5') # Read the filtered 90 days

In [None]:
# Preview the first rows of the loaded table (displayed as the cell output).
validClientsFirst90DaysDf2.head()

### Feature engineering of word counts
- All functions which are Aggregates (sum) follow the naming convention DailyAggXYZ
- The aggregations are split into separate functions due to RAM constraints
- The computed individual aggregates are initially stored in temporary variables and are then appended sequentially to a single dataframe (tempMergeNonCategorical)

In [None]:
def _dailyAggSum(tbl, column):
    """Return the per-day sum of one word-count column of ``tbl``.

    Shared implementation behind every ``DailyAggXYZ`` function below.

    Parameters
    ----------
    tbl : pandas.DataFrame
        Frame that contains ``column`` and whose index exposes ``.date``
        (e.g. a ``DatetimeIndex``).
    column : str
        Name of the word-count column to aggregate.

    Returns
    -------
    pandas.DataFrame
        One row per distinct calendar date in the index. The date is held in
        a column named ``index`` and the summed counts in ``column``.

    Raises
    ------
    KeyError
        If ``column`` is not a column of ``tbl``.
    """
    # Bracket lookup instead of attribute access, so a column name can never
    # be shadowed by a DataFrame attribute or method of the same name.
    return tbl[column].groupby(tbl.index.date).sum().reset_index()


def DailyAggAddiction(tbl):
    """Daily sum of the ``Addiction`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Addiction')


def DailyAggBar(tbl):
    """Daily sum of the ``Bar`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Bar')


def DailyAggBiometrics(tbl):
    """Daily sum of the ``Biometrics`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Biometrics')


def DailyAggBrawl(tbl):
    """Daily sum of the ``Brawl`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Brawl')


def DailyAggCPS(tbl):
    """Daily sum of the ``CPS`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'CPS')


def DailyAggConflict(tbl):
    """Daily sum of the ``Conflict`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Conflict')


def DailyAggDeath(tbl):
    """Daily sum of the ``Death`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Death')


def DailyAggEMS(tbl):
    """Daily sum of the ``EMS`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'EMS')


def DailyAggEducation(tbl):
    """Daily sum of the ``Education`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Education')


def DailyAggEmployment(tbl):
    """Daily sum of the ``Employment`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Employment')


def DailyAggFinancial(tbl):
    """Daily sum of the ``Financial`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Financial')


def DailyAggFriendsFamily(tbl):
    """Daily sum of the ``FriendsFamily`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'FriendsFamily')


def DailyAggGun(tbl):
    """Daily sum of the ``Gun`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Gun')


def DailyAggHealth(tbl):
    """Daily sum of the ``Health`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Health')


def DailyAggHousing(tbl):
    """Daily sum of the ``Housing`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Housing')


def DailyAggID(tbl):
    """Daily sum of the ``ID`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'ID')


def DailyAggIndigenous(tbl):
    """Daily sum of the ``Indigenous`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Indigenous')


def DailyAggJustice(tbl):
    """Daily sum of the ``Justice`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Justice')


def DailyAggKnife(tbl):
    """Daily sum of the ``Knife`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Knife')


def DailyAggMedication(tbl):
    """Daily sum of the ``Medication`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Medication')


def DailyAggMentalHealth(tbl):
    """Daily sum of the ``MentalHealth`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'MentalHealth')


def DailyAggNegativeWord(tbl):
    """Daily sum of the ``NegativeWord`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'NegativeWord')


def DailyAggOverdose(tbl):
    """Daily sum of the ``Overdose`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Overdose')


def DailyAggPhysicalHealth(tbl):
    """Daily sum of the ``PhysicalHealth`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'PhysicalHealth')


def DailyAggPhysicalViolence(tbl):
    """Daily sum of the ``PhysicalViolence`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'PhysicalViolence')


def DailyAggPositiveWord(tbl):
    """Daily sum of the ``PositiveWord`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'PositiveWord')


def DailyAggProperty(tbl):
    """Daily sum of the ``Property`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Property')


def DailyAggSeniors(tbl):
    """Daily sum of the ``Seniors`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Seniors')


def DailyAggSexualViolence(tbl):
    """Daily sum of the ``SexualViolence`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'SexualViolence')


def DailyAggSpray(tbl):
    """Daily sum of the ``Spray`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Spray')


def DailyAggSupports(tbl):
    """Daily sum of the ``Supports`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Supports')


def DailyAggWeapon(tbl):
    """Daily sum of the ``Weapon`` word counts in ``tbl``."""
    return _dailyAggSum(tbl, 'Weapon')

In [None]:
# Make 'Date' the index so the DailyAgg* functions can group rows through
# `tbl.index.date` (assumes 'Date' holds datetime values -- TODO confirm).
# NOTE: this cell is not re-runnable as-is; after the first run 'Date' is no
# longer a column, so a second run raises KeyError.
validClientsFirst90DaysDf2 = validClientsFirst90DaysDf2.set_index('Date')

In [None]:
# Compute the per-client daily sums for the first two word-count buckets.
# `progress_apply` is the tqdm-wrapped variant of groupby `apply`; each
# DailyAgg* call receives the rows of one ClientId.
tempFuncTbl1 = validClientsFirst90DaysDf2.groupby("ClientId").progress_apply(DailyAggAddiction)
tempFuncTbl2 = validClientsFirst90DaysDf2.groupby("ClientId").progress_apply(DailyAggBar)

In [None]:
# Reshape the Addiction aggregate into the base table: move both index levels
# into columns, turn the 0-based per-client row position ('level_1') into a
# 1-based day counter 'Ind', name the date column 'Date', and index the
# result by (ClientId, Ind).
tempDf = (
    pd.DataFrame(tempFuncTbl1)
    .reset_index(level=[0, 1])
    .assign(level_1=lambda frame: frame['level_1'] + 1)
    .rename(columns={'level_1': 'Ind', 'index': 'Date'})
    .set_index(['ClientId', 'Ind'])
)

In [None]:
# Preview the reshaped Addiction aggregate (displayed as the cell output).
tempDf.head(5)

In [None]:
# Reshape the Bar aggregate the same way as the base table: flatten both index
# levels, convert the 0-based per-client row position into the 1-based 'Ind',
# rename the date column to 'Date', and index by (ClientId, Ind).
tempDf2 = (
    pd.DataFrame(tempFuncTbl2)
    .reset_index(level=[0, 1])
    .assign(level_1=lambda frame: frame['level_1'] + 1)
    .rename(columns={'level_1': 'Ind', 'index': 'Date'})
    .set_index(['ClientId', 'Ind'])
)

In [None]:
# Preview the reshaped Bar aggregate (displayed as the cell output).
tempDf2.head(5)

In [None]:
# Start the merged table: tempDf (Addiction) supplies the single 'Date'
# column, so 'Date' is dropped from tempDf2 before joining; otherwise the
# join would hit overlapping column names and require suffixes.
tempDf2 = tempDf2.drop(columns=['Date'])  # to have only one Date column and to avoid suffixing while join
# Left join on the shared (ClientId, Ind) index keeps every row of tempDf.
tempMergeNonCategorical=tempDf.join(tempDf2, how ='left')

In [None]:
# Aggregate the remaining 30 word-count buckets and join each one onto
# tempMergeNonCategorical. The buckets are processed one at a time, and the
# temporaries are rebound on every iteration, so only one aggregate is held
# alongside the merged table at any moment. The tuple order determines the
# column order of the merged table.
remainingDailyAggs = (
    DailyAggBiometrics,
    DailyAggBrawl,
    DailyAggCPS,
    DailyAggConflict,
    DailyAggDeath,
    DailyAggEMS,
    DailyAggEducation,
    DailyAggEmployment,
    DailyAggFinancial,
    DailyAggFriendsFamily,
    DailyAggGun,
    DailyAggHealth,
    DailyAggHousing,
    DailyAggID,
    DailyAggIndigenous,
    DailyAggJustice,
    DailyAggKnife,
    DailyAggMedication,
    DailyAggMentalHealth,
    DailyAggNegativeWord,
    DailyAggOverdose,
    DailyAggPhysicalHealth,
    DailyAggPhysicalViolence,
    DailyAggPositiveWord,
    DailyAggProperty,
    DailyAggSeniors,
    DailyAggSexualViolence,
    DailyAggSpray,
    DailyAggSupports,
    DailyAggWeapon,
)

for dailyAggFunc in remainingDailyAggs:
    # Per-client daily sums for this bucket.
    tempFuncTbl2 = validClientsFirst90DaysDf2.groupby("ClientId").progress_apply(dailyAggFunc)
    tempDf2 = pd.DataFrame(tempFuncTbl2)
    tempDf2 = tempDf2.reset_index(level=[0, 1])
    # 'level_1' is the 0-based row position within each client's aggregate;
    # +1 turns it into the 1-based day counter 'Ind'.
    tempDf2['level_1'] = tempDf2['level_1'] + 1
    tempDf2 = tempDf2.rename(columns={'level_1': 'Ind', 'index': 'Date'})
    tempDf2 = tempDf2.set_index(['ClientId', 'Ind'])
    # The merged table already carries 'Date'; drop it here so the join does
    # not hit overlapping column names.
    tempDf2 = tempDf2.drop(columns=['Date'])
    tempMergeNonCategorical = tempMergeNonCategorical.join(tempDf2, how='left')

### Non-categorical features
- Word counts are non-categorical features
- There are also other non-categorical features (not word counts), which are pre-processed along with the categorical features in the preProcessing_mergeAll_90_Days_From_First_Sleep_Date notebook

In [None]:
# Display the merged daily word-count table (shown as the cell output).
tempMergeNonCategorical

<h3> Saving the pre-processed word counts to disk </h3>

In [None]:
# Persist the merged table under key 'df'. mode='w' creates the file anew, so
# an existing 'mergedCounts_90DaysDf2.h5' in the working directory is replaced.
tempMergeNonCategorical.to_hdf('mergedCounts_90DaysDf2.h5',key='df',mode='w')