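# Introduction

This notebook generates synthetic hourly temperature readings for a walk-in chiller at eight sites across 2020, injects labelled fault conditions (undercooling, overcooling, unhealthy, faulty sensor, off at night, door indiscipline) into selected sites and date ranges, and exports the hourly data and its monthly averages to CSV.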

In [1]:
import pandas as pd
import numpy as np
import itertools
import datetime
import plotly.express as px

In [2]:
# Dimensions of the synthetic dataset: sites, device types, calendar days and hours of the day
list_sites = [
    'Delhi',
    'Gurugram',
    'Noida',
    'Manesar',
    'Greater Noida',
    'Jaipur',
    'Sonipat',
    'Chandigarh',
]
list_devices = ['Walkin Chiller']

# Inclusive date window, expanded to one Timestamp per day
start_date, end_date = '2020-01-01', '2020-12-31'
list_dates = pd.date_range(start=start_date, end=end_date).tolist()

# Hours 0 through 23
list_hour = [hour for hour in range(24)]


In [3]:
# Build one row for every (site, device, date, hour) combination
all_comb = [list_sites, list_devices, list_dates, list_hour]
grid_columns = ['Site', 'Device', 'Log.Date', 'Log.Hour']
grid_rows = itertools.product(*all_comb)
df = pd.DataFrame(grid_rows, columns=grid_columns)
df.head()

Unnamed: 0,Site,Device,Log.Date,Log.Hour
0,Delhi,Walkin Chiller,2020-01-01,0
1,Delhi,Walkin Chiller,2020-01-01,1
2,Delhi,Walkin Chiller,2020-01-01,2
3,Delhi,Walkin Chiller,2020-01-01,3
4,Delhi,Walkin Chiller,2020-01-01,4


In [4]:
# Baseline readings: every row starts as an 'Ideal' reading drawn from a
# normal distribution with mean 4 and standard deviation 0.7.
# Seed NumPy's global generator so the draws here (and any later np.random
# calls in the notebook) are the same on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

df['Temp'] = np.random.normal(loc = 4, scale = 0.7, size = df.shape[0])
df['Condition'] = 'Ideal'

In [6]:
# Add undercooling cases: raise Temp by 5 for one site over an inclusive date window.
# Note: the cell reads and reassigns `df`, so re-running it shifts the same rows again.
iss_start = datetime.datetime.strptime('2020-12-10', '%Y-%m-%d')
iss_stop = datetime.datetime.strptime('2020-12-21', '%Y-%m-%d')
iss_site = 'Gurugram'

# Build the row selector once; between() is inclusive on both ends
iss_mask = (df['Site'] == iss_site) & df['Log.Date'].between(iss_start, iss_stop)

# .copy() makes the affected rows an independent frame, so the column writes
# below do not trigger SettingWithCopyWarning
undercool_data = df.loc[iss_mask].copy()
df = df.loc[~iss_mask]

undercool_data['Temp'] += 5
undercool_data['Condition'] = 'Undercool'

# Put the modified rows back alongside the untouched ones
df = pd.concat([df, undercool_data])

#plot
fig = px.line(undercool_data, x='Log.Hour', y='Temp', color='Log.Date',
              title=f'Undercool: {iss_site}')
fig.show()

In [7]:
# Add overcooling cases: lower Temp by 5 for one site over an inclusive date window.
# Note: the cell reads and reassigns `df`, so re-running it shifts the same rows again.
iss_start = datetime.datetime.strptime('2020-12-25', '%Y-%m-%d')
iss_stop = datetime.datetime.strptime('2020-12-30', '%Y-%m-%d')
iss_site = 'Manesar'

# Build the row selector once; between() is inclusive on both ends
iss_mask = (df['Site'] == iss_site) & df['Log.Date'].between(iss_start, iss_stop)

# .copy() makes the affected rows an independent frame, so the column writes
# below do not trigger SettingWithCopyWarning
overcool_data = df.loc[iss_mask].copy()
df = df.loc[~iss_mask]

overcool_data['Temp'] -= 5
overcool_data['Condition'] = 'Overcool'

# Put the modified rows back alongside the untouched ones
df = pd.concat([df, overcool_data])

#plot
fig = px.line(overcool_data, x='Log.Hour', y='Temp', color='Log.Date',
              title=f'Overcool: {iss_site}')
fig.show()

In [8]:
# Add unhealthy cases: raise Temp by 15 for one site over an inclusive date window.
# Note: the cell reads and reassigns `df`, so re-running it shifts the same rows again.
iss_start = datetime.datetime.strptime('2020-12-05', '%Y-%m-%d')
iss_stop = datetime.datetime.strptime('2020-12-13', '%Y-%m-%d')
iss_site = 'Noida'

# Build the row selector once; between() is inclusive on both ends
iss_mask = (df['Site'] == iss_site) & df['Log.Date'].between(iss_start, iss_stop)

# .copy() makes the affected rows an independent frame, so the column writes
# below do not trigger SettingWithCopyWarning
unhealthy_data = df.loc[iss_mask].copy()
df = df.loc[~iss_mask]

unhealthy_data['Temp'] += 15
unhealthy_data['Condition'] = 'Unhealthy'

# Put the modified rows back alongside the untouched ones
df = pd.concat([df, unhealthy_data])

#plot
fig = px.line(unhealthy_data, x='Log.Hour', y='Temp', color='Log.Date',
              title=f'Unhealthy: {iss_site}')
fig.show()

In [9]:
# Add sensor faulty cases: raise Temp by 35 for one site over an inclusive date window.
# Note: the cell reads and reassigns `df`, so re-running it shifts the same rows again.
iss_start = datetime.datetime.strptime('2020-12-18', '%Y-%m-%d')
iss_stop = datetime.datetime.strptime('2020-12-22', '%Y-%m-%d')
iss_site = 'Greater Noida'

# Build the row selector once; between() is inclusive on both ends
iss_mask = (df['Site'] == iss_site) & df['Log.Date'].between(iss_start, iss_stop)

# .copy() makes the affected rows an independent frame, so the column writes
# below do not trigger SettingWithCopyWarning
senissue_data = df.loc[iss_mask].copy()
df = df.loc[~iss_mask]

senissue_data['Temp'] += 35
senissue_data['Condition'] = 'Sensor Faulty'

# Put the modified rows back alongside the untouched ones
df = pd.concat([df, senissue_data])

#plot
fig = px.line(senissue_data, x='Log.Hour', y='Temp', color='Log.Date',
              title=f'Sensor Faulty: {iss_site}')
fig.show()

In [10]:
# Add off in Night cases: for one site over an inclusive date window, raise Temp
# by 15 for readings at hours 0 through 10; every row in the window is labelled.
# Note: the cell reads and reassigns `df`, so re-running it shifts the same rows again.
iss_start = datetime.datetime.strptime('2020-12-02', '%Y-%m-%d')
iss_stop = datetime.datetime.strptime('2020-12-18', '%Y-%m-%d')
iss_site = 'Delhi'

# Build the row selector once; between() is inclusive on both ends
iss_mask = (df['Site'] == iss_site) & df['Log.Date'].between(iss_start, iss_stop)

# .copy() makes the affected rows an independent frame, so the writes below
# do not trigger SettingWithCopyWarning
nbh_off_data = df.loc[iss_mask].copy()
df = df.loc[~iss_mask]

# Only the hours up to and including 10 are shifted
nbh_off_data.loc[nbh_off_data['Log.Hour'] <= 10, 'Temp'] += 15
nbh_off_data['Condition'] = 'NBH Off'

# Put the modified rows back alongside the untouched ones
df = pd.concat([df, nbh_off_data])

#plot
fig = px.line(nbh_off_data, x='Log.Hour', y='Temp', color='Log.Date',
              title=f'NBH Off: {iss_site}')
fig.show()

In [11]:
# Add door indiscipline cases: every row in the window is labelled, and for each
# (site, day) one reading at a randomly chosen hour between 8 and 21 gets Temp + 5.
# Note: the cell reads and reassigns `df`, so re-running it shifts rows again.
iss_start = datetime.datetime.strptime('2020-12-12', '%Y-%m-%d')
iss_stop = datetime.datetime.strptime('2020-12-25', '%Y-%m-%d')
iss_sites = ['Jaipur']
iss_dates = pd.date_range(iss_start, iss_stop).tolist()

# Build the row selector once; between() is inclusive on both ends
iss_mask = df['Site'].isin(iss_sites) & df['Log.Date'].between(iss_start, iss_stop)

# .copy() makes the affected rows an independent frame, so the writes below
# do not trigger SettingWithCopyWarning
drissue_data = df.loc[iss_mask].copy()
df = df.loc[~iss_mask]

drissue_data['Condition'] = 'Door Indiscipline'

# Dataframe representing Temp spike instances: one (site, date, hour) per site-day
iss_df = pd.DataFrame(itertools.product(iss_sites, iss_dates), columns=['Site', 'Log.Date'])
iss_df['Log.Hour'] = np.random.choice(range(8, 22), iss_df.shape[0])

# Find the readings that coincide with a spike in one keyed join instead of a
# per-row loop. reset_index() exposes the original row labels as an 'index'
# column so the matches can be written back with .loc.
spike_keys = ['Site', 'Log.Date', 'Log.Hour']
spike_labels = drissue_data.reset_index().merge(iss_df, on=spike_keys, how='inner')['index'].to_numpy()
drissue_data.loc[spike_labels, 'Temp'] += 5

# Put the modified rows back alongside the untouched ones
df = pd.concat([df, drissue_data])

#plot
fig = px.line(drissue_data, x='Log.Hour', y='Temp', color='Log.Date',
              title='Door Indiscipline: ' + ', '.join(iss_sites))
fig.show()

In [12]:
# Plot the occurrence of every condition, coloured by site
fig = px.histogram(data_frame=df, x='Condition', color='Site')
fig.show()

In [25]:
# Derive calendar fields and the monthly reporting date for each reading
df['Month'] = df['Log.Date'].dt.month_name()
df['Year'] = df['Log.Date'].dt.year

# RepDate is the first day of the reading's month. Truncate the timestamp
# directly rather than formatting it to text and re-parsing it with '%B',
# which is slower and depends on month-name parsing.
df['RepDate'] = df['Log.Date'].dt.to_period('M').dt.to_timestamp()
df.head()

Unnamed: 0,Site,Device,Log.Date,Log.Hour,Temp,Condition,Month,Year,RepDate
0,Delhi,Walkin Chiller,2020-01-01,0,3.712284,Ideal,January,2020,2020-01-01
1,Delhi,Walkin Chiller,2020-01-01,1,3.2429,Ideal,January,2020,2020-01-01
2,Delhi,Walkin Chiller,2020-01-01,2,3.511143,Ideal,January,2020,2020-01-01
3,Delhi,Walkin Chiller,2020-01-01,3,4.634449,Ideal,January,2020,2020-01-01
4,Delhi,Walkin Chiller,2020-01-01,4,5.386477,Ideal,January,2020,2020-01-01


In [29]:
# Mean temperature per site, device and reporting month, with the keys kept as columns
group_keys = ['Site', 'Device', 'RepDate']
month_df = df.groupby(group_keys, as_index=False)['Temp'].mean()
month_df.head()

Unnamed: 0,Site,Device,RepDate,Temp
0,Chandigarh,Walkin Chiller,2020-01-01,3.976773
1,Chandigarh,Walkin Chiller,2020-02-01,4.039595
2,Chandigarh,Walkin Chiller,2020-03-01,4.014187
3,Chandigarh,Walkin Chiller,2020-04-01,3.962592
4,Chandigarh,Walkin Chiller,2020-05-01,3.981598


In [30]:
# Write the hourly and monthly tables to CSV, omitting the index column
exports = {'HourlyData.csv': df, 'MonthlyData.csv': month_df}
for csv_name, frame in exports.items():
    frame.to_csv(csv_name, index=False)