In [130]:
# https://preppindata.blogspot.com/2021/07/2021-week-30-lift-your-spirits.html

import pandas as pd
import numpy as np
from datetime import date

### Input the Data

In [131]:
df = pd.read_csv(r'data\PD 2021 Wk 30 Input.csv')
df.head()

Unnamed: 0,Hour,Minute,From,To
0,0,1,G,8
1,0,2,4,G
2,0,2,11,G
3,0,3,B,G
4,0,4,1,G


### Create a TripID field based on the time of day
- Assume all trips took place on 12th July 2021

In [132]:
df['TipID'] = '20210712' +' '+ df['Hour'].astype(str) + ':' + df['Minute'].astype(str)
df['TipID'] = pd.to_datetime(df['TipID'])
df = df.sort_values(by='TipID')
df['ID'] = df.index+1
df

Unnamed: 0,Hour,Minute,From,To,TipID,ID
0,0,1,G,8,2021-07-12 00:01:00,1
1,0,2,4,G,2021-07-12 00:02:00,2
2,0,2,11,G,2021-07-12 00:02:00,3
3,0,3,B,G,2021-07-12 00:03:00,4
4,0,4,1,G,2021-07-12 00:04:00,5
...,...,...,...,...,...,...
1973,23,56,9,G,2021-07-12 23:56:00,1974
1974,23,56,G,1,2021-07-12 23:56:00,1975
1976,23,58,4,G,2021-07-12 23:58:00,1977
1975,23,58,2,7,2021-07-12 23:58:00,1976


### Calculate how many floors the lift has to travel between trips
- The order of floors is B, G, 1, 2, 3, etc.

In [133]:
df['From'] = pd.to_numeric(df['From'], errors='coerce').fillna(df['From'].map({'B': -1, 'G': 0}))
df['To'] = pd.to_numeric(df['To'], errors='coerce').fillna(df['To'].map({'B': -1, 'G': 0}))
df

Unnamed: 0,Hour,Minute,From,To,TipID,ID
0,0,1,0.0,8.0,2021-07-12 00:01:00,1
1,0,2,4.0,0.0,2021-07-12 00:02:00,2
2,0,2,11.0,0.0,2021-07-12 00:02:00,3
3,0,3,-1.0,0.0,2021-07-12 00:03:00,4
4,0,4,1.0,0.0,2021-07-12 00:04:00,5
...,...,...,...,...,...,...
1973,23,56,9.0,0.0,2021-07-12 23:56:00,1974
1974,23,56,0.0,1.0,2021-07-12 23:56:00,1975
1976,23,58,4.0,0.0,2021-07-12 23:58:00,1977
1975,23,58,2.0,7.0,2021-07-12 23:58:00,1976


In [134]:
df['Floors'] = abs(df['From'].shift(-1) - df['To'])
df

Unnamed: 0,Hour,Minute,From,To,TipID,ID,Floors
0,0,1,0.0,8.0,2021-07-12 00:01:00,1,4.0
1,0,2,4.0,0.0,2021-07-12 00:02:00,2,11.0
2,0,2,11.0,0.0,2021-07-12 00:02:00,3,1.0
3,0,3,-1.0,0.0,2021-07-12 00:03:00,4,1.0
4,0,4,1.0,0.0,2021-07-12 00:04:00,5,10.0
...,...,...,...,...,...,...,...
1973,23,56,9.0,0.0,2021-07-12 23:56:00,1974,0.0
1974,23,56,0.0,1.0,2021-07-12 23:56:00,1975,3.0
1976,23,58,4.0,0.0,2021-07-12 23:58:00,1977,2.0
1975,23,58,2.0,7.0,2021-07-12 23:58:00,1976,7.0


### Calculate which floor the majority of trips begin at - call this the Default Position

In [135]:
#value_counts method will return max count, for each floor as index
Default_Position = df['From'].value_counts().index[0]

### If every trip began from the same floor, how many floors would the lift need to travel to begin each journey?
- e.g. if the default position of the lift were floor 2 and the trip was starting from the 4th floor, this would be 2 floors that the lift would need to travel

In [136]:
df['Default_Position'] = Default_Position
df['travel from default position'] = abs(df['From'].shift(-1) - df['Default_Position'])
df

Unnamed: 0,Hour,Minute,From,To,TipID,ID,Floors,Default_Position,travel from default position
0,0,1,0.0,8.0,2021-07-12 00:01:00,1,4.0,0.0,4.0
1,0,2,4.0,0.0,2021-07-12 00:02:00,2,11.0,0.0,11.0
2,0,2,11.0,0.0,2021-07-12 00:02:00,3,1.0,0.0,1.0
3,0,3,-1.0,0.0,2021-07-12 00:03:00,4,1.0,0.0,1.0
4,0,4,1.0,0.0,2021-07-12 00:04:00,5,10.0,0.0,10.0
...,...,...,...,...,...,...,...,...,...
1973,23,56,9.0,0.0,2021-07-12 23:56:00,1974,0.0,0.0,0.0
1974,23,56,0.0,1.0,2021-07-12 23:56:00,1975,3.0,0.0,4.0
1976,23,58,4.0,0.0,2021-07-12 23:58:00,1977,2.0,0.0,2.0
1975,23,58,2.0,7.0,2021-07-12 23:58:00,1976,7.0,0.0,0.0


### How does the average floors travelled between trips compare to the average travel from the default position?

In [137]:
df_output = df.groupby('Default_Position').agg({'travel from default position':'mean', 'Floors':'mean'}).reset_index()
df_output.columns = ['Default Position', 'Avg travel from default position', 'Avg travel between trips currently']
df_output['Difference'] = df_output['Avg travel from default position'] - df_output['Avg travel between trips currently']

df_output[['Avg travel from default position','Avg travel between trips currently','Difference']] = df_output[['Avg travel from default position','Avg travel between trips currently','Difference']].round(2)

df_output

Unnamed: 0,Default Position,Avg travel from default position,Avg travel between trips currently,Difference
0,0.0,3.746586,4.451189,-0.704603


### Output the data

In [138]:
df_output.to_csv(r'output/2021-week30-output.csv')