### Prepping Data Challenge: Property Management (Week 15)


### Requirements
- Input the Rental Contracts data
- Work out the length of each contract in months 
- Work out the number of months until each contract expires (imagine today is 13th April 2022)
- Input the Office Space Prices data and join it to the contracts table
- Remove duplicated fields
- Create a row for each month that a rental contract will be live
  - Retain the details for each of the contracts in the new rows
- Edit 14/04/2022: Be careful at this point that the number of rows for each Office ID is equal to the Contract length
- Calculate the cumulative monthly cost of each office space contract
  - Remember we only have one contract per company
- This will create our first output
- Create a table that details total rent paid for completed years across all contracts and year to date figures for the current year, which would update as time goes on
- This will create our second output

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [2]:
# Input the Rental Contracts data.
df1 = pd.read_csv('WK15-Rental Contracts.csv', parse_dates=['Contract Start','Contract End'], dayfirst=True)

In [3]:
df1.head()

Unnamed: 0,ID,Country,City,Address,Company,Office Size,Contract Start,Contract End
0,1,United Kingdom,London,22487 Ridgeway Pass,"McLaughlin, Rutherford and Rohan",Medium,2021-01-09,2025-01-08
1,2,United Kingdom,Manchester,3 Graceland Lane,Nolan-Oberbrunner,Small,2021-02-06,2023-02-05
2,3,United Kingdom,Newcastle,22 Corben Circle,Schoen and Sons,Large,2021-07-02,2026-07-01
3,4,United Kingdom,Liverpool,628 Superior Plaza,"Olson, Berge and Walsh",Medium,2021-07-30,2024-07-29
4,5,United Kingdom,Liverpool,83179 Debra Pass,Hilll-Trantow,Small,2021-10-24,2024-10-23


In [4]:
#Work out the number of months until each contract expires (imagine today is 13th April 2022)
df1['Contract Start Month'] = df1['Contract Start'].apply(lambda x: x.replace(day=1))
df1['Contract End Month'] = df1['Contract End'].apply(lambda x: (x.replace(day=1) - timedelta(days=1)).replace(day=1))

In [5]:
df1['Contract Length'] = ((df1['Contract End'] - df1['Contract Start'])/
                        np.timedelta64(1, 'M')).astype('int')
df1['Months Until Expiry'] = ((df1['Contract End'] - datetime(2022,4,13))/
                        np.timedelta64(1, 'M')).astype('int')

In [6]:
#Input the Office Space Prices data and join it to the contracts table
df2 = pd.read_csv('WK15-Office Space Prices.csv')

In [7]:
df1.head()

Unnamed: 0,ID,Country,City,Address,Company,Office Size,Contract Start,Contract End,Contract Start Month,Contract End Month,Contract Length,Months Until Expiry
0,1,United Kingdom,London,22487 Ridgeway Pass,"McLaughlin, Rutherford and Rohan",Medium,2021-01-09,2025-01-08,2021-01-01,2024-12-01,47,32
1,2,United Kingdom,Manchester,3 Graceland Lane,Nolan-Oberbrunner,Small,2021-02-06,2023-02-05,2021-02-01,2023-01-01,23,9
2,3,United Kingdom,Newcastle,22 Corben Circle,Schoen and Sons,Large,2021-07-02,2026-07-01,2021-07-01,2026-06-01,59,50
3,4,United Kingdom,Liverpool,628 Superior Plaza,"Olson, Berge and Walsh",Medium,2021-07-30,2024-07-29,2021-07-01,2024-06-01,35,27
4,5,United Kingdom,Liverpool,83179 Debra Pass,Hilll-Trantow,Small,2021-10-24,2024-10-23,2021-10-01,2024-09-01,35,30


In [8]:
df2.head()

Unnamed: 0,City,Office Size,People,Per Person,Rent per Month
0,Bristol,Small,20,410,8200
1,Bristol,Medium,50,410,20500
2,Bristol,Large,100,410,41000
3,Liverpool,Small,20,400,8000
4,Liverpool,Medium,50,400,20000


In [9]:
df  = df1.merge(df2, how = 'left', on = ['City','Office Size'])

In [10]:
#Create a row for each month that a rental contract will be live
df['Month Divider'] = [pd.date_range(i,j, freq='MS') for i,j in zip(df['Contract Start Month'],df['Contract End Month'])]
df = df.explode('Month Divider')

In [11]:
#Calculate the cumulative monthly cost of each office space contract
df['Cumulative Monthly Cost'] = df.groupby('Company')['Rent per Month'].cumsum()

In [12]:
#This will create our first output
df_output1 = df[['Cumulative Monthly Cost','ID','Country','City','Address','Company','Office Size','Contract Start',
                'Contract End','Contract Length','Months Until Expiry','People','Per Person','Rent per Month','Month Divider']]

In [13]:
df_output1.head()

Unnamed: 0,Cumulative Monthly Cost,ID,Country,City,Address,Company,Office Size,Contract Start,Contract End,Contract Length,Months Until Expiry,People,Per Person,Rent per Month,Month Divider
0,26000,1,United Kingdom,London,22487 Ridgeway Pass,"McLaughlin, Rutherford and Rohan",Medium,2021-01-09,2025-01-08,47,32,50,520,26000,2021-01-01
0,52000,1,United Kingdom,London,22487 Ridgeway Pass,"McLaughlin, Rutherford and Rohan",Medium,2021-01-09,2025-01-08,47,32,50,520,26000,2021-02-01
0,78000,1,United Kingdom,London,22487 Ridgeway Pass,"McLaughlin, Rutherford and Rohan",Medium,2021-01-09,2025-01-08,47,32,50,520,26000,2021-03-01
0,104000,1,United Kingdom,London,22487 Ridgeway Pass,"McLaughlin, Rutherford and Rohan",Medium,2021-01-09,2025-01-08,47,32,50,520,26000,2021-04-01
0,130000,1,United Kingdom,London,22487 Ridgeway Pass,"McLaughlin, Rutherford and Rohan",Medium,2021-01-09,2025-01-08,47,32,50,520,26000,2021-05-01


In [14]:
#output the data
df_output1.to_csv('wk15-output1.csv', index=False)

In [15]:
#Create a table that details total rent paid for completed years across all contracts and year to date figures 
#for the current year, which would update as time goes on
df['EoY'] = np.where(df['Month Divider'] <= datetime.now(), 'EoY and Current','Updated as time goes')

In [16]:
df_output2 = (pd.pivot_table(df.assign(Year=df['Month Divider'].dt.year), index='Year', columns='EoY',
                           values='Rent per Month', aggfunc='sum').reset_index())

In [17]:
df_output2 = df_output2[['Year','EoY and Current']]

In [18]:
df_output2.head()

EoY,Year,EoY and Current
0,2021,3407400.0
1,2022,2343200.0
2,2023,
3,2024,
4,2025,


In [19]:
#output the data
df_output2.to_csv('wk15-output2.csv', index=False)