In [116]:
# Import Dependencies
import pandas as pd
from itertools import tee, islice, chain

In [117]:
# Read in Monthly Sales Price Data
price = pd.read_excel('Data/Med:Avg Sales Price.xls', sheet_name='Price ')
# Clean up DataFrame
price.columns = price.iloc[1]
price = price.iloc[2:]
# Remove timestamp from Period column
price['Period'] = pd.to_datetime(price['Period'], errors='coerce')
# Insert Median prices into empty Average values
price['Average'].loc[2:145] = price['Median '].loc[2:145]
# Drop Median column
price.drop(columns='Median ', inplace=True)
# Set Period as index
price.set_index('Period', inplace=True)
price = price.rename_axis('Period', axis='columns')
price.index.name = None
# Drop Null Values
price = price.dropna()
# Set a more accurate name for column
price.rename(columns={'Average': 'Avg. Price'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  price['Average'].loc[2:145] = price['Median '].loc[2:145]


In [118]:
# Read in Monthly Sales Quantity Data
quantity = pd.read_excel('Data/Sold by Region.xls', sheet_name='Reg Sold')
# Clean up df
quantity = quantity.iloc[7:]
quantity.rename(columns={'Unnamed: 1': 'Quantity'}, inplace= True)
quantity.drop(columns=['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10'], inplace= True)
# Drop nulls
quantity = quantity.dropna()
# Convert index to period format
quantity['Houses Sold by Region'] = pd.to_datetime(quantity['Houses Sold by Region'], errors='coerce')
quantity.rename(columns={'Houses Sold by Region': 'Period'}, inplace= True)
quantity.set_index('Period', inplace=True)
# Set quantity to actual values
quantity['Quantity'] = quantity['Quantity'] * 1000

In [119]:
# Merge two dataframes together
revenue = pd.concat([price, quantity], axis= 1)
revenue

Unnamed: 0,Avg. Price,Quantity
1963-01-01,17200,42000
1963-02-01,17700,35000
1963-03-01,18200,44000
1963-04-01,18200,52000
1963-05-01,17500,58000
...,...,...
2021-12-01,491000,61000
2022-01-01,501200,70000
2022-02-01,513900,72000
2022-03-01,522500,68000


In [120]:
# Multiply price with quantity
revenue['Avg. Revenue'] = revenue['Avg. Price'] * revenue['Quantity']
revenue

Unnamed: 0,Avg. Price,Quantity,Avg. Revenue
1963-01-01,17200,42000,722400000
1963-02-01,17700,35000,619500000
1963-03-01,18200,44000,800800000
1963-04-01,18200,52000,946400000
1963-05-01,17500,58000,1015000000
...,...,...,...
2021-12-01,491000,61000,29951000000
2022-01-01,501200,70000,35084000000
2022-02-01,513900,72000,37000800000
2022-03-01,522500,68000,35530000000


In [121]:
# Function to access next and previous items in a for loop
# found on stack overflow: https://stackoverflow.com/questions/1011938/loop-that-also-accesses-previous-and-next-values
def previous_and_next(some_iterable):
    prevs, items, nexts = tee(some_iterable, 3)
    prevs = chain([None], prevs)
    nexts = chain(islice(nexts, 1, None), [None])
    return zip(prevs, items, nexts)

In [122]:
# Iterate over Revenue column to determine periods of Expansion and Recession
rev = revenue['Avg. Revenue']

expansion = []
recession = []
counter1 = 0
counter2 = 0

for previous, item, nxt in previous_and_next(rev):
    try:
        if previous < item:
            counter1 +=1
            expansion.append(counter1)
        else:
            counter1 = 0
            expansion.append(counter1)
    except:
        counter1 = 0
        expansion.append(counter1)
    try:
        if previous > item:
            counter2 +=1
            recession.append(counter2)
        else:
            counter2 = 0
            recession.append(counter2)
    except:
        counter2 = 0
        recession.append(counter2)

In [123]:
# Insert Expansion and Recession values into DF
revenue.insert(loc=3, column= 'Periods of Expansion', value= expansion)
revenue.insert(loc=4, column= 'Periods of Recession', value= recession)

In [124]:
# Calculate percentage gained and lossed month over month
percent_change = []

for previous, item, nxt in previous_and_next(rev):
    try:
        result = ((item - previous)/previous)
        result = "{:.2%}".format(result)
        percent_change.append(result)
    except:
        result = 0 
        result = "{:.0%}".format(result)
        percent_change.append(result)

In [125]:
# Insert percentage change into DF
revenue.insert(loc=4, column= '% Change', value= percent_change)

In [126]:
# Iterate through df to find the bottom of the recession
recession = revenue['Periods of Recession']
valley = []
for previous, item, nxt in previous_and_next(recession):
    try:
        if nxt < item > previous:
            valley.append('Yes')
        else:
            valley.append('No')
    except:
        valley.append('No')

In [127]:
# Iterate through df to find the peak of the expansion
expansion = revenue['Periods of Expansion']
peak = []
for previous, item, nxt in previous_and_next(expansion):
    try:
        if nxt < item > previous:
            peak.append('Yes')
        else:
            peak.append('No')
    except:
        peak.append('No')

In [128]:
# Insert peaks and valleys into DF
revenue.insert(loc=4, column='Peak', value= peak)
revenue.insert(loc=6, column='Valley', value= valley)


In [130]:
# Export to excel workbook
revenue.to_excel('US Housing Monthly Revenue.xlsx')