In [105]:
# ------ START OF COMMON FILE ANALYSIS CODE -----
# Dependencies
import pandas as pd
from pathlib import Path
import scipy.stats as st
import matplotlib.pyplot as plt
import numpy as np

In [106]:
# Per-file configuration for the common analysis code.
# NOTE: Edit both values for each new file being run.
# Human-readable metric name for the file being analyzed
zillow_metric_name = 'Rent Index'
# Location of the Zillow CSV to analyze
zillow_csv_path = Path('Zillow_Data/Metro_zori_uc_sfrcondomfr_sm_month.csv')

In [107]:
# Load the CSV located at zillow_csv_path into a data frame, then preview its first rows
zillow_df = pd.read_csv(filepath_or_buffer=zillow_csv_path)
zillow_df.head()

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,2015-01-31,2015-02-28,2015-03-31,2015-04-30,2015-05-31,...,2023-09-30,2023-10-31,2023-11-30,2023-12-31,2024-01-31,2024-02-29,2024-03-31,2024-04-30,2024-05-31,2024-06-30
0,102001,0,United States,country,,1253.449061,1260.313718,1269.579976,1279.021194,1288.644933,...,2005.728434,2004.533106,2000.353386,1997.393964,1999.895957,2007.546945,2018.360685,2030.249059,2042.463752,2053.69292
1,394913,1,"New York, NY",msa,NY,2419.800397,2435.544929,2454.626643,2476.503269,2492.789838,...,3381.793473,3365.593176,3341.65675,3322.333169,3325.32804,3343.128943,3376.059213,3406.35257,3442.299156,3472.415449
2,753899,2,"Los Angeles, CA",msa,CA,1848.680007,1860.783762,1877.225401,1890.842498,1906.330946,...,2929.966694,2927.456029,2913.00062,2903.956011,2906.885602,2918.73467,2935.569918,2945.721034,2961.320732,2975.213293
3,394463,3,"Chicago, IL",msa,IL,1460.223748,1467.029721,1477.360961,1487.142749,1498.090186,...,2035.47526,2028.844172,2021.458598,2023.773775,2032.74636,2046.254882,2059.188684,2080.211095,2099.845487,2118.487003
4,394514,4,"Dallas, TX",msa,TX,1138.255989,1144.206978,1152.548664,1164.799816,1174.934505,...,1829.253585,1821.775857,1815.14872,1806.477735,1801.778337,1797.711117,1801.383652,1806.746317,1815.277434,1822.28815


In [108]:
# Tidy the loaded Zillow frame: friendlier column labels, no RegionType column,
# and a blank Size Segment column positioned right after Size Rank.
# The frame is rebuilt by reassignment (no inplace=True) and the drop tolerates
# an already-removed RegionType, so re-running this cell does not raise KeyError.
zillow_df = (
    zillow_df
    # Rename labels (labels that are already renamed are simply left alone)
    .rename(columns={'SizeRank': 'Size Rank', 'RegionName': 'Metro Area', 'StateName': 'State'})
    # Remove the RegionType column; errors='ignore' keeps a second run harmless
    .drop(columns='RegionType', errors='ignore')
)
# Insert a new blank column to store the Size Segment
zillow_df['Size Segment'] = pd.NA
# Re-order columns so that Size Segment is next to Size Rank.
# The position is derived from Size Rank itself rather than a fixed index.
columns = [label for label in zillow_df.columns if label != 'Size Segment']
columns.insert(columns.index('Size Rank') + 1, 'Size Segment')
zillow_df = zillow_df[columns]
zillow_df.head()

Unnamed: 0,RegionID,Size Rank,Size Segment,Metro Area,State,2015-01-31,2015-02-28,2015-03-31,2015-04-30,2015-05-31,...,2023-09-30,2023-10-31,2023-11-30,2023-12-31,2024-01-31,2024-02-29,2024-03-31,2024-04-30,2024-05-31,2024-06-30
0,102001,0,,United States,,1253.449061,1260.313718,1269.579976,1279.021194,1288.644933,...,2005.728434,2004.533106,2000.353386,1997.393964,1999.895957,2007.546945,2018.360685,2030.249059,2042.463752,2053.69292
1,394913,1,,"New York, NY",NY,2419.800397,2435.544929,2454.626643,2476.503269,2492.789838,...,3381.793473,3365.593176,3341.65675,3322.333169,3325.32804,3343.128943,3376.059213,3406.35257,3442.299156,3472.415449
2,753899,2,,"Los Angeles, CA",CA,1848.680007,1860.783762,1877.225401,1890.842498,1906.330946,...,2929.966694,2927.456029,2913.00062,2903.956011,2906.885602,2918.73467,2935.569918,2945.721034,2961.320732,2975.213293
3,394463,3,,"Chicago, IL",IL,1460.223748,1467.029721,1477.360961,1487.142749,1498.090186,...,2035.47526,2028.844172,2021.458598,2023.773775,2032.74636,2046.254882,2059.188684,2080.211095,2099.845487,2118.487003
4,394514,4,,"Dallas, TX",TX,1138.255989,1144.206978,1152.548664,1164.799816,1174.934505,...,1829.253585,1821.775857,1815.14872,1806.477735,1801.778337,1797.711117,1801.383652,1806.746317,1815.277434,1822.28815


In [109]:
# Size Segment bin edges and their labels (one label per interval between edges).
# Edges: -1, 0, then every multiple of 100 up to and including 1000.
size_segment_bins = [-1, 0, *range(100, 1001, 100)]
# Labels: the two special segments first, then "101 - 200" through "901 - 1000".
size_segment_labels = [
    "National Average",
    "Top 100",
    *(f"{lower + 1} - {lower + 100}" for lower in range(100, 1000, 100)),
]

In [103]:
# Bucket each row's Size Rank into its labelled segment using the bin edges above,
# then store the result in the Size Segment column and preview the frame
segment_by_rank = pd.cut(
    x=zillow_df["Size Rank"],
    bins=size_segment_bins,
    labels=size_segment_labels,
)
zillow_df['Size Segment'] = segment_by_rank
zillow_df.head()

Unnamed: 0,RegionID,Size Rank,Size Segment,Metro Area,State,2015-01-31,2015-02-28,2015-03-31,2015-04-30,2015-05-31,...,2023-09-30,2023-10-31,2023-11-30,2023-12-31,2024-01-31,2024-02-29,2024-03-31,2024-04-30,2024-05-31,2024-06-30
0,102001,0,National Average,United States,,1253.449061,1260.313718,1269.579976,1279.021194,1288.644933,...,2005.728434,2004.533106,2000.353386,1997.393964,1999.895957,2007.546945,2018.360685,2030.249059,2042.463752,2053.69292
1,394913,1,Top 100,"New York, NY",NY,2419.800397,2435.544929,2454.626643,2476.503269,2492.789838,...,3381.793473,3365.593176,3341.65675,3322.333169,3325.32804,3343.128943,3376.059213,3406.35257,3442.299156,3472.415449
2,753899,2,Top 100,"Los Angeles, CA",CA,1848.680007,1860.783762,1877.225401,1890.842498,1906.330946,...,2929.966694,2927.456029,2913.00062,2903.956011,2906.885602,2918.73467,2935.569918,2945.721034,2961.320732,2975.213293
3,394463,3,Top 100,"Chicago, IL",IL,1460.223748,1467.029721,1477.360961,1487.142749,1498.090186,...,2035.47526,2028.844172,2021.458598,2023.773775,2032.74636,2046.254882,2059.188684,2080.211095,2099.845487,2118.487003
4,394514,4,Top 100,"Dallas, TX",TX,1138.255989,1144.206978,1152.548664,1164.799816,1174.934505,...,1829.253585,1821.775857,1815.14872,1806.477735,1801.778337,1797.711117,1801.383652,1806.746317,1815.277434,1822.28815
5,394692,5,Top 100,"Houston, TX",TX,1259.273641,1263.058053,1269.765848,1280.983746,1290.915304,...,1699.323721,1695.593628,1691.173243,1690.51671,1692.044702,1696.352601,1700.501945,1709.371362,1719.87078,1730.042807
6,395209,6,Top 100,"Washington, DC",VA,1778.364828,1784.295919,1796.589137,1805.185655,1815.99324,...,2372.897856,2372.518872,2368.51208,2361.023781,2362.296245,2373.642526,2394.991296,2415.543801,2434.996226,2455.08434
7,394974,7,Top 100,"Philadelphia, PA",PA,1282.204515,1288.299611,1300.648078,1308.172064,1314.549869,...,1854.958628,1862.552743,1864.746436,1859.486015,1857.951754,1863.201493,1871.936428,1880.701119,1888.157142,1898.122622
8,394856,8,Top 100,"Miami, FL",FL,1541.955931,1548.596827,1554.496158,1561.504776,1570.56077,...,2773.420166,2786.189748,2793.10161,2787.690997,2782.670461,2786.726922,2796.143903,2805.809026,2807.489234,2812.79146
9,394347,9,Top 100,"Atlanta, GA",GA,1067.182417,1073.331111,1077.273787,1087.917831,1095.746544,...,1940.220102,1931.387105,1918.618272,1909.622507,1913.255449,1924.268325,1930.540716,1936.408702,1942.637418,1950.566249


In [104]:
# Publish a copy of the prepared frame under a metric-specific variable name.
# The name is the metric name in lowercase, with spaces swapped for underscores,
# followed by a "_df" suffix.
zillow_unique_df_name = f'{zillow_metric_name.lower().replace(" ", "_")}_df'
# Bind the copy in the global namespace under that generated name
globals().update({zillow_unique_df_name: zillow_df.copy()})
print(f'Created new dataframe: {zillow_unique_df_name}')


Created new dataframe: rent_index_df
