# Notes
- Last updated 3/27 9:20pm
- Zillow csv problem: Some zillow files work in pd.read_csv(file) with no problem but if it doesn't work, try pd.read_csv(file, encoding="latin")

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
zillow_ppsf_file = "raw data/Zillow_County_MedianListingPricePerSqft.csv"
zppsf_df = pd.read_csv(zillow_ppsf_file, encoding="latin")

In [3]:
columns = zppsf_df.columns

# Getting just 2014-2018 (5 complete years)
filtered_columns = [col for col in columns if (col.startswith('Region')) or 
                    (col.startswith('State')) or (col.startswith('2014')) or 
                    (col.startswith('2015')) or (col.startswith('2016')) or 
                    (col.startswith('2017')) or (col.startswith('2018'))]
df_2014_2018 = zppsf_df[filtered_columns]
df_2014_2018.head()

Unnamed: 0,RegionName,State,StateCodeFIPS,2014-01,2014-02,2014-03,2014-04,2014-05,2014-06,2014-07,...,2018-03,2018-04,2018-05,2018-06,2018-07,2018-08,2018-09,2018-10,2018-11,2018-12
0,Los Angeles County,CA,6,308.314937,311.303961,313.796366,320.930936,326.771654,330.29722,332.925067,...,420.309348,424.501425,426.717557,429.553265,429.411703,428.794179,429.32384,431.875174,431.916427,427.728474
1,Cook County,IL,17,131.932222,138.680473,146.32107,151.926176,156.928328,158.980355,159.03942,...,190.625,195.898521,197.038724,197.527361,197.001218,195.975232,195.756504,194.983753,193.530311,191.382061
2,Harris County,TX,48,86.623749,89.20406,90.544306,92.328935,93.217615,93.283582,93.860984,...,118.289195,118.604015,118.70155,118.367701,117.68404,117.481203,118.227425,118.443248,118.76117,119.016133
3,Maricopa County,AZ,4,124.621932,125.544337,126.245829,127.090909,126.73362,126.719768,126.013905,...,160.093532,161.697898,161.947415,161.460484,161.122661,161.267099,161.809986,162.835249,163.304515,163.733938
4,San Diego County,CA,6,278.093076,282.608696,287.163375,290.73457,295.074946,296.680498,296.414577,...,360.28852,362.426036,375.0,377.0445,375.922229,377.49004,377.018874,372.381691,371.822272,372.164329


In [4]:
# Changing format to add columns: DATES and VALUES
long_skinny_df = pd.melt(df_2014_2018, id_vars=["RegionName", "State", "StateCodeFIPS"], var_name = "Month", value_name = "Price per ft2").dropna(how='any')
long_skinny_df = long_skinny_df.sort_values('Price per ft2', ascending=False)

In [5]:
grouped_by_date = long_skinny_df.groupby(['Month'])

In [6]:
# Find the x=14 most expensive Counties for each month
# 14 gets a list of 20 Counties. 15 gets a list of 22 Counties.
top_df = grouped_by_date.head(14)

# Show the Counties that show up on the monthly list of x priciest and how often
top_df['RegionName'].value_counts()


New York County         60
Maui County             60
Pitkin County           60
Teton County            60
Dukes County            60
Nantucket County        60
San Mateo County        60
Marin County            60
San Francisco County    60
Santa Clara County      59
Kings County            56
San Miguel County       41
Honolulu County         36
Summit County           33
Suffolk County          27
Kauai County            19
District of Columbia    17
Eagle County             8
Santa Barbara County     3
Santa Cruz County        1
Name: RegionName, dtype: int64

In [7]:
# Find the x cheapest Counties for each month
bottom_df = grouped_by_date.tail(14)

# Show the Counties that show up on the monthly list of x cheapest and how often
bottom_df['RegionName'].value_counts()

Hutchinson County    60
Randolph County      46
Montgomery County    46
Labette County       43
Mitchell County      43
Cass County          42
Wyoming County       40
Morris County        37
Nolan County         35
Cherokee County      33
Eastland County      24
Dunklin County       24
Neosho County        23
McCulloch County     23
Monroe County        20
Telfair County       19
Brown County         16
Jackson County       15
Falls County         15
Haskell County       15
Stephenson County    14
Fayette County       14
Allen County         14
Greer County         13
Runnels County       12
Seminole County      10
Fountain County      10
Hughes County         8
Houston County        8
Lawrence County       8
                     ..
Pecos County          5
Ouachita County       5
Keokuk County         5
Callahan County       5
Turner County         4
Young County          4
Kingman County        4
Washita County        4
Bourbon County        3
Caldwell County       3
Wayne County    