# Austin, TX Crime Analysis

In [1]:
# Dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from usefuls import atx_zip_codes, census_key, zipcode_tabulation_area, similar_offense_type

$$\sum_{w\in\mathfrak{S}_n}{q^{\text{inv}(w)}} = [n]_q!$$

## Get Property Data from Zillow
* The data file (`Zip_time_series.csv`) is too large to upload to GitHub, but it can be downloaded from [Kaggle](https://www.kaggle.com/zillow/zecon#Zip_time_series.csv).

In [4]:
# Inclusive range of years kept in the analysis
FIRST_YEAR = 2011
LAST_YEAR = 2016

# Read the file
zillow_raw = pd.read_csv('Zip_time_series.csv')

# Rename RegionName to 'Zip Code'
zillow_df = zillow_raw.rename(columns={'RegionName': 'Zip Code'})

# Keep only Austin-area zip codes. The explicit .copy() makes the filtered
# frame independent of its parent, so adding the 'year' column below does not
# raise SettingWithCopyWarning.
is_austin = zillow_df['Zip Code'].isin(list(atx_zip_codes))
zillow_df = zillow_df[is_austin].copy()

# Add year column: the first four characters of 'Date' parsed as an integer
zillow_df['year'] = zillow_df['Date'].str[:4].astype(int)

# Keep only the years FIRST_YEAR..LAST_YEAR (both ends inclusive)
in_year_range = zillow_df['year'].between(FIRST_YEAR, LAST_YEAR)

# Keep only the columns we need, drop rows with NaN values, then find the
# mean ZHVI per year per zip code, rounded to 2 decimals
zillow_df = (
    zillow_df.loc[in_year_range, ['year', 'Zip Code', 'ZHVI_AllHomes']]
    .dropna()
    .groupby(['year', 'Zip Code'])['ZHVI_AllHomes']
    .mean()
    .round(2)
    .to_frame()
)

# Preview the frame
zillow_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ZHVI_AllHomes
year,Zip Code,Unnamed: 2_level_1
2011,78610,167591.67
2011,78613,182450.0
2011,78617,104841.67
2011,78641,143600.0
2011,78664,130350.0


In [5]:
# Display the aggregated frame: mean ZHVI_AllHomes indexed by (year, Zip Code)
zillow_df

Unnamed: 0_level_0,Unnamed: 1_level_0,ZHVI_AllHomes
year,Zip Code,Unnamed: 2_level_1
2011,78610,167591.67
2011,78613,182450.00
2011,78617,104841.67
2011,78641,143600.00
2011,78664,130350.00
2011,78681,191216.67
2011,78701,303008.33
2011,78702,198791.67
2011,78703,509408.33
2011,78704,301783.33
