# Retrieve data from PostgreSQL to rank neighborhoods


# Import Dependencies

In [None]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect,join
from key import url
import numpy as np
import pandas as pd

# Connect to PostgreSQL

In [None]:
#Create the engine
engine = create_engine(url)

In [None]:
# reflect an existing database into a new model
Base = automap_base()

In [None]:
# reflect the tables
Base.prepare(engine, reflect=True)

In [None]:
session=Session(engine)

In [None]:
budget=100000

# Using Pandas for Data Analysis

In [None]:
#  Read the appraisal Table
appraisal=pd.read_sql_table('appraisal',engine)
appraisal.info()

In [None]:
# Split the appraisal rows by tax year so the 2018 and 2019 values can be
# compared account by account in the following cells
is_2018 = appraisal['tax_year'] == 2018
is_2019 = appraisal['tax_year'] == 2019
appraisal_2018 = appraisal[is_2018]
appraisal_2019 = appraisal[is_2019]

In [None]:
appraisal_2019.info()

In [None]:
appraisal_df=pd.merge(appraisal_2019,appraisal_2018,on='account', suffixes=('_2019','_2018'))
appraisal_df.info()

In [None]:
# Percentage change of the total appraised value from 2018 to 2019
value_2018 = appraisal_df['total_appraised_value_2018']
value_2019 = appraisal_df['total_appraised_value_2019']
appraisal_df['pct_value_change'] = (value_2019 - value_2018) / value_2018 * 100
appraisal_df.info()

In [None]:
results_df=appraisal_df[['id_2019','account','total_appraised_value_2019', 'pct_value_change']]
results_df.head(10)

In [None]:
#Read properties table
properties_df=pd.read_sql_table('properties',engine)
properties_df.info()

In [None]:
results_df=pd.merge(results_df,properties_df,on="account")
results_df.head()

In [None]:
del results_df['address']

In [None]:
results_df.head()

In [None]:
results_df.info()

In [None]:
#Read crime table and merge to results
crime_df=pd.read_sql_table('crime',engine)
crime_df=crime_df.rename(columns={'Zip_Code':'Zip_code'})
crime_df.head()

In [None]:
# Number of crime rows (non-null Offense_Count entries) per zip code,
# kept as a one-column frame indexed by Zip_code for the merge below
crime_aggr = crime_df.groupby('Zip_code')['Offense_Count'].count()
crime_aggr_df = crime_aggr.to_frame()
crime_aggr_df.head()

In [None]:
results_df=pd.merge(results_df,crime_aggr_df,on="Zip_code")
results_df.head()

In [None]:
results_df.info()

In [None]:
#Read property_school table and merge to results
property_school_df=pd.read_sql_table('property_school',engine)
property_school_df.head()

In [None]:
property_school_df.info()

In [None]:
results_df=pd.merge(results_df,property_school_df,on="account")
results_df.head()

In [None]:
results_df.info()

In [None]:
#Read school table and merge to results
school_df=pd.read_sql_table('school',engine)
school_df.head()

In [None]:
school_df.info()

In [None]:
results_df=pd.merge(results_df,school_df,on=['school_id','school_type'])
results_df.head()

In [None]:
results_df.info()

In [None]:
#Add flood ranking
#3- High Risk (FLOODWAY and every description not listed below)
#2 - Medium Risk
#1- Low Risk

flood_zone = results_df['flood_description']
results_df['flood_risk'] = np.select(
    [
        flood_zone == 'AREA OF MINIMAL FLOOD HAZARD',
        flood_zone == '0.2 PCT ANNUAL CHANCE FLOOD HAZARD',
    ],
    [1, 2],
    default=3,
)
results_df.head(50)

In [None]:
# Remove the columns that are no longer needed, including the `_y`
# latitude/longitude duplicates produced by an earlier merge
obsolete_columns = [
    'flood_description',
    'name',
    'address',
    'city',
    'zip_code',
    'district_id',
    'latitude_y',
    'longitude_y',
]
results_df.drop(columns=obsolete_columns, inplace=True)

In [None]:
results_df=results_df.rename(columns={'latitude_x':'latitude', 'longitude_x':'longitude'})
results_df.head()

In [None]:
results_df.info()

In [None]:
# Count sales in 2019
# Flag rows whose owner changed during calendar year 2019. The upper bound
# matters: an open-ended "> 2018-12-31" test would also flag transfers
# dated 2020 or later. Missing dates compare as False and are flagged 0.
owner_date = results_df['new_owner_date']
in_2019 = (owner_date > '2018-12-31') & (owner_date < '2020-01-01')
results_df['sales2019'] = np.where(in_2019, 1, 0)

In [None]:
results_df.sample(10)

In [None]:
# Sum the 2019 sale flags per neighborhood, counting each account once.
# NOTE(review): results_df can hold several rows for the same account after
# the property_school/school merges; summing over all of them would count
# one sale once per matching school row.
sales = (
    results_df.drop_duplicates(subset='account')
    .groupby('neighborhood_code')['sales2019']
    .sum()
)

In [None]:
sales=pd.DataFrame(sales)
sales=sales.rename(columns={'sales2019':'sales_neighborhood_2019'})
sales.head(10)

In [None]:
results_df=pd.merge(results_df,sales, on="neighborhood_code")

In [None]:
results_df.info()

In [None]:
del results_df['sales2019']


In [None]:
results_df.info()

In [None]:
#  Read the neighborhoods Table
neighborhoods_df=pd.read_sql_table('neighborhoods',engine)
neighborhoods_df.info()

In [None]:
results_df=pd.merge(results_df,neighborhoods_df,on='neighborhood_code')
results_df.head()

In [None]:
results_df.info()

In [None]:
# Keep only the rows whose 2019 appraised value fits within the budget
within_budget = results_df['total_appraised_value_2019'] <= budget
results_df = results_df[within_budget]
results_df.info()

In [3]:
def SQL_Pull(budget):
    """Build the property ranking table from PostgreSQL, limited to a budget.

    Reads the ``appraisal``, ``properties``, ``crime``, ``property_school``,
    ``school`` and ``neighborhoods`` tables, inner-joins them and adds the
    derived columns ``pct_value_change``, ``Offense_Count``, ``flood_risk``
    and ``sales_neighborhood_2019``.

    Parameters
    ----------
    budget : int or float
        Inclusive upper bound on ``total_appraised_value_2019``.

    Returns
    -------
    pandas.DataFrame
        The joined rows whose 2019 appraised value is within ``budget``.
        ``DataFrame.info()`` and a completion message are printed as a
        side effect.
    """
    # Imports stay local so this cell can be run on its own.
    import numpy as np
    import pandas as pd
    from sqlalchemy import create_engine
    from key import url

    # Only pandas talks to the database here, so no ORM reflection or
    # Session is created. The engine's connection pool is released as soon
    # as every table has been read, even if one of the reads fails.
    engine = create_engine(url)
    try:
        appraisal = pd.read_sql_table('appraisal', engine)
        properties_df = pd.read_sql_table('properties', engine)
        crime_df = pd.read_sql_table('crime', engine)
        property_school_df = pd.read_sql_table('property_school', engine)
        school_df = pd.read_sql_table('school', engine)
        neighborhoods_df = pd.read_sql_table('neighborhoods', engine)
    finally:
        engine.dispose()

    # Percentage change of the appraised value between 2018 and 2019.
    appraisal_2018 = appraisal[appraisal['tax_year'] == 2018]
    appraisal_2019 = appraisal[appraisal['tax_year'] == 2019]
    appraisal_df = pd.merge(appraisal_2019, appraisal_2018, on='account',
                            suffixes=('_2019', '_2018'))
    value_2018 = appraisal_df['total_appraised_value_2018']
    value_2019 = appraisal_df['total_appraised_value_2019']
    pct_change = (value_2019 - value_2018) / value_2018 * 100
    # A 2018 value of 0 makes the ratio +/-inf; the change is undefined in
    # that case, so store NaN instead of an infinite percentage.
    appraisal_df['pct_value_change'] = pct_change.replace([np.inf, -np.inf], np.nan)
    results_df = appraisal_df[['id_2019', 'account',
                               'total_appraised_value_2019', 'pct_value_change']]

    # Property attributes (the street address is not needed).
    results_df = pd.merge(results_df, properties_df, on="account")
    results_df = results_df.drop(columns=['address'])

    # Crime rows per zip code.
    # NOTE(review): this counts rows with a non-null Offense_Count; if the
    # column stores a number of offenses per row, sum() may be what is
    # wanted - confirm against the crime table before changing it.
    crime_df = crime_df.rename(columns={'Zip_Code': 'Zip_code'})
    crime_aggr_df = crime_df.groupby('Zip_code')['Offense_Count'].count().reset_index()
    results_df = pd.merge(results_df, crime_aggr_df, on="Zip_code")

    # Schools linked to each property, then the school details.
    results_df = pd.merge(results_df, property_school_df, on="account")
    results_df = pd.merge(results_df, school_df, on=['school_id', 'school_type'])

    # Flood ranking: 1 = low risk, 2 = medium risk, 3 = high risk
    # (FLOODWAY and every description not listed explicitly).
    flood_zone = results_df['flood_description']
    results_df['flood_risk'] = np.select(
        [
            flood_zone == 'AREA OF MINIMAL FLOOD HAZARD',
            flood_zone == '0.2 PCT ANNUAL CHANCE FLOOD HAZARD',
        ],
        [1, 2],
        default=3,
    )
    results_df = results_df.drop(columns=[
        'flood_description', 'name', 'address', 'city', 'zip_code',
        'district_id', 'latitude_y', 'longitude_y',
    ])
    results_df = results_df.rename(columns={'latitude_x': 'latitude',
                                            'longitude_x': 'longitude'})

    # Sales per neighborhood in calendar year 2019. The date test is bounded
    # on both sides so that later years are not counted, and each account is
    # counted once because results_df can hold several rows per account
    # after the school merges.
    owner_date = results_df['new_owner_date']
    in_2019 = (owner_date > '2018-12-31') & (owner_date < '2020-01-01')
    results_df['sales2019'] = np.where(in_2019, 1, 0)
    sales = (
        results_df.drop_duplicates(subset='account')
        .groupby('neighborhood_code')['sales2019']
        .sum()
        .rename('sales_neighborhood_2019')
        .reset_index()
    )
    results_df = pd.merge(results_df, sales, on="neighborhood_code")
    results_df = results_df.drop(columns=['sales2019'])

    # Neighborhood names.
    results_df = pd.merge(results_df, neighborhoods_df, on='neighborhood_code')

    # Filter by budget on year 2019.
    results_df = results_df[results_df['total_appraised_value_2019'] <= budget]
    results_df.info()
    print('computation completed')
    return results_df

#Run function SQL_Pull
SQL_Pull(1000000) 

    


<class 'pandas.core.frame.DataFrame'>
Int64Index: 44655 entries, 0 to 57380
Data columns (total 18 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   id_2019                     44655 non-null  int64         
 1   account                     44655 non-null  int64         
 2   total_appraised_value_2019  44655 non-null  float64       
 3   pct_value_change            44655 non-null  float64       
 4   latitude                    44655 non-null  float64       
 5   longitude                   44655 non-null  float64       
 6   Zip_code                    44655 non-null  int64         
 7   neighborhood_code           44655 non-null  float64       
 8   acreage                     44655 non-null  float64       
 9   new_owner_date              44652 non-null  datetime64[ns]
 10  sq_ft                       44646 non-null  float64       
 11  Offense_Count               44655 non-null  int64     

Unnamed: 0,id_2019,account,total_appraised_value_2019,pct_value_change,latitude,longitude,Zip_code,neighborhood_code,acreage,new_owner_date,sq_ft,Offense_Count,school_id,school_type,school_rating,flood_risk,sales_neighborhood_2019,neighborhood
0,1,21750000003,299000.0,0.000000,29.74614,-95.36987,77002,8400.07,0.0405,2009-10-13,1944.0,4743,101912110,Elementary,75,1,6,BALDWIN SQ/TUAM SQ/MCGREGOR
1,3,21750000018,296400.0,0.000000,29.74619,-95.36996,77002,8400.07,0.0348,2018-06-29,1944.0,4743,101912110,Elementary,75,1,6,BALDWIN SQ/TUAM SQ/MCGREGOR
2,15532,1215300010001,331080.0,0.000000,29.74530,-95.36882,77002,8400.07,0.0413,2017-06-28,1924.0,4743,101912110,Elementary,75,1,6,BALDWIN SQ/TUAM SQ/MCGREGOR
3,15533,1215300010002,287800.0,-14.717082,29.74525,-95.36874,77002,8400.07,0.0352,2016-05-23,1965.0,4743,101912110,Elementary,75,1,6,BALDWIN SQ/TUAM SQ/MCGREGOR
4,15534,1215300010003,299000.0,0.000000,29.74520,-95.36862,77002,8400.07,0.0480,2013-09-09,1965.0,4743,101912110,Elementary,75,1,6,BALDWIN SQ/TUAM SQ/MCGREGOR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57376,13619,923430000047,493347.0,22.143711,29.69054,-95.44452,77025,7447.07,0.3767,2010-12-02,2636.0,2216,101912178,Elementary,95,3,84,AYRSHIRE SEC 6-14
57377,13620,923430000049,384780.0,24.664267,29.69047,-95.44483,77025,7447.07,0.2470,2017-05-25,2542.0,2216,101912178,Elementary,95,3,84,AYRSHIRE SEC 6-14
57378,13621,923430000050,518499.0,61.979813,29.69039,-95.44529,77025,7447.07,0.2551,2018-01-18,2097.0,2216,101912178,Elementary,95,3,84,AYRSHIRE SEC 6-14
57379,13622,923430000051,332834.0,0.000000,29.69038,-95.44554,77025,7447.07,0.2807,2019-08-19,2975.0,2216,101912178,Elementary,95,3,84,AYRSHIRE SEC 6-14
