In [1]:
import pandas as pd
import numpy as np
import os as os

# SQL Alchemy
from sqlalchemy import create_engine

In [2]:
# Build the SQLAlchemy engine for the local Minneapolis_Police_Force_db
# Postgres database and open a connection that later cells query through
engine = create_engine(
    'postgresql://localhost:5432/Minneapolis_Police_Force_db'
)
connection = engine.connect()

In [5]:
#Read the raw neighborhoods CSV into a dataframe and preview the first rows
load_csv = "source_files/MLPS_Neighborhoods_raw.csv"
MLPS_Neighborhoods_df = pd.read_csv(filepath_or_buffer=load_csv)
MLPS_Neighborhoods_df.head(5)

Unnamed: 0,NEIGHBORHOOD,COMMUNITY
0,Armatage,Southwest
1,Audubon Park,Northeast
2,Bancroft,Powderhorn
3,Beltrami,Northeast
4,Bottineau,Northeast


In [6]:
#Pull the id and name of every row in the Postgres community table
community_df = pd.read_sql(
    sql="SELECT community_id, name FROM community",
    con=connection,
)

community_df.head(5)

Unnamed: 0,community_id,name
0,1,Calhoun Isles
1,2,Camden
2,3,Central
3,4,Longfellow
4,5,Near North


In [7]:
#Join neighborhood data and community df to get community id for each neighborhood.
#validate='many_to_one' makes pandas raise a MergeError if a community name
#appears more than once in community_df; without it such a duplicate would
#silently multiply the matching neighborhood rows in the result.
neighborhood_community = pd.merge(
    MLPS_Neighborhoods_df,
    community_df,
    how='inner',
    left_on='COMMUNITY',
    right_on='name',
    validate='many_to_one',
)

neighborhood_community.head()

Unnamed: 0,NEIGHBORHOOD,COMMUNITY,community_id,name
0,Armatage,Southwest,10,Southwest
1,East Harriet,Southwest,10,Southwest
2,Fulton,Southwest,10,Southwest
3,Kenny,Southwest,10,Southwest
4,King Field,Southwest,10,Southwest


In [8]:
#Count number of neighborhoods in both dfs to validate if counts match.
#count() tallies non-null values, so a neighborhood dropped by the inner join
#(no matching community name) shows up as a lower second number.
source_neighborhood_count = MLPS_Neighborhoods_df['NEIGHBORHOOD'].count()
merged_neighborhood_count = neighborhood_community['NEIGHBORHOOD'].count()
print(source_neighborhood_count)
print(merged_neighborhood_count)

#Fail loudly instead of relying on a visual check, so a mismatch stops a
#Run All before the data is appended to Postgres further down.
assert source_neighborhood_count == merged_neighborhood_count, (
    "Neighborhood count changed during the community merge: "
    "{} in source vs {} after merge".format(
        source_neighborhood_count, merged_neighborhood_count
    )
)

87
87


In [9]:
#Keep only the two columns needed in the final output
neighborhood_community = neighborhood_community.loc[:, ['NEIGHBORHOOD', 'community_id']]
neighborhood_community.head(5)

Unnamed: 0,NEIGHBORHOOD,community_id
0,Armatage,10
1,East Harriet,10
2,Fulton,10
3,Kenny,10
4,King Field,10


In [10]:
#Rename the NEIGHBORHOOD column to 'name' so it matches the Postgres table
neighborhood_community = neighborhood_community.rename(
    {'NEIGHBORHOOD': 'name'}, axis='columns'
)
neighborhood_community.head(5)

Unnamed: 0,name,community_id
0,Armatage,10
1,East Harriet,10
2,Fulton,10
3,Kenny,10
4,King Field,10


In [11]:
#Replace the index of neighborhood_community with a 1-based sequence
#named neighborhood_id
neighborhood_community.index = pd.Index(
    np.arange(1, len(neighborhood_community) + 1),
    name='neighborhood_id',
)

neighborhood_community.head(5)

Unnamed: 0_level_0,name,community_id
neighborhood_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Armatage,10
2,East Harriet,10
3,Fulton,10
4,Kenny,10
5,King Field,10


In [10]:
#Write the dataframe (including its named index) into the existing Postgres
#'neighborhood' table; if_exists='append' adds rows to what is already there
neighborhood_community.to_sql(
    name='neighborhood',
    con=engine,
    if_exists='append',
)

In [12]:
#Save data to a csv file
output_data_file = "target_files/MLS_Neighborhoods.csv"
#os.path.join with a single argument returns it unchanged; normpath is used
#instead so the separators are normalized for the current platform.
output_path = os.path.normpath(output_data_file)
#Create the target directory if it is missing so to_csv does not fail on a
#fresh checkout; exist_ok=True makes this a no-op when it already exists.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
neighborhood_community.to_csv(output_path, header=True)