In [175]:
import pandas as pd
import numpy as np

In [176]:
hate_crime_df = pd.read_csv("../Datasets/hate_crime.csv", low_memory=False)
hate_crime_df.columns

Index(['INCIDENT_ID', 'DATA_YEAR', 'ORI', 'PUB_AGENCY_NAME', 'PUB_AGENCY_UNIT',
       'AGENCY_TYPE_NAME', 'STATE_ABBR', 'STATE_NAME', 'DIVISION_NAME',
       'REGION_NAME', 'POPULATION_GROUP_CODE', 'POPULATION_GROUP_DESC',
       'INCIDENT_DATE', 'ADULT_VICTIM_COUNT', 'JUVENILE_VICTIM_COUNT',
       'TOTAL_OFFENDER_COUNT', 'ADULT_OFFENDER_COUNT',
       'JUVENILE_OFFENDER_COUNT', 'OFFENDER_RACE', 'OFFENDER_ETHNICITY',
       'VICTIM_COUNT', 'OFFENSE_NAME', 'TOTAL_INDIVIDUAL_VICTIMS',
       'LOCATION_NAME', 'BIAS_DESC', 'VICTIM_TYPES', 'MULTIPLE_OFFENSE',
       'MULTIPLE_BIAS'],
      dtype='object')

In [177]:
hate_crime_df = hate_crime_df.fillna(np.nan).replace([np.nan], [None])

In [178]:
hate_crime_df["BIAS_DESC"] = hate_crime_df["BIAS_DESC"].str.replace(","," ")
hate_crime_df["OFFENSE_NAME"] = hate_crime_df["OFFENSE_NAME"].str.replace(","," ")
hate_crime_df["POPULATION_GROUP_DESC"] = hate_crime_df["POPULATION_GROUP_DESC"].str.replace(","," ")
hate_crime_df["PUB_AGENCY_UNIT"] = hate_crime_df["PUB_AGENCY_UNIT"].str.replace(","," ")
hate_crime_df["PUB_AGENCY_NAME"] = hate_crime_df["PUB_AGENCY_NAME"].str.replace(","," ")

* Creating two seperate dataframes as per the data model for 2 tables.

In [179]:
hc_incident_df = hate_crime_df[['INCIDENT_ID', 'DATA_YEAR',
       'INCIDENT_DATE', 'ADULT_VICTIM_COUNT', 'JUVENILE_VICTIM_COUNT',
       'TOTAL_OFFENDER_COUNT', 'ADULT_OFFENDER_COUNT',
       'JUVENILE_OFFENDER_COUNT', 'OFFENDER_RACE', 'OFFENDER_ETHNICITY',
       'VICTIM_COUNT', 'OFFENSE_NAME', 'TOTAL_INDIVIDUAL_VICTIMS',
       'BIAS_DESC', 'VICTIM_TYPES', 'MULTIPLE_OFFENSE',
       'MULTIPLE_BIAS', 'ORI', 'LOCATION_NAME']]

hc_location_df = hate_crime_df[['ORI', 'PUB_AGENCY_NAME', 'PUB_AGENCY_UNIT', 'AGENCY_TYPE_NAME',
       'STATE_ABBR', 'STATE_NAME', 'DIVISION_NAME', 'REGION_NAME',
       'POPULATION_GROUP_CODE', 'POPULATION_GROUP_DESC', 'LOCATION_NAME']]
hc_location_df = hc_location_df.drop_duplicates(subset=['ORI', 'LOCATION_NAME'], keep=False)

In [180]:
display(hc_incident_df.shape)
hc_location_df.shape

(219577, 19)

(23299, 11)

In [181]:
hc_incident_df.to_csv("incident.csv", index=False)
hc_location_df.to_csv("location.csv", index=False)

In [182]:
# Import the module to run psycopg2
import psycopg2

# Import the .py file that I created to store my password
from passwords import password



In [183]:
# Connect to my postgreSQL dvdrental database with default connections
# Need to add my password for it to work

# The instantiates a persistant client to speak with
conn = psycopg2.connect(host='localhost', dbname='postgres', user='postgres', password=password)

# The above 'conn' code created a connection "object" for us to use in future code

conn.autocommit = True

In [184]:
#Creating a cursor object using the cursor() method
cursor = conn.cursor()

#Preparing query to create a database
sql = '''CREATE database hate_crime''';

#Creating a database
cursor.execute(sql)
print("Database created successfully........")

#Closing the connection
conn.close()

Database created successfully........


In [185]:
conn = psycopg2.connect(host='localhost', dbname='hate_crime', user='postgres', password=password)

cursor = conn.cursor()

cursor.execute("""CREATE TABLE incident(
    INCIDENT_ID integer PRIMARY KEY,
    DATA_YEAR integer,  
    INCIDENT_DATE date, 
    ADULT_VICTIM_COUNT float,
    JUVENILE_VICTIM_COUNT float,
    TOTAL_OFFENDER_COUNT integer,  
    ADULT_OFFENDER_COUNT float,
    JUVENILE_OFFENDER_COUNT float,
    OFFENDER_RACE text, 
    OFFENDER_ETHNICITY text, 
    VICTIM_COUNT integer, 
    OFFENSE_NAME text,
    TOTAL_INDIVIDUAL_VICTIMS float,
    BIAS_DESC text, 
    VICTIM_TYPES text, 
    MULTIPLE_OFFENSE text, 
    MULTIPLE_BIAS text,
    ORI text,
    LOCATION_NAME text
)
""")
conn.commit()
print("Table created")
conn.close()

Table created


In [186]:
conn = psycopg2.connect(host='localhost', dbname='hate_crime', user='postgres', password=password)

cursor = conn.cursor()

with open('incident.csv', 'r') as f:
    next(f) # Skip the header row.
    cursor.copy_from(f, 'incident', sep=',',null='')
    
conn.commit()
conn.close()

In [187]:
conn = psycopg2.connect(host='localhost', dbname='hate_crime', user='postgres', password=password)

cursor = conn.cursor()

cursor.execute("""CREATE TABLE location(
    ORI text NOT NULL,
    PUB_AGENCY_NAME text,
    PUB_AGENCY_UNIT text,
    AGENCY_TYPE_NAME text,
    STATE_ABBR text,
    STATE_NAME text,
    DIVISION_NAME text,
    REGION_NAME text,
    POPULATION_GROUP_CODE text,
    POPULATION_GROUP_DESC text,
    LOCATION_NAME text NOT NULL,
    CONSTRAINT location_id PRIMARY KEY (ORI,LOCATION_NAME)
)
""")
conn.commit()
print("Table created")
conn.close()

Table created


In [188]:
conn = psycopg2.connect(host='localhost', dbname='hate_crime', user='postgres', password=password)

cursor = conn.cursor()

with open('location.csv', 'r') as f:
    next(f) # Skip the header row.
    cursor.copy_from(f, 'location', sep=',',null='')
    
conn.commit()
conn.close()

In [190]:
conn = psycopg2.connect(host='localhost', dbname='hate_crime', user='postgres', password=password)

cur = conn.cursor()

cur.execute('SELECT * FROM location')
one = cur.fetchone()
all = cur.fetchall()

one

('AR0350100',
 'Pine Bluff',
 None,
 'City',
 'AR',
 'Arkansas',
 'West South Central',
 'South',
 '3',
 'Cities from 50 000 thru 99 999',
 'Service/Gas Station')

In [191]:
conn.close()