In [1]:
import pandas as pd
import sqlite3
from sqlalchemy import create_engine

In [2]:
# Read the raw resident-survey export into a DataFrame, then display its
# (rows, columns) dimensions as the cell output.
survey = pd.read_csv(
    filepath_or_buffer='../data/Metro_Resident_Survey_Responses.csv',
)
survey.shape

(2171, 181)

In [3]:
# Print a summary of the frame: index range, column count, dtype breakdown and memory use.
survey.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2171 entries, 0 to 2170
Columns: 181 entries, Period to Education Attained
dtypes: float64(1), int64(1), object(179)
memory usage: 3.0+ MB


#### Finding columns with fewer than 10 missing values - these are the ones we'll use to create the survey database

In [4]:
# Per-column count of missing values, filtered to the columns with fewer than
# 10 missing entries. The counts are computed once and filtered through a
# callable, instead of running `survey.isnull().sum()` twice over the frame.
cols = survey.isnull().sum().loc[lambda null_counts: null_counts < 10]

In [5]:
# The index of `cols` holds the names of the columns that passed the
# missing-value filter; materialise them as a plain Python list.
cols_to_keep = cols.index.tolist()

In [6]:
# Display the list of retained column names.
cols_to_keep

['Period',
 'Start Date',
 'End Date',
 'ZIP Code',
 'Service District',
 'Quality of Life',
 'Overall Feeling of Safety',
 'Neighborhood Safety',
 'Neighborhood Appearance',
 'Nashville as Residence',
 'Nashville as Place to Raise Children',
 'Nashville as Workplace',
 'Nashville as Retirement Place',
 'Metro Services - Overall',
 'Value Received from Taxes',
 'Elected Official Leadership',
 'Metro Transparency and Ethics',
 'Customer Service',
 'Metro Communication',
 'Decision Input Opportunity',
 'Crime Prevention',
 'Police - Overall',
 'Fire and EMS - Overall',
 'Streets and Sidewalks - Overall',
 'Trash and Recycling - Overall',
 'Public Education',
 'Public Library',
 'Social Services - Overall',
 'Parks and Recreation - Overall',
 'Public Transit - Overall',
 'Metro-provided Arts and Culture - Overall',
 'City Street Maintenance',
 'Street Signs and Traffic Signals',
 'Bicycle Infrastructure',
 'Trash Service',
 'Curbside Recycling',
 'Drop-off Recycling',
 'Street Lighting',


In [7]:
# Keep every row, but only the columns that passed the missing-value filter.
survey2 = survey.loc[:, cols_to_keep]

In [8]:
# Preview the first five rows of the reduced frame.
survey2.head()

Unnamed: 0,Period,Start Date,End Date,ZIP Code,Service District,Quality of Life,Overall Feeling of Safety,Neighborhood Safety,Neighborhood Appearance,Nashville as Residence,...,Healthy Food Access,Childcare Access,Pre-K Access,Public Transit Access,Diversity Support Efforts,Diversity Acceptance,Disability Access,Farmers' Market,General Hospital,Gender Identity
0,2018Q2,06/21/2018,07/16/2018,37208,Urban Services District,Neutral,Neutral,Neutral,Satisfied,Neutral,...,Satisfied,Satisfied,Satisfied,Satisfied,Satisfied,Neutral,Satisfied,Very Satisfied,Neutral,Female
1,2018Q2,06/21/2018,07/16/2018,37211,Urban Services District,Satisfied,Satisfied,Satisfied,Satisfied,Good,...,Very Satisfied,Very Satisfied,Very Satisfied,Dissatisfied,Satisfied,Dissatisfied,Dissatisfied,Satisfied,Don't Know,Female
2,2018Q2,06/21/2018,07/16/2018,37208,Urban Services District,Very Satisfied,Neutral,Satisfied,Satisfied,Excellent,...,Dissatisfied,Don't Know,Don't Know,Dissatisfied,Neutral,Satisfied,Satisfied,Satisfied,Don't Know,Male
3,2018Q2,06/21/2018,07/16/2018,37203,Urban Services District,Satisfied,Very Satisfied,Satisfied,Satisfied,Excellent,...,Dissatisfied,Don't Know,Don't Know,Very Dissatisfied,Very Satisfied,Very Satisfied,Don't Know,Satisfied,Don't Know,Male
4,2018Q1,02/01/2018,03/06/2018,37138,General Services District,Satisfied,Satisfied,Satisfied,Satisfied,Good,...,Satisfied,Don't Know,Don't Know,Neutral,Neutral,Neutral,Neutral,Neutral,Don't Know,Female


In [9]:
# Remove the two columns that are not carried into the database tables.
survey2 = survey2.drop(['Start Date', 'Service District'], axis='columns')

In [10]:
# With 'Start Date' gone, the remaining 'End Date' column becomes simply 'Date'.
survey2 = survey2.rename({'End Date': 'Date'}, axis='columns')

In [11]:
# 'Date' is an object column of MM/DD/YYYY strings, so calling .min()/.max()
# on it directly compares text: that only matches chronological order while
# every response falls in the same year. Parse to real datetimes first, then
# print the earliest and latest dates in the original MM/DD/YYYY format.
# NOTE(review): assumes every non-null value follows MM/DD/YYYY; to_datetime
# raises on anything else, which surfaces bad rows instead of mis-sorting them.
response_dates = pd.to_datetime(survey2['Date'], format='%m/%d/%Y')
print(response_dates.min().strftime('%m/%d/%Y'))
print(response_dates.max().strftime('%m/%d/%Y'))

03/06/2018
12/30/2018


#### Adding an `Id` column so rows split across the tables can be re-matched with SQL joins

In [12]:
# Number the responses 1..n. The upper bound comes from the frame's actual
# length rather than a hard-coded 2172, so the assignment keeps working (and
# cannot raise a length-mismatch error) if the number of rows in the CSV changes.
survey2['Id'] = list(range(1, len(survey2) + 1))
survey2.head(3)

Unnamed: 0,Period,Date,ZIP Code,Quality of Life,Overall Feeling of Safety,Neighborhood Safety,Neighborhood Appearance,Nashville as Residence,Nashville as Place to Raise Children,Nashville as Workplace,...,Childcare Access,Pre-K Access,Public Transit Access,Diversity Support Efforts,Diversity Acceptance,Disability Access,Farmers' Market,General Hospital,Gender Identity,Id
0,2018Q2,07/16/2018,37208,Neutral,Neutral,Neutral,Satisfied,Neutral,Good,Neutral,...,Satisfied,Satisfied,Satisfied,Satisfied,Neutral,Satisfied,Very Satisfied,Neutral,Female,1
1,2018Q2,07/16/2018,37211,Satisfied,Satisfied,Satisfied,Satisfied,Good,Good,Good,...,Very Satisfied,Very Satisfied,Dissatisfied,Satisfied,Dissatisfied,Dissatisfied,Satisfied,Don't Know,Female,2
2,2018Q2,07/16/2018,37208,Very Satisfied,Neutral,Satisfied,Satisfied,Excellent,Excellent,Excellent,...,Don't Know,Don't Know,Dissatisfied,Neutral,Satisfied,Satisfied,Satisfied,Don't Know,Male,3


In [13]:
# Show the current column names as a NumPy array.
survey2.columns.values

array(['Period', 'Date', 'ZIP Code', 'Quality of Life',
       'Overall Feeling of Safety', 'Neighborhood Safety',
       'Neighborhood Appearance', 'Nashville as Residence',
       'Nashville as Place to Raise Children', 'Nashville as Workplace',
       'Nashville as Retirement Place', 'Metro Services - Overall',
       'Value Received from Taxes', 'Elected Official Leadership',
       'Metro Transparency and Ethics', 'Customer Service',
       'Metro Communication', 'Decision Input Opportunity',
       'Crime Prevention', 'Police - Overall', 'Fire and EMS - Overall',
       'Streets and Sidewalks - Overall', 'Trash and Recycling - Overall',
       'Public Education', 'Public Library', 'Social Services - Overall',
       'Parks and Recreation - Overall', 'Public Transit - Overall',
       'Metro-provided Arts and Culture - Overall',
       'City Street Maintenance', 'Street Signs and Traffic Signals',
       'Bicycle Infrastructure', 'Trash Service', 'Curbside Recycling',
       'Drop

In [14]:
# Respondent-level table: identifier, survey period/date, ZIP code, the
# quality-of-life and safety ratings, the "Nashville as ..." ratings, and
# gender identity.
survey_info = survey2.loc[:, [
    'Id',
    'Period',
    'Date',
    'ZIP Code',
    'Quality of Life',
    'Overall Feeling of Safety',
    'Neighborhood Safety',
    'Neighborhood Appearance',
    'Nashville as Residence',
    'Nashville as Place to Raise Children',
    'Nashville as Workplace',
    'Nashville as Retirement Place',
    'Gender Identity',
]]
survey_info.head(2)

Unnamed: 0,Id,Period,Date,ZIP Code,Quality of Life,Overall Feeling of Safety,Neighborhood Safety,Neighborhood Appearance,Nashville as Residence,Nashville as Place to Raise Children,Nashville as Workplace,Nashville as Retirement Place,Gender Identity
0,1,2018Q2,07/16/2018,37208,Neutral,Neutral,Neutral,Satisfied,Neutral,Good,Neutral,Neutral,Female
1,2,2018Q2,07/16/2018,37211,Satisfied,Satisfied,Satisfied,Satisfied,Good,Good,Good,Good,Female


In [15]:
# Ratings of Metro government as a whole, keyed by respondent Id.
metro_gov = survey2.loc[:, [
    'Id',
    'Metro Services - Overall',
    'Value Received from Taxes',
    'Elected Official Leadership',
    'Metro Transparency and Ethics',
    'Customer Service',
    'Metro Communication',
    'Decision Input Opportunity',
]]
metro_gov.head(2)

Unnamed: 0,Id,Metro Services - Overall,Value Received from Taxes,Elected Official Leadership,Metro Transparency and Ethics,Customer Service,Metro Communication,Decision Input Opportunity
0,1,Neutral,Neutral,Neutral,Neutral,Neutral,Neutral,Neutral
1,2,Satisfied,Satisfied,Neutral,Neutral,Satisfied,Satisfied,Dissatisfied


In [16]:
# Police, fire, EMS and emergency-number ratings, keyed by respondent Id.
safety = survey2.loc[:, [
    'Id',
    'Crime Prevention',
    'Police - Overall',
    'Fire and EMS - Overall',
    'Police Visibility',
    'Traffic Enforcement',
    'Police Professionalism',
    'Police Response Time',
    'Fire Services',
    'Fire Response Time',
    'Emergency Medical Services (EMS)',
    'EMS Response Time',
    'Fire Professionalism',
    'EMS Professionalism',
    '911 Services',
    'Non-Emergency Number',
]]
safety.head(2)

Unnamed: 0,Id,Crime Prevention,Police - Overall,Fire and EMS - Overall,Police Visibility,Traffic Enforcement,Police Professionalism,Police Response Time,Fire Services,Fire Response Time,Emergency Medical Services (EMS),EMS Response Time,Fire Professionalism,EMS Professionalism,911 Services,Non-Emergency Number
0,1,Very Dissatisfied,Very Dissatisfied,Satisfied,Neutral,Neutral,Neutral,Dissatisfied,Neutral,Very Satisfied,Satisfied,Satisfied,Neutral,Neutral,Very Satisfied,Satisfied
1,2,Dissatisfied,Satisfied,Very Satisfied,Satisfied,Dissatisfied,Satisfied,Don't Know,Satisfied,Don't Know,Don't Know,Don't Know,Very Satisfied,Very Satisfied,Don't Know,Don't Know


In [17]:
# Ratings of general city services (streets, waste, education, library,
# transit, communication channels, ...), keyed by respondent Id.
general_services = survey2.loc[:, [
    'Id',
    'Streets and Sidewalks - Overall',
    'Trash and Recycling - Overall',
    'Public Education',
    'Public Library',
    'Social Services - Overall',
    'Parks and Recreation - Overall',
    'Public Transit - Overall',
    'Metro-provided Arts and Culture - Overall',
    'City Street Maintenance',
    'Street Signs and Traffic Signals',
    'Bicycle Infrastructure',
    'Trash Service',
    'Curbside Recycling',
    'Drop-off Recycling',
    'Street Lighting',
    'hubNashville (311)',
    'Program Information Availability',
    'Nashville.gov',
    'Metro Video Programming',
    'Metro Social Media',
]]
general_services.head(2)

Unnamed: 0,Id,Streets and Sidewalks - Overall,Trash and Recycling - Overall,Public Education,Public Library,Social Services - Overall,Parks and Recreation - Overall,Public Transit - Overall,Metro-provided Arts and Culture - Overall,City Street Maintenance,...,Bicycle Infrastructure,Trash Service,Curbside Recycling,Drop-off Recycling,Street Lighting,hubNashville (311),Program Information Availability,Nashville.gov,Metro Video Programming,Metro Social Media
0,1,Satisfied,Neutral,Dissatisfied,Neutral,Neutral,Satisfied,Satisfied,Neutral,Very Dissatisfied,...,Satisfied,Satisfied,Neutral,Neutral,Neutral,Satisfied,Satisfied,Satisfied,Satisfied,Satisfied
1,2,Dissatisfied,Very Dissatisfied,Satisfied,Very Satisfied,Neutral,Satisfied,Very Dissatisfied,Satisfied,Neutral,...,Dissatisfied,Very Satisfied,Very Dissatisfied,Neutral,Neutral,Don't Know,Satisfied,Don't Know,Don't Know,Satisfied


In [18]:
# Ratings of individual transit offerings, keyed by respondent Id.
transportation = survey2.loc[:, [
    'Id',
    'Music City Circuit',
    'Music City Star',
    'Park and Ride',
    'Bus Shelters',
]]
transportation.head(2)

Unnamed: 0,Id,Music City Circuit,Music City Star,Park and Ride,Bus Shelters
0,1,Satisfied,Don't Know,Satisfied,Neutral
1,2,Neutral,Don't Know,Dissatisfied,Satisfied


In [19]:
# Ratings of the four enforcement categories, keyed by respondent Id.
codes = survey2.loc[:, [
    'Id',
    'Trash and Debris Enforcement',
    'Lawn and Brush Enforcement',
    'Exterior Maintenance Enforcement',
    'Short-Term Rental Enforcement',
]]
codes.head(2)

Unnamed: 0,Id,Trash and Debris Enforcement,Lawn and Brush Enforcement,Exterior Maintenance Enforcement,Short-Term Rental Enforcement
0,1,Neutral,Neutral,Neutral,Neutral
1,2,Satisfied,Satisfied,Satisfied,Neutral


In [20]:
# Ratings of parks and recreation facilities/programs, keyed by respondent Id.
recreation = survey2.loc[:, [
    'Id',
    'Parks Maintenance',
    'Outdoor Athletic Fields',
    'Walking and Biking Trails',
    'Swimming Pools',
    'Community Centers',
    'Youth Recreation Programs',
    'Senior Recreation Programs',
]]
recreation.head(2)

Unnamed: 0,Id,Parks Maintenance,Outdoor Athletic Fields,Walking and Biking Trails,Swimming Pools,Community Centers,Youth Recreation Programs,Senior Recreation Programs
0,1,Neutral,Neutral,Neutral,Very Dissatisfied,Very Satisfied,Satisfied,Satisfied
1,2,Satisfied,Satisfied,Satisfied,Neutral,Satisfied,Don't Know,Don't Know


In [21]:
# Access and diversity ratings, keyed by respondent Id.
equality_and_access = survey2.loc[:, [
    'Id',
    'Healthcare Access',
    'Healthy Food Access',
    'Childcare Access',
    'Pre-K Access',
    'Public Transit Access',
    'Diversity Support Efforts',
    'Diversity Acceptance',
    'Disability Access',
]]

In [22]:
# IPython line magic: list the names currently defined in the interactive namespace.
%who

codes	 cols	 cols_to_keep	 create_engine	 equality_and_access	 general_services	 metro_gov	 pd	 recreation	 
safety	 sqlite3	 survey	 survey2	 survey_info	 transportation	 


#### Tables to write 
 - survey_info (written as table `info`)
 - metro_gov (written as table `overall`)
 - equality_and_access
 - general_services
 - safety
 - recreation
 - transportation
 - codes

In [23]:
# SQLAlchemy engine for the SQLite file `metro_survey.db`; the three-slash URL
# form is a path relative to the current working directory.
engine = create_engine("sqlite:///metro_survey.db")

In [24]:
# Write each sub-frame to its own SQLite table.
#
# The original opened a connection that the writes never used (they were
# handed `engine` instead) and that stayed open if any write raised. Here all
# writes go through one connection obtained from `engine.begin()`: the `with`
# block commits on success, rolls back on error, and always releases the
# connection.
#
# Table names are unchanged; note that `survey_info` is stored as 'info' and
# `metro_gov` as 'overall'. The DataFrame index is still written as an
# 'index' column (pandas default), exactly as before.
tables_to_write = {
    'info': survey_info,
    'overall': metro_gov,
    'equality_and_access': equality_and_access,
    'general_services': general_services,
    'recreation': recreation,
    'transportation': transportation,
    'codes': codes,
    'safety': safety,
}

with engine.begin() as connection:
    for table_name, frame in tables_to_write.items():
        # 'replace' drops any existing table of this name first, so the cell
        # can be re-run without duplicating rows.
        frame.to_sql(table_name, connection, if_exists='replace')


In [37]:
# Sanity check on the join key: count how many distinct Id values occur more
# than once in `safety` (0 means every Id is unique).
safety['Id'].value_counts().gt(1).sum()

0