In [1]:
# Import pandas dependencies
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

In [2]:
# read the state-level smoking rate table from the Resources folder
# and preview its first five rows
smoking_rate_df = pd.read_csv(filepath_or_buffer='Resources/smoking_rate.csv')
smoking_rate_df.head(n=5)

Unnamed: 0,State,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Alabama,24.3,23.8,21.5,21.1,21.4,21.5,20.9,19.2,20.2,17.2
1,Alaska,22.9,20.5,22.6,19.9,19.1,19.0,21.0,19.1,17.4,17.1
2,Arizona,19.3,17.1,16.3,16.5,14.0,14.7,15.6,14.0,14.9,13.1
3,Arkansas,27.0,25.0,25.9,24.7,24.9,23.6,22.3,22.7,20.2,21.1
4,California,13.7,12.6,12.5,12.9,11.7,11.0,11.3,11.2,10.0,8.8


In [4]:
# reshape the smoking table from wide to long format: 'State' is kept as the
# identifier, every other column name goes into 'year' and its cell value
# into 'smoking_rate'
smoking_rate_melt_df = smoking_rate_df.melt(
    id_vars='State',
    var_name='year',
    value_name='smoking_rate',
)
smoking_rate_melt_df.head()

Unnamed: 0,State,year,smoking_rate
0,Alabama,2011,24.3
1,Alaska,2011,22.9
2,Arizona,2011,19.3
3,Arkansas,2011,27.0
4,California,2011,13.7


In [11]:
# (rows, columns) of the long-format smoking rate dataframe
smoking_rate_melt_df.shape

(530, 3)

In [5]:
# read the state-level obesity rate table from the Resources folder
# and preview its first five rows
obesity_rate_df = pd.read_csv(filepath_or_buffer='Resources/obesity_rate.csv')
obesity_rate_df.head(n=5)

Unnamed: 0,State,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,Alabama,33.0,32.0,33.0,32.4,33.5,35.6,35.7,36.3,36.2,36.1
1,Alaska,25.2,27.4,25.7,28.4,29.7,29.8,31.4,34.2,29.5,30.5
2,Arizona,25.2,25.1,26.0,26.8,28.9,28.4,29.0,29.5,30.0,31.4
3,Arkansas,30.9,30.9,34.5,34.6,35.9,34.5,35.7,35.0,37.1,37.4
4,California,24.7,23.8,25.0,24.1,24.7,24.2,25.0,25.1,25.8,26.2


In [6]:
# reshape the obesity table from wide to long format: 'State' is kept as the
# identifier, every other column name goes into 'year' and its cell value
# into 'obesity_rate'
obesity_rate_melt_df = obesity_rate_df.melt(
    id_vars='State',
    var_name='year',
    value_name='obesity_rate',
)
obesity_rate_melt_df.head()

Unnamed: 0,State,year,obesity_rate
0,Alabama,2010,33.0
1,Alaska,2010,25.2
2,Arizona,2010,25.2
3,Arkansas,2010,30.9
4,California,2010,24.7


In [12]:
# (rows, columns) of the long-format obesity rate dataframe
obesity_rate_melt_df.shape

(530, 3)

In [7]:
# read the cleaned per capita income table from the Resources folder
# and preview its first five rows
per_capita_income_df = pd.read_csv(
    filepath_or_buffer='Resources/cleaned_per_capita_income.csv'
)
per_capita_income_df.head(n=5)

Unnamed: 0,GeoName,1999,2000,2001,2002,2003,2004,2005,2006,2007,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,United States,28693,30672,31617,31839,32717,34280,35868,38120,39883,...,40683,42747,44548,44798,46887,48725,49613,51550,53786,56250
1,Alabama,23333,24306,25057,25648,26673,28434,29949,31474,32739,...,33946,35010,35824,36014,37055,38531,39014,40223,41539,43288
2,Alaska,29951,32044,33626,34756,35996,37016,39075,41157,43906,...,49652,52569,53708,52812,55867,57575,56278,57189,59618,61316
3,Arizona,24960,26388,27008,27353,28393,30221,32327,34703,35872,...,33876,35321,36545,37139,38756,40334,41473,43497,45466,48124
4,Arkansas,21871,22781,23873,24306,25595,27059,28227,29617,31303,...,32372,34279,36582,36677,38749,39968,40873,41890,43384,44324


In [8]:
# reshape the income table from wide to long format: 'GeoName' is kept as the
# identifier, every other column name goes into 'year' and its cell value
# into 'per_capita_income'
per_capita_income_melt_df = per_capita_income_df.melt(
    id_vars='GeoName',
    var_name='year',
    value_name='per_capita_income',
)
per_capita_income_melt_df.head()

Unnamed: 0,GeoName,year,per_capita_income
0,United States,1999,28693
1,Alabama,1999,23333
2,Alaska,1999,29951
3,Arizona,1999,24960
4,Arkansas,1999,21871


In [13]:
# (rows, columns) of the long-format per capita income dataframe
per_capita_income_melt_df.shape

(1092, 3)

In [15]:
# outer-join the long obesity and smoking tables on their shared State/year
# keys, so a State/year pair present in only one table is still kept
obesity_smoking_df = obesity_rate_melt_df.merge(
    smoking_rate_melt_df,
    how='outer',
    on=['State', 'year'],
)
obesity_smoking_df.head()

Unnamed: 0,State,year,obesity_rate,smoking_rate
0,Alabama,2010,33.0,
1,Alaska,2010,25.2,
2,Arizona,2010,25.2,
3,Arkansas,2010,30.9,
4,California,2010,24.7,


In [16]:
# (rows, columns) of the merged obesity/smoking dataframe
obesity_smoking_df.shape

(583, 4)

In [21]:
# outer-join the long income table with the obesity/smoking table.
# The region key is named 'GeoName' on the left and 'State' on the right,
# so both columns are present in the merged result.
demographic_df = per_capita_income_melt_df.merge(
    obesity_smoking_df,
    how='outer',
    left_on=['GeoName', 'year'],
    right_on=['State', 'year'],
)
demographic_df.head()

Unnamed: 0,GeoName,year,per_capita_income,State,obesity_rate,smoking_rate
0,United States,1999,28693.0,,,
1,Alabama,1999,23333.0,,,
2,Alaska,1999,29951.0,,,
3,Arizona,1999,24960.0,,,
4,Arkansas,1999,21871.0,,,


In [22]:
# (rows, columns) of demographic_df after the outer merge
demographic_df.shape

(1165, 6)

In [23]:
# drop State column
# The outer merge leaves GeoName empty on rows that exist only in
# obesity_smoking_df (no matching GeoName/year on the income side), so the
# State label is copied into GeoName first; otherwise dropping State would
# leave those rows with no region name at all.
# Reassigning (instead of inplace=True) together with the column check keeps
# this cell safe to re-run after State has already been removed.
if 'State' in demographic_df.columns:
    demographic_df = (
        demographic_df
        .assign(GeoName=lambda df: df['GeoName'].fillna(df['State']))
        .drop(columns=['State'])
    )
demographic_df.head()

Unnamed: 0,GeoName,year,per_capita_income,obesity_rate,smoking_rate
0,United States,1999,28693.0,,
1,Alabama,1999,23333.0,,
2,Alaska,1999,29951.0,,
3,Arizona,1999,24960.0,,
4,Arkansas,1999,21871.0,,


In [24]:
# (rows, columns) of demographic_df after dropping the State column
demographic_df.shape

(1165, 5)

## Push demographic data to the PostgreSQL database

In [25]:
# Set connection for sql database
# Credentials and location are read from environment variables so they never
# have to be typed into (and saved with) the notebook; a variable that is not
# set falls back to the empty string.
dialect = 'postgresql'
username = os.environ.get('DB_USERNAME', '')
password = os.environ.get('DB_PASSWORD', '')
host = os.environ.get('DB_HOST', '')
port = '5432'
database = os.environ.get('DB_NAME', '')

# URL format: dialect+driver://username:password@host:port/database
# The username and password are percent-encoded because a raw '@', ':' or '/'
# inside either of them would be read as a URL delimiter.
conn_string = (
    f"{dialect}://{quote_plus(username)}:{quote_plus(password)}"
    f"@{host}:{port}/{database}"
)

In [26]:
# Create the SQLAlchemy engine from the connection URL in conn_string
engine = create_engine(conn_string)

In [27]:
# write demographic_df to the 'demographic_table' table through the engine,
# replacing the table if it already exists; the dataframe index is not written
demographic_df.to_sql(
    name='demographic_table',
    con=engine,
    if_exists='replace',
    index=False,
)