In [53]:
#Imports
import pandas as pd
import pyodbc
import sqlalchemy
from sqlalchemy import create_engine
import urllib
import numpy as np

In [54]:
# Watermark
print('Nathan Young\nJunior Data Analyst\nCenter for the Study of Free Enterprise')
%load_ext watermark
%watermark -a "Western Carolina University" -u -d -p pandas

Nathan Young
Junior Data Analyst
Center for the Study of Free Enterprise
The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Western Carolina University 
last updated: 2020-03-03 

pandas 1.0.1


In [55]:
# Create Backups
df_backup = pd.read_csv('../Updates/STG_ZLLW_County_MedianSalePrice_AllHomes.txt')
df_backup.to_csv('../Backups/STG_ZLLW_County_MedianSalePrice_AllHomes_BACKUP.txt')

In [56]:
#Load Land data
df = pd.read_csv('http://files.zillowstatic.com/research/public/County/Sale_Prices_County.csv',
                     encoding='ISO-8859-1')

In [57]:
df = df.drop(columns = ['RegionID'], axis = 1)

In [58]:
#Filter data to NC
filter1 = df['StateName'] == "North Carolina"
df_nc = df[filter1]

In [59]:
#Sort by Region Name
df_nc = df_nc.sort_values('RegionName', ascending = True)
df_nc

Unnamed: 0,RegionName,StateName,SizeRank,2008-03,2008-04,2008-05,2008-06,2008-07,2008-08,2008-09,...,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11,2019-12,2020-01
385,Alamance County,North Carolina,386,136200.0,123900.0,128300.0,135800.0,139800.0,139600.0,129700.0,...,166800.0,165900.0,171000.0,168200.0,171200.0,173800.0,171500.0,161800.0,162300.0,154400.0
1100,Anson County,North Carolina,1101,,,,,,,,...,87800.0,83500.0,87600.0,67900.0,77900.0,75600.0,77900.0,73800.0,96000.0,78800.0
1095,Ashe County,North Carolina,1096,,,,,,,,...,203500.0,201400.0,199300.0,196000.0,205900.0,201300.0,189900.0,183800.0,202200.0,
499,Brunswick County,North Carolina,500,,,,,,,,...,236500.0,235100.0,235400.0,237300.0,239900.0,242400.0,244000.0,244600.0,252000.0,250200.0
250,Buncombe County,North Carolina,251,,,,,,,,...,291100.0,288400.0,288900.0,290700.0,289100.0,292400.0,291900.0,291400.0,300800.0,309600.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51,Wake County,North Carolina,52,186500.0,182500.0,180600.0,180400.0,182500.0,182100.0,179900.0,...,277500.0,272300.0,271200.0,273300.0,278500.0,280800.0,282700.0,284000.0,288200.0,292400.0
809,Watauga County,North Carolina,810,,,,,,,,...,273000.0,270400.0,271800.0,280300.0,273800.0,277900.0,277300.0,277000.0,273500.0,277400.0
672,Wilkes County,North Carolina,673,,,,,,,,...,131400.0,138100.0,143700.0,143900.0,138800.0,132700.0,131900.0,131300.0,145000.0,
607,Wilson County,North Carolina,608,,,,,,,,...,134900.0,121500.0,116200.0,119100.0,122800.0,123100.0,119100.0,124900.0,124300.0,129300.0


In [60]:
df_fips = pd.read_csv('../FIPS_Codes.csv')
df_fips

Unnamed: 0,RegionName,State,Metro,StateCodeFIPS,GeoFIPS,MunicipalCodeFIPS
0,Alamance County,NC,Burlington,37,37001,1
1,Alexander County,NC,Hickory-Lenoir-Morganton,37,37003,3
2,Alleghany County,NC,,37,37005,5
3,Anson County,NC,,37,37007,7
4,Ashe County,NC,,37,37009,9
...,...,...,...,...,...,...
95,Wayne County,NC,Goldsboro,37,37191,191
96,Wilkes County,NC,North Wilkesboro,37,37193,193
97,Wilson County,NC,Wilson,37,37195,195
98,Yadkin County,NC,Winston-Salem,37,37197,197


In [61]:
df_msp_nc = df_nc.set_index('RegionName').join(df_fips.set_index('RegionName'))

In [62]:
#Change MunicipalCodeFIPS dtype to add leading 0's
df_msp_nc.loc[ :, 'MunicipalCodeFIPS'] = df_msp_nc['MunicipalCodeFIPS'].astype(str)

In [63]:
#Add leading 0's and check to ensure they were added
df_msp_nc.loc[ :, 'MunicipalCodeFIPS'] = df_msp_nc['MunicipalCodeFIPS'].str.zfill(3)
df_msp_nc.head(5)

Unnamed: 0_level_0,StateName,SizeRank,2008-03,2008-04,2008-05,2008-06,2008-07,2008-08,2008-09,2008-10,...,2019-09,2019-10,2019-11,2019-12,2020-01,State,Metro,StateCodeFIPS,GeoFIPS,MunicipalCodeFIPS
RegionName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alamance County,North Carolina,386,136200.0,123900.0,128300.0,135800.0,139800.0,139600.0,129700.0,127800.0,...,173800.0,171500.0,161800.0,162300.0,154400.0,NC,Burlington,37,37001,1
Anson County,North Carolina,1101,,,,,,,,,...,75600.0,77900.0,73800.0,96000.0,78800.0,NC,,37,37007,7
Ashe County,North Carolina,1096,,,,,,,,,...,201300.0,189900.0,183800.0,202200.0,,NC,,37,37009,9
Brunswick County,North Carolina,500,,,,,,,,,...,242400.0,244000.0,244600.0,252000.0,250200.0,NC,Myrtle Beach-Conway-North Myrtle Beach,37,37019,19
Buncombe County,North Carolina,251,,,,,,,,,...,292400.0,291900.0,291400.0,300800.0,309600.0,NC,Asheville,37,37021,21


In [64]:
columns = ['State','Metro','StateCodeFIPS','MunicipalCodeFIPS','SizeRank',
        '2008-03',
        '2008-04',
        '2008-05',
        '2008-06',
        '2008-07',
        '2008-08',
        '2008-09',
        '2008-10',
        '2008-11',
        '2008-12',
        '2009-01',
        '2009-02',
        '2009-03',
        '2009-04',
        '2009-05',
        '2009-06',
        '2009-07',
        '2009-08',
        '2009-09',
        '2009-10',
        '2009-11',
        '2009-12',
        '2010-01',
        '2010-02',
        '2010-03',
        '2010-04',
        '2010-05',
        '2010-06',
        '2010-07',
        '2010-08',
        '2010-09',
        '2010-10',
        '2010-11',
        '2010-12',
        '2011-01',
        '2011-02',
        '2011-03',
        '2011-04',
        '2011-05',
        '2011-06',
        '2011-07',
        '2011-08',
        '2011-09',
        '2011-10',
        '2011-11',
        '2011-12',
        '2012-01',
        '2012-02',
        '2012-03',
        '2012-04',
        '2012-05',
        '2012-06',
        '2012-07',
        '2012-08',
        '2012-09',
        '2012-10',
        '2012-11',
        '2012-12',
        '2013-01',
        '2013-02',
        '2013-03',
        '2013-04',
        '2013-05',
        '2013-06',
        '2013-07',
        '2013-08',
        '2013-09',
        '2013-10',
        '2013-11',
        '2013-12',
        '2014-01',
        '2014-02',
        '2014-03',
        '2014-04',
        '2014-05',
        '2014-06',
        '2014-07',
        '2014-08',
        '2014-09',
        '2014-10',
        '2014-11',
        '2014-12',
        '2015-01',
        '2015-02',
        '2015-03',
        '2015-04',
        '2015-05',
        '2015-06',
        '2015-07',
        '2015-08',
        '2015-09',
        '2015-10',
        '2015-11',
        '2015-12',
        '2016-01',
        '2016-02',
        '2016-03',
        '2016-04',
        '2016-05',
        '2016-06',
        '2016-07',
        '2016-08',
        '2016-09',
        '2016-10',
        '2016-11',
        '2016-12',
        '2017-01',
        '2017-02',
        '2017-03',
        '2017-04',
        '2017-05',
        '2017-06',
        '2017-07',
        '2017-08',
        '2017-09',
        '2017-10',
        '2017-11',
        '2017-12',
        '2018-01',
        '2018-02',
        '2018-03',
        '2018-04',
        '2018-05',
        '2018-06',
        '2018-07',
        '2018-08',
        '2018-09',
        '2018-10',
        '2018-11',
        '2018-12',
        '2019-01',
        '2019-02',
        '2019-03',
        '2019-04',
        '2019-05',
        '2019-06',
        '2019-07',
        '2019-08',
        '2019-09',
        '2019-10',
        '2019-11',
        '2019-12',
        '2020-01']
df_msp_nc = df_msp_nc[columns]
df_msp_nc

Unnamed: 0_level_0,State,Metro,StateCodeFIPS,MunicipalCodeFIPS,SizeRank,2008-03,2008-04,2008-05,2008-06,2008-07,...,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11,2019-12,2020-01
RegionName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alamance County,NC,Burlington,37,001,386,136200.0,123900.0,128300.0,135800.0,139800.0,...,166800.0,165900.0,171000.0,168200.0,171200.0,173800.0,171500.0,161800.0,162300.0,154400.0
Anson County,NC,,37,007,1101,,,,,,...,87800.0,83500.0,87600.0,67900.0,77900.0,75600.0,77900.0,73800.0,96000.0,78800.0
Ashe County,NC,,37,009,1096,,,,,,...,203500.0,201400.0,199300.0,196000.0,205900.0,201300.0,189900.0,183800.0,202200.0,
Brunswick County,NC,Myrtle Beach-Conway-North Myrtle Beach,37,019,500,,,,,,...,236500.0,235100.0,235400.0,237300.0,239900.0,242400.0,244000.0,244600.0,252000.0,250200.0
Buncombe County,NC,Asheville,37,021,251,,,,,,...,291100.0,288400.0,288900.0,290700.0,289100.0,292400.0,291900.0,291400.0,300800.0,309600.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wake County,NC,Raleigh,37,183,52,186500.0,182500.0,180600.0,180400.0,182500.0,...,277500.0,272300.0,271200.0,273300.0,278500.0,280800.0,282700.0,284000.0,288200.0,292400.0
Watauga County,NC,Boone,37,189,810,,,,,,...,273000.0,270400.0,271800.0,280300.0,273800.0,277900.0,277300.0,277000.0,273500.0,277400.0
Wilkes County,NC,North Wilkesboro,37,193,673,,,,,,...,131400.0,138100.0,143700.0,143900.0,138800.0,132700.0,131900.0,131300.0,145000.0,
Wilson County,NC,Wilson,37,195,608,,,,,,...,134900.0,121500.0,116200.0,119100.0,122800.0,123100.0,119100.0,124900.0,124300.0,129300.0


In [65]:
#Save to csv file for export in Excel
df_msp_nc.to_csv('../Updates/STG_ZLLW_County_MedianSalePrice_AllHomes.txt', sep ='\t')

In [66]:
#Reset Index for upload to database
df_msp_nc = df_msp_nc.reset_index()    

In [67]:
#Fill NaN values for upload to database
df_msp_nc['Metro'] = df_msp_nc['Metro'].replace(np.nan,'', regex=True)

column_list = df_msp_nc.columns.values
for i in column_list:
    df_msp_nc.loc[df_msp_nc[i].isnull(),i]=0

In [68]:
#Connect to database and create cursor
con = pyodbc.connect('Driver={SQL Server};'
                      'Server=TITANIUM-BOOK;'
                      'Database=DataDashboard;'
                      'Trusted_Connection=yes;',
                    autocommit=True)

c = con.cursor()

In [69]:
#Drop old backup table
c.execute('drop table STG_ZLLW_County_MedianSalePrice_AllHomes_BACKUP')

<pyodbc.Cursor at 0x1bc634475b0>

In [70]:
#Create new backup
c.execute('''sp_rename 'dbo.STG_ZLLW_County_MedianSalePrice_AllHomes','STG_ZLLW_County_MedianSalePrice_AllHomes_BACKUP';''')

<pyodbc.Cursor at 0x1bc634475b0>

In [71]:
c.execute('''USE [DataDashboard]

SET ANSI_NULLS ON

SET QUOTED_IDENTIFIER ON

CREATE TABLE [dbo].[STG_ZLLW_County_MedianSalePrice_AllHomes](
	[RegionName] [varchar](40) NULL,
	[State] [varchar](2) NULL,
	[Metro] [varchar](40) NULL,
	[StateCodeFIPS] [varchar](2) NULL,
	[MunicipalCodeFIPS] [varchar](3) NULL,
	[SizeRank] [smallint] NULL,
	[2010-01] [float] NULL,
	[2010-02] [float] NULL,
	[2010-03] [float] NULL,
	[2010-04] [float] NULL,
	[2010-05] [float] NULL,
	[2010-06] [float] NULL,
	[2010-07] [float] NULL,
	[2010-08] [float] NULL,
	[2010-09] [float] NULL,
	[2010-10] [float] NULL,
	[2010-11] [float] NULL,
	[2010-12] [float] NULL,
	[2011-01] [float] NULL,
	[2011-02] [float] NULL,
	[2011-03] [float] NULL,
	[2011-04] [float] NULL,
	[2011-05] [float] NULL,
	[2011-06] [float] NULL,
	[2011-07] [float] NULL,
	[2011-08] [float] NULL,
	[2011-09] [float] NULL,
	[2011-10] [float] NULL,
	[2011-11] [float] NULL,
	[2011-12] [float] NULL,
	[2012-01] [float] NULL,
	[2012-02] [float] NULL,
	[2012-03] [float] NULL,
	[2012-04] [float] NULL,
	[2012-05] [float] NULL,
	[2012-06] [float] NULL,
	[2012-07] [float] NULL,
	[2012-08] [float] NULL,
	[2012-09] [float] NULL,
	[2012-10] [float] NULL,
	[2012-11] [float] NULL,
	[2012-12] [float] NULL,
	[2013-01] [float] NULL,
	[2013-02] [float] NULL,
	[2013-03] [float] NULL,
	[2013-04] [float] NULL,
	[2013-05] [float] NULL,
	[2013-06] [float] NULL,
	[2013-07] [float] NULL,
	[2013-08] [float] NULL,
	[2013-09] [float] NULL,
	[2013-10] [float] NULL,
	[2013-11] [float] NULL,
	[2013-12] [float] NULL,
	[2014-01] [float] NULL,
	[2014-02] [float] NULL,
	[2014-03] [float] NULL,
	[2014-04] [float] NULL,
	[2014-05] [float] NULL,
	[2014-06] [float] NULL,
	[2014-07] [float] NULL,
	[2014-08] [float] NULL,
	[2014-09] [float] NULL,
	[2014-10] [float] NULL,
	[2014-11] [float] NULL,
	[2014-12] [float] NULL,
	[2015-01] [float] NULL,
	[2015-02] [float] NULL,
	[2015-03] [float] NULL,
	[2015-04] [float] NULL,
	[2015-05] [float] NULL,
	[2015-06] [float] NULL,
	[2015-07] [float] NULL,
	[2015-08] [float] NULL,
	[2015-09] [float] NULL,
	[2015-10] [float] NULL,
	[2015-11] [float] NULL,
	[2015-12] [float] NULL,
	[2016-01] [float] NULL,
	[2016-02] [float] NULL,
	[2016-03] [float] NULL,
	[2016-04] [float] NULL,
	[2016-05] [float] NULL,
	[2016-06] [float] NULL,
	[2016-07] [float] NULL,
	[2016-08] [float] NULL,
	[2016-09] [float] NULL,
	[2016-10] [float] NULL,
	[2016-11] [float] NULL,
	[2016-12] [float] NULL,
	[2017-01] [float] NULL,
	[2017-02] [float] NULL,
	[2017-03] [float] NULL,
	[2017-04] [float] NULL,
	[2017-05] [float] NULL,
	[2017-06] [float] NULL,
	[2017-07] [float] NULL,
	[2017-08] [float] NULL,
	[2017-09] [float] NULL,
	[2017-10] [float] NULL,
	[2017-11] [float] NULL,
	[2017-12] [float] NULL,
	[2018-01] [float] NULL,
	[2018-02] [float] NULL,
	[2018-03] [float] NULL,
	[2018-04] [float] NULL,
	[2018-05] [float] NULL,
	[2018-06] [float] NULL,
	[2018-07] [float] NULL,
	[2018-08] [float] NULL,
	[2018-09] [float] NULL,
	[2018-10] [float] NULL,
	[2018-11] [float] NULL,
	[2018-12] [float] NULL,
	[2019-01] [float] NULL,
	[2019-02] [float] NULL,
	[2019-03] [float] NULL,
	[2019-04] [float] NULL,
	[2019-05] [float] NULL,
	[2019-06] [float] NULL,
	[2019-07] [float] NULL,
	[2019-08] [float] NULL,
	[2019-09] [float] NULL,
	[2019-10] [float] NULL,
	[2019-11] [float] NULL,
	[2019-12] [float] NULL,
    [2020-01] [float] NULL,
    [2020-02] [float] NULL,
    [2020-03] [float] NULL,
    [2020-04] [float] NULL,
    [2020-05] [float] NULL,
    [2020-06] [float] NULL,
    [2020-07] [float] NULL,
    [2020-08] [float] NULL,
    [2020-09] [float] NULL,
    [2020-10] [float] NULL,
    [2020-11] [float] NULL,
    [2020-12] [float] NULL,
    [2021-01] [float] NULL,
    [2021-02] [float] NULL,
    [2021-03] [float] NULL,
    [2021-04] [float] NULL,
    [2021-05] [float] NULL,
    [2021-06] [float] NULL,
    [2021-07] [float] NULL,
    [2021-08] [float] NULL,
    [2021-09] [float] NULL,
    [2021-10] [float] NULL,
    [2021-11] [float] NULL,
    [2021-12] [float] NULL,
    [2022-01] [float] NULL,
    [2022-02] [float] NULL,
    [2022-03] [float] NULL,
    [2022-04] [float] NULL,
    [2022-05] [float] NULL,
    [2022-06] [float] NULL,
    [2022-07] [float] NULL,
    [2022-08] [float] NULL,
    [2022-09] [float] NULL,
    [2022-10] [float] NULL,
    [2022-11] [float] NULL,
    [2022-12] [float] NULL
) ON [PRIMARY]''')

<pyodbc.Cursor at 0x1bc634475b0>

In [72]:
params = urllib.parse.quote_plus(r'Driver={SQL Server};' 
                                 r'Server=TITANIUM-BOOK;'
                                 r'Database=DataDashboard;'
                                 r'Trusted_Connection=yes;')

engine = create_engine("mssql+pyodbc:///?odbc_connect=%s" % params)

#warning: discard old table if exists
df_msp_nc.to_sql('STG_ZLLW_County_MedianSalePrice_AllHomes', con=engine, if_exists='replace', index=False)