# Scott Breitbach
## Milestone 5: Merging the Data and Storing in a Database/Visualizing Data
## 26-Feb-2021
## DSC540, Weeks 11-12

In [1]:
import sqlite3
import pandas as pd

# Load data into an SQL database, each as an individual table.

In [2]:
# Read the three previously cleaned CSV files back into DataFrames.
# The targets are bound in the same order as the file names listed below.
csvData, webData, apiData = (
    pd.read_csv(csvPath)
    for csvPath in (
        "CSV_nebraskaCleaned.csv",
        "WEB_NEbeerReddit.csv",
        "API_beerMapping.csv",
    )
)

### First, a little extra cleaning:

#### Replace spaces in column names with underscores

In [None]:
def colSpaceToUnderscore(df):
    '''Rename the columns of a DataFrame in place, swapping spaces for underscores.

    The DataFrame passed in is modified directly (nothing is returned), and
    the resulting column index is printed as a quick visual check.
    '''
    df.columns = [label.replace(' ', '_') for label in df.columns]
    print(f"New column names:\n{df.columns}")

In [None]:
# Run the column-name cleanup on each of the three datasets in turn:
for frame in (csvData, webData, apiData):
    colSpaceToUnderscore(frame)

#### Make street addresses a uniform format

In [None]:
import pandas_usaddress

def standardizeAddress(df, colName):
    '''Overwrite a street address column with a standardized, Title Case version.

    The column is passed through pandas_usaddress.tag() with single-line
    granularity and standardization switched on. The 'SingleLine' column of
    the tagged result is then title-cased and written back over df[colName]
    in place. Nothing is returned.
    '''
    taggedDF = pandas_usaddress.tag(
        df,
        [colName],
        granularity='single',
        standardize=True,
    )
    singleLine = taggedDF['SingleLine']
    df[colName] = singleLine.str.title()

In [None]:
# The CSV and API datasets name their street address column differently,
# so pair each DataFrame with its own column name:
for frame, addressCol in (
    (csvData, 'Street_Address'),
    (apiData, 'StreetAddress'),
):
    standardizeAddress(frame, addressCol)

#### Add a column for consolidating Omaha suburbs
Source for the list of neighborhoods: https://en.wikipedia.org/wiki/Neighborhoods_of_Omaha,_Nebraska

In [None]:
import requests

In [None]:
url = "https://en.wikipedia.org/wiki/Neighborhoods_of_Omaha,_Nebraska"
# pd.read_html() returns a list of DataFrames parsed from the page's HTML
# tables; the one wanted here is at index 1:
listOfTables = pd.read_html(url)
omahaSubsDF = listOfTables[1]
# Build the list of Omaha suburbs from the neighborhood-name column (its
# first entry is skipped), then tack on a couple of extra names:
neighborhoodCol = omahaSubsDF['Neighborhoods in Omaha alphabetical order']
omahaSubs = [*neighborhoodCol[1:], 'Omaha', 'LaVista']

In [None]:
def omahaSuburbs(df, colName, suburbs=None):
    '''Add a 'Central_City' column that folds Omaha suburbs into "Omaha".

    'Central_City' starts as a copy of df[colName]; every value found in the
    suburb list is then replaced with "Omaha". The DataFrame is modified in
    place and nothing is returned.

    Parameters:
        df: DataFrame to modify.
        colName: name of the column holding the city names.
        suburbs: optional collection of names to treat as Omaha. When
            omitted, the module-level `omahaSubs` list is used.
    '''
    if suburbs is None:
        # Fall back to the list built from the Wikipedia table.
        suburbs = omahaSubs
    df['Central_City'] = df[colName]
    # isin() already yields a boolean mask, so it can index .loc directly.
    isSuburb = df['Central_City'].isin(suburbs)
    df.loc[isSuburb, 'Central_City'] = "Omaha"

In [None]:
# Add the consolidated 'Central_City' column to all three datasets,
# each of which keeps its city names in a 'City' column:
for frame in (csvData, webData, apiData):
    omahaSuburbs(frame, 'City')

In [3]:
# Write each DataFrame to its own table in the SQLite database file.
# NOTE: sqlite3's connection context manager only commits (or rolls back)
# the transaction on exit; it does not close the connection, so `conn` and
# `cursor` remain usable after this block.
with sqlite3.connect('Nebeerska.db') as conn:
    cursor = conn.cursor()
    for tableName, frame in (
        ('csvtable', csvData),
        ('webtable', webData),
        ('apitable', apiData),
    ):
        # Overwrite any table of the same name; don't store the index.
        frame.to_sql(tableName, conn, if_exists='replace', index=False)

  sql.to_sql(


### Get a list of the tables:

In [22]:
# Ask SQLite's catalog table for the name of every table in the database:
res = conn.execute('''SELECT name FROM sqlite_master WHERE type='table';''')
# Each result row is a 1-tuple, so keep only its first element.
tableList = [row[0] for row in res]
for tableName in tableList:
    print(tableName)

csvtable
webtable
apitable


# Join the datasets together into 1 dataset.

### Check out the first few rows of each table:

In [12]:
def head(table, nRows=5):
    '''Print the first nRows rows of a database table, one tuple per line.

    Uses the module-level `cursor`. For a positive integer nRows only that
    many rows are fetched, rather than reading the whole table and slicing
    it. Any other value (0, negative, None) keeps the list-slicing
    behaviour of `rows[:nRows]`.

    Parameters:
        table: name of the table to preview. It is interpolated directly
            into the SQL text (identifiers cannot be bound as parameters),
            so pass trusted table names only.
        nRows: number of rows to print (default 5).
    '''
    cursor.execute(f'''SELECT * FROM {table}''')
    if isinstance(nRows, int) and nRows > 0:
        # Pull just the rows that will be shown.
        rows = cursor.fetchmany(nRows)
    else:
        # Unusual counts: fall back to slicing the full result.
        rows = cursor.fetchall()[:nRows]
    for row in rows:
        print(row)

In [26]:
# Preview the first three rows of every table, with a blank line between tables:
for tableName in tableList:
    print(f"Table: '{tableName}' - ")
    head(tableName, 3)
    print()

Table: 'csvtable' - 
('backswing-brewing-co-lincoln', 'Backswing Brewing Co.', 'Micro', '500 W South St Ste 8', 'Lincoln', 'Nebraska', '68522-1744', 68522, 'http://www.backswingbrewing.com', '(402) 515-4263', None, None)
('benson-brewery-omaha', 'Benson Brewery', 'Brewpub', '6059 Maple St', 'Omaha', 'Nebraska', '68104-4050', 68104, 'http://www.bensonbrewery.com', '(402) 934-8668', -96.00634511, 41.28489756)
('blue-blood-brewing-company-lincoln', 'Blue Blood Brewing Company', 'Brewpub', '925 Robbers Cave Rd', 'Lincoln', 'Nebraska', '68502', 68502, 'http://www.bluebloodbrewing.com', '(402) 477-2337', -96.70701376, 40.77999602)

Table: 'webtable' - 
('Backswing Brewing Co.', 'Lincoln', 'Nebraska', 'Micro', 'On-tap at the brewery and at select locations', 'Distro planned for 2016', 1, 'Lincoln')
('Benson Brewery', 'Omaha', 'Nebraska', 'Taproom', 'On tap only at the brewery', 'Other beers, locals included, on tap. Omaha Brewing is the parent company', 1, 'Omaha')
('Boiler Brewing Company', 

# Create 5 visualizations that demonstrate the data.
Note: At least 2 should have data from > 1 source.