In [1]:
import psycopg2
import pandas as pd
import os
import csv

In [None]:
# Read the database password from the DB_PASS environment variable.
# The result is None when the variable is not set.
db_pw = os.getenv('DB_PASS')

In [None]:
# Connect to the default "postgres" database.
# The connection parameters are passed as keyword arguments instead of being
# formatted into a DSN string: a password containing spaces, quotes or
# backslashes would otherwise corrupt the connection string.
try:
    conn = psycopg2.connect(
        host='localhost',
        user='postgres',
        dbname='postgres',
        password=db_pw,
    )
except psycopg2.Error as e:
    # The error is only reported; `conn` is not assigned in this case, so
    # the following cells cannot work until the connection problem is fixed.
    print('Error: No Conection to database possible.')
    print(e)

In [None]:
# Open a cursor on the current connection; the SQL statements in the
# following cells are executed through it.
try:
    cur = conn.cursor()
except psycopg2.Error as cursor_error:
    print(cursor_error)

In [None]:
# Switch the connection to autocommit mode so that every statement is
# committed on its own and no explicit commit is needed after each query.
conn.autocommit = True

In [None]:
# Create the database that will hold the NFL data.
# A database error (for example when the database already exists) is only
# printed, it does not stop the notebook.
try:
    cur.execute("CREATE DATABASE NFL_Scores_Bets")
except psycopg2.Error as create_error:
    print(create_error)

In [None]:
# Close the connection to the default "postgres" database
try:
    conn.close()
except psycopg2.Error as e:
    print(e)

# Connect to the new database nfl_scores_bets.
# Keyword arguments are used instead of a formatted DSN string so that a
# password containing spaces, quotes or backslashes cannot corrupt the
# connection string.
try:
    conn = psycopg2.connect(
        host='localhost',
        user='postgres',
        dbname='nfl_scores_bets',
        password=db_pw,
    )
except psycopg2.Error as e:
    print('Error: Cnnection to database failed')
    print(e)

# Open a cursor on the new connection for the statements below
try:
    cur = conn.cursor()
except psycopg2.Error as e:
    print('Cursor failed')
    print(e)

# Commit every statement automatically, as on the first connection
conn.set_session(autocommit=True)

# Creating the 3 tables stadiums, teams and scores_bets

In [None]:
# Create the "stadiums" table (one row per stadium, keyed by its name)
# unless it already exists.
try:
    cur.execute("""
        CREATE TABLE IF NOT EXISTS stadiums (
            stadium_name varchar NOT NULL,
            stadium_location varchar,
            stadium_open_year int,
            stadium_close_year int,
            stadium_type varchar,
            stadium_address varchar,
            stadium_weather_station_code varchar,
            stadium_weather_type varchar,
            stadium_capacity int,
            stadium_surface varchar,
            PRIMARY KEY (stadium_name)
        )
    """)
except psycopg2.Error as e:
    # The message names the table that is actually created ("stadiums").
    print('Error: Creating table stadiums failed')
    print(e)

In [None]:
# Create the "teams" table, keyed by the full team name, unless it already
# exists. The statement is assembled from adjacent string literals.
try:
    cur.execute(
        "CREATE TABLE IF NOT EXISTS teams "
        "    (team_name varchar PRIMARY KEY, "
        "    team_name_short varchar, "
        "    team_id varchar NOT NULL, "
        "    team_id_pfr varchar, "
        "    team_conference varchar, "
        "    team_division varchar, "
        "    team_conference_pre2002 varchar, "
        "    team_division_pre2002 varchar)"
    )
except psycopg2.Error as create_error:
    print('Error: Creating table teams failed')
    print(create_error)

In [None]:
# Create the "scores_bets" table unless it already exists.
# schedule_week is a varchar because the column also contains text values
# (Superbowl, Wildcard, ...). team_home and team_away reference
# teams(team_name).
try:
    cur.execute(
        "CREATE TABLE IF NOT EXISTS scores_bets "
        "    (id SERIAL PRIMARY KEY, "
        "    schedule_date date, "
        "    schedule_season varchar, "
        "    schedule_week varchar, "
        "    schedule_playoff bool, "
        "    team_home varchar, "
        "    score_home smallint, "
        "    score_away smallint, "
        "    team_away varchar, "
        "    team_favorite_id varchar, "
        "    spread_favorite varchar, "
        "    over_under_line varchar, "
        "    stadium_name varchar, "
        "    stadium_neutral bool, "
        "    weather_temperature numeric, "
        "    weather_wind_mph numeric, "
        "    weather_humidity numeric, "
        "    weather_detail varchar, "
        "    foreign key (team_home) references teams(team_name), "
        "    foreign key (team_away) references teams(team_name))"
    )
except psycopg2.Error as create_error:
    print('Error: Creating table scores_bets failed')
    print(create_error)

# Loading of the csv files

In [3]:
# Load the teams CSV (path relative to the notebook's working directory)
teams = pd.read_csv('data/teams.csv')

In [4]:
# Preview the first rows of the teams frame
teams.head()

Unnamed: 0,team_name,team_name_short,team_id,team_id_pfr,team_conference,team_division,team_conference_pre2002,team_division_pre2002
0,Arizona Cardinals,Cardinals,ARI,CRD,NFC,NFC West,NFC,NFC West
1,Atlanta Falcons,Falcons,ATL,ATL,NFC,NFC South,NFC,NFC West
2,Baltimore Colts,Colts,IND,CLT,AFC,,AFC,AFC East
3,Baltimore Ravens,Ravens,BAL,RAV,AFC,AFC North,AFC,AFC Central
4,Boston Patriots,Patriots,NE,NWE,AFC,,AFC,


In [None]:
# Load the scores/bets CSV (path relative to the notebook's working directory)
scores_bets = pd.read_csv('data/scores_bets.csv')

In [None]:
# Preview the last rows of the scores_bets frame
scores_bets.tail()

In [None]:
# Parse the schedule_date strings into pandas datetimes so that they are
# sent to Postgres as date values.
# NOTE(review): the original comment states the CSV uses dd/mm/yyyy, but
# to_datetime is called without dayfirst/format, so ambiguous strings such
# as 01/02/2000 are read month-first - confirm against the CSV.
scores_bets = scores_bets.assign(
    schedule_date=pd.to_datetime(scores_bets['schedule_date'])
)
scores_bets.head()

In [None]:
# Replace missing values (NaN/NaT) with None so that they are inserted into
# Postgres as NULL.
# The frame is cast to object dtype first: on recent pandas versions
# where(..., None) on a float column coerces None back to NaN, which would
# leave the missing values in place.
scores_bets = scores_bets.astype(object).where(pd.notnull(scores_bets), None)
scores_bets.tail()

In [None]:
# Load the stadiums CSV; the file is decoded as latin1.
stadiums = pd.read_csv(
    'data/stadiums.csv',
    encoding='latin1',
)
stadiums.head()

In [None]:
# Replace missing values with None so that they are inserted as NULL.
# Casting to object dtype first is required on recent pandas versions, where
# where(..., None) on a float column coerces None back to NaN.
stadiums = stadiums.astype(object).where(pd.notnull(stadiums), None)
stadiums.head()

In [None]:
# Keep only the columns that are loaded into the stadiums table, in the
# order used by the INSERT statement. .copy() makes the selection an
# independent frame, so the assignment below does not write to a slice of
# the original frame (SettingWithCopyWarning).
stadium_columns = [
    'stadium_name',
    'stadium_location',
    'stadium_open',
    'stadium_close',
    'stadium_type',
    'stadium_address',
    'stadium_weather_station_code',
    'stadium_weather_type',
    'stadium_capacity',
    'stadium_surface',
]
stadiums = stadiums[stadium_columns].copy()

# Remove the thousands separator from the capacity strings (plain text
# replacement, no regular expression).
stadium_capacity_clean = stadiums['stadium_capacity'].str.replace(',', '', regex=False)
# The .str accessor returns NaN for missing entries, so map them to None
# again; otherwise NaN would be sent to the integer column.
stadiums['stadium_capacity'] = stadium_capacity_clean.astype(object).where(
    stadium_capacity_clean.notna(), None
)
stadiums.head()

# Insert the data from the dataframes into the database

In [None]:
# Parameterized INSERT for one stadiums row (ten %s placeholders, filled by
# psycopg2). ON CONFLICT ... DO NOTHING skips rows whose stadium_name is
# already stored, so the insert cell can be re-run without a primary key
# violation. Note that duplicate names inside the CSV are skipped silently
# as well. Requires PostgreSQL 9.5 or newer.
stadium_table_insert = """INSERT INTO stadiums (
    stadium_name,
    stadium_location,
    stadium_open_year,
    stadium_close_year,
    stadium_type,
    stadium_address,
    stadium_weather_station_code,
    stadium_weather_type,
    stadium_capacity,
    stadium_surface)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (stadium_name) DO NOTHING
"""

In [None]:
# Parameterized INSERT for one teams row (eight %s placeholders, filled by
# psycopg2). ON CONFLICT ... DO NOTHING skips rows whose team_name is
# already stored, so the insert cell can be re-run without a primary key
# violation. Note that duplicate names inside the CSV are skipped silently
# as well. Requires PostgreSQL 9.5 or newer.
teams_table_insert = """INSERT INTO teams (
    team_name,
    team_name_short,
    team_id,
    team_id_pfr,
    team_conference,
    team_division,
    team_conference_pre2002,
    team_division_pre2002)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (team_name) DO NOTHING
"""

In [None]:
# Parameterized INSERT for one scores_bets row: seventeen columns and
# seventeen %s placeholders that psycopg2 fills in. The id column is not
# listed. The statement is written as adjacent string literals, one per line
# of SQL.
scores_bets_table_insert = (
    "INSERT INTO scores_bets (\n"
    "schedule_date,\n"
    "schedule_season,\n"
    "schedule_week,\n"
    "schedule_playoff,\n"
    "team_home,\n"
    "score_home,\n"
    "score_away,\n"
    "team_away,\n"
    "team_favorite_id,\n"
    "spread_favorite,\n"
    "over_under_line,\n"
    "stadium_name,\n"
    "stadium_neutral,\n"
    "weather_temperature,\n"
    "weather_wind_mph,\n"
    "weather_humidity,\n"
    "weather_detail) \n"
    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)\n"
)

In [None]:
# Insert the stadiums frame row by row; the values of each row are passed
# positionally to the parameterized INSERT statement.
for _, stadium_row in stadiums.iterrows():
    stadium_values = list(stadium_row)
    cur.execute(stadium_table_insert, stadium_values)

In [None]:
# Insert the teams frame row by row.
# The columns are selected by name, in the order of the INSERT statement, so
# that an additional or reordered CSV column cannot shift or break the
# placeholder mapping.
teams_insert_columns = [
    'team_name',
    'team_name_short',
    'team_id',
    'team_id_pfr',
    'team_conference',
    'team_division',
    'team_conference_pre2002',
    'team_division_pre2002',
]
# Missing values (e.g. an empty team_division) are converted to None so they
# are stored as NULL, as is done for the other two frames. The cast to object
# dtype keeps where() from coercing None back to NaN.
teams_rows = teams[teams_insert_columns].astype(object)
teams_rows = teams_rows.where(teams_rows.notna(), None)

for _, team_row in teams_rows.iterrows():
    cur.execute(teams_table_insert, list(team_row))

In [None]:
#for i, row in scores_bets.iterrows():
#    cur.execute(scores_bets_table_insert, list(row))