In [1]:
# ADVANCED DATABASES - LAB #4 #5
# Analysis of input data and constraints of columns
# Continuation of lab #3

# Data set: (len(your_name) + len(your_surname))% 6 = (6+4)%6
# Data set: 4 ,,superbowl-history-1967-2020"

In [2]:
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base

# database_type://user:password@database_url:port/database_name
db_string = "postgresql://postgres:postgres@localhost:5432/postgres"

engine = create_engine(db_string)

In [3]:
# Read data from superbowl.csv
import pandas as pd

df = pd.read_csv("superbowl.csv")

In [4]:
df.head()

Unnamed: 0,Date,SB,Winner,Winner Pts,Loser,Loser Pts,MVP,Stadium,City,State
0,Feb 2 2020,LIV (54),Kansas City Chiefs,31,San Francisco 49ers,20,Patrick Mahomes,Hard Rock Stadium,Miami Gardens,Florida
1,Feb 3 2019,LIII (53),New England Patriots,13,Los Angeles Rams,3,Julian Edelman,Mercedes-Benz Stadium,Atlanta,Georgia
2,Feb 4 2018,LII (52),Philadelphia Eagles,41,New England Patriots,33,Nick Foles,U.S. Bank Stadium,Minneapolis,Minnesota
3,Feb 5 2017,LI (51),New England Patriots,34,Atlanta Falcons,28,Tom Brady,NRG Stadium,Houston,Texas
4,Feb 7 2016,50,Denver Broncos,24,Carolina Panthers,10,Von Miller,Levi's Stadium,Santa Clara,California


In [5]:
print(df.columns)

Index(['Date', 'SB', 'Winner', 'Winner Pts', 'Loser', 'Loser Pts', 'MVP',
       'Stadium', 'City', 'State'],
      dtype='object')


In [6]:
from sqlalchemy.ext.declarative import declarative_base

# Run process of mapping existing tables in a database to class and give us tools to declare new tables schema
Base = declarative_base()

In [7]:
# Add tools of mapping variablis types from databse to Python => declare new table schema
from sqlalchemy import Column, Integer, Float, String, Date
from sqlalchemy import ForeignKey
from sqlalchemy import Sequence, CheckConstraint, UniqueConstraint

# TEAM           (id_team, name)
# MATCH          (id_match, date, sb)
# MATCH'S RESULT (id, id_match, id_stadium, winner (id_team), winner points, loser(id_team), loser points, mvp)
# STADIUM        (id_stadium, stadium name, city, state)

In [8]:
# table "TEAM" initialization

class Team(Base):
    __tablename__ = "teams"
    id            = Column(Integer, primary_key=True)
    team_name     = Column(String(50))

    def __repr__(self):
        return "<teams(id='{0}', name={1})>".format(self.id, self.team_name)

In [9]:
# table "MATCH" initialization

class Match(Base):
    __tablename__ = "matches"
    __table_args__ = (
        UniqueConstraint('sb'),
    )
    id            = Column(Integer, primary_key = True)
    date          = Column(Date, nullable = False)
    sb            = Column(String(50), nullable = False)

    def __repr__(self):
        return "<matches(id='{0}', date={1}, sb={2})>".format(self.id, self.date, self.sb)

In [10]:
# table "MATCH RESULT" initialization

class MatchResult(Base):
    __tablename__ = "match_results"
    __table_args__ = (
        CheckConstraint('winner_points > 0'),
        CheckConstraint('loser_points > 0'),
        CheckConstraint('length(mvp) > 0'),
    )
    id            = Column(Integer, primary_key=True)
    id_match      = Column(Integer, ForeignKey("matches.id"))
    id_stadium    = Column(Integer, ForeignKey("stadiums.id"))
    id_winner        = Column(Integer, ForeignKey("teams.id"))
    winner_points = Column(Integer, nullable = False)
    id_loser         = Column(Integer, ForeignKey("teams.id"))
    loser_points  = Column(Integer, nullable = False)
    mvp           = Column(String(50), nullable = False)

    def __repr__(self):
        return "<match_results(id='{0}', id_match={1}, id_stadium={2}, winner={3},winner_points={4}, loser={5}, loser_points={6}, mvp={7})>".format(
            self.id, self.id_match, self.id_stadium, self.winner, self.winner_points, self.loser, self.loser_points, self.mvp)

In [11]:
# table "STADIUM" initialization

class Stadium(Base):
    __tablename__ = "stadiums"
    __table_args__ = (
        UniqueConstraint('stadium_name'),
        CheckConstraint('length(city) > 0'),
        CheckConstraint('length(state) > 0'),
    )
    id            = Column(Integer, primary_key=True)
    stadium_name  = Column(String(50), nullable = False)
    city          = Column(String(50), nullable = False)
    state         = Column(String(50), nullable = False)

    def __repr__(self):
        return "<matches(id='{0}', name={1}, city={2}, state={3})>".format(
            self.id, self.stadium_name, self.city, self.state)

In [12]:
# Full schema of table in database
Base.metadata.create_all(engine)

In [13]:
engine.table_names()

['matches', 'stadiums', 'teams', 'match_results']

In [14]:
# TEAM TABLE

winner_list = df['Winner'].unique()
print(winner_list)

['Kansas City Chiefs' 'New England Patriots' 'Philadelphia Eagles'
 'Denver Broncos' 'Seattle Seahawks' 'Baltimore Ravens' 'New York Giants'
 'Green Bay Packers' 'New Orleans Saints' 'Pittsburgh Steelers'
 'Indianapolis Colts' 'Tampa Bay Buccaneers' 'St. Louis Rams'
 'Dallas Cowboys' 'San Francisco 49ers' 'Washington Redskins'
 'Chicago Bears' 'Los Angeles Raiders' 'Oakland Raiders' 'Miami Dolphins'
 'Baltimore Colts' 'New York Jets']


In [15]:
loser_list = df['Loser'].unique()
print(loser_list)

['San Francisco 49ers' 'Los Angeles Rams' 'New England Patriots'
 'Atlanta Falcons' 'Carolina Panthers' 'Seattle Seahawks' 'Denver Broncos'
 'Pittsburgh Steelers' 'Indianapolis Colts' 'Arizona Cardinals'
 'Chicago Bears' 'Philadelphia Eagles' 'Oakland Raiders' 'St. Louis Rams'
 'New York Giants' 'Tennessee Titans' 'Green Bay Packers'
 'San Diego Chargers' 'Buffalo Bills' 'Cincinnati Bengals'
 'Miami Dolphins' 'Washington Redskins' 'Dallas Cowboys'
 'Minnesota Vikings' 'Baltimore Colts' 'Kansas City Chiefs']


In [16]:
# Add winner_list and loser_list to obtain team_list
import numpy as np

team_list = np.concatenate((winner_list, loser_list))
print(team_list)

['Kansas City Chiefs' 'New England Patriots' 'Philadelphia Eagles'
 'Denver Broncos' 'Seattle Seahawks' 'Baltimore Ravens' 'New York Giants'
 'Green Bay Packers' 'New Orleans Saints' 'Pittsburgh Steelers'
 'Indianapolis Colts' 'Tampa Bay Buccaneers' 'St. Louis Rams'
 'Dallas Cowboys' 'San Francisco 49ers' 'Washington Redskins'
 'Chicago Bears' 'Los Angeles Raiders' 'Oakland Raiders' 'Miami Dolphins'
 'Baltimore Colts' 'New York Jets' 'San Francisco 49ers'
 'Los Angeles Rams' 'New England Patriots' 'Atlanta Falcons'
 'Carolina Panthers' 'Seattle Seahawks' 'Denver Broncos'
 'Pittsburgh Steelers' 'Indianapolis Colts' 'Arizona Cardinals'
 'Chicago Bears' 'Philadelphia Eagles' 'Oakland Raiders' 'St. Louis Rams'
 'New York Giants' 'Tennessee Titans' 'Green Bay Packers'
 'San Diego Chargers' 'Buffalo Bills' 'Cincinnati Bengals'
 'Miami Dolphins' 'Washington Redskins' 'Dallas Cowboys'
 'Minnesota Vikings' 'Baltimore Colts' 'Kansas City Chiefs']


In [17]:
# Change numpy arrays (*_list) to data frames
team_list_data_frame = pd.DataFrame({'team_name':team_list}).drop_duplicates()
team_list_data_frame = team_list_data_frame.reset_index().drop(columns = ['index'])
team_list_data_frame.index.name = 'id'
print(team_list_data_frame)

               team_name
id                      
0     Kansas City Chiefs
1   New England Patriots
2    Philadelphia Eagles
3         Denver Broncos
4       Seattle Seahawks
5       Baltimore Ravens
6        New York Giants
7      Green Bay Packers
8     New Orleans Saints
9    Pittsburgh Steelers
10    Indianapolis Colts
11  Tampa Bay Buccaneers
12        St. Louis Rams
13        Dallas Cowboys
14   San Francisco 49ers
15   Washington Redskins
16         Chicago Bears
17   Los Angeles Raiders
18       Oakland Raiders
19        Miami Dolphins
20       Baltimore Colts
21         New York Jets
22      Los Angeles Rams
23       Atlanta Falcons
24     Carolina Panthers
25     Arizona Cardinals
26      Tennessee Titans
27    San Diego Chargers
28         Buffalo Bills
29    Cincinnati Bengals
30     Minnesota Vikings


In [18]:
team_list_data_frame.to_sql('teams', engine, if_exists='append')

In [19]:
# Table "MATCH"
match_list = df[['Date', 'SB']].drop_duplicates()
match_list.index.name = 'id'
match_list = match_list.rename(columns = {'Date': 'date', 'SB':'sb'})
print(match_list)

           date            sb
id                           
0    Feb 2 2020      LIV (54)
1    Feb 3 2019     LIII (53)
2    Feb 4 2018      LII (52)
3    Feb 5 2017       LI (51)
4    Feb 7 2016            50
5    Feb 1 2015     XLIX (49)
6    Feb 2 2014   XLVIII (48)
7    Feb 3 2013    XLVII (47)
8    Feb 5 2012     XLVI (46)
9    Feb 6 2011      XLV (45)
10   Feb 7 2010     XLIV (44)
11   Feb 1 2009    XLIII (43)
12   Feb 3 2008     XLII (42)
13   Feb 4 2007      XLI (41)
14   Feb 5 2006       XL (40)
15   Feb 6 2005    XXXIX (39)
16   Feb 1 2004  XXXVIII (38)
17  Jan 26 2003   XXXVII (37)
18   Feb 3 2002    XXXVI (36)
19  Jan 28 2001     XXXV (35)
20  Jan 30 2000    XXXIV (34)
21  Jan 31 1999   XXXIII (33)
22  Jan 25 1998    XXXII (32)
23  Jan 26 1997     XXXI (31)
24  Jan 28 1996      XXX (30)
25  Jan 29 1995     XXIX (29)
26  Jan 30 1994   XXVIII (28)
27  Jan 31 1993    XXVII (27)
28  Jan 26 1992     XXVI (26)
29  Jan 27 1991      XXV (25)
30  Jan 28 1990     XXIV (24)
31  Jan 22

In [20]:
match_list.to_sql('matches', engine, if_exists='append')

In [21]:
# Table "MATCH RESULT"
match_result_list = df[['Date', 'Stadium', 'Winner', 'Winner Pts', 'Loser', 'Loser Pts', 'MVP']]
match_result_list.index.name = 'id'
match_result_list = match_result_list.rename(columns = {'Date': 'id_match', 'Stadium':'id_stadium', 'Winner':'id_winner', 'Winner Pts':'winner_points', 'Loser':'id_loser', 'Loser Pts':'loser_points', 'MVP':'mvp'})
print(match_result_list)

       id_match                     id_stadium             id_winner  \
id                                                                     
0    Feb 2 2020              Hard Rock Stadium    Kansas City Chiefs   
1    Feb 3 2019          Mercedes-Benz Stadium  New England Patriots   
2    Feb 4 2018              U.S. Bank Stadium   Philadelphia Eagles   
3    Feb 5 2017                    NRG Stadium  New England Patriots   
4    Feb 7 2016                 Levi's Stadium        Denver Broncos   
5    Feb 1 2015  University of Phoenix Stadium  New England Patriots   
6    Feb 2 2014                MetLife Stadium      Seattle Seahawks   
7    Feb 3 2013        Mercedes-Benz Superdome      Baltimore Ravens   
8    Feb 5 2012              Lucas Oil Stadium       New York Giants   
9    Feb 6 2011                Cowboys Stadium     Green Bay Packers   
10   Feb 7 2010               Sun Life Stadium    New Orleans Saints   
11   Feb 1 2009          Raymond James Stadium   Pittsburgh Stee

In [22]:
match_result_list.to_sql('match_results', engine, if_exists='append')

DataError: (psycopg2.errors.InvalidTextRepresentation) invalid input syntax for integer: "Feb 2 2020"
LINE 1: ...r_points, id_loser, loser_points, mvp) VALUES (0, 'Feb 2 202...
                                                             ^

[SQL: INSERT INTO match_results (id, id_match, id_stadium, id_winner, winner_points, id_loser, loser_points, mvp) VALUES (%(id)s, %(id_match)s, %(id_stadium)s, %(id_winner)s, %(winner_points)s, %(id_loser)s, %(loser_points)s, %(mvp)s)]
[parameters: ({'id': 0, 'id_match': 'Feb 2 2020', 'id_stadium': 'Hard Rock Stadium', 'id_winner': 'Kansas City Chiefs', 'winner_points': 31, 'id_loser': 'San Francisco 49ers', 'loser_points': 20, 'mvp': 'Patrick Mahomes'}, {'id': 1, 'id_match': 'Feb 3 2019', 'id_stadium': 'Mercedes-Benz Stadium', 'id_winner': 'New England Patriots', 'winner_points': 13, 'id_loser': 'Los Angeles Rams', 'loser_points': 3, 'mvp': 'Julian Edelman'}, {'id': 2, 'id_match': 'Feb 4 2018', 'id_stadium': 'U.S. Bank Stadium', 'id_winner': 'Philadelphia Eagles', 'winner_points': 41, 'id_loser': 'New England Patriots', 'loser_points': 33, 'mvp': 'Nick Foles'}, {'id': 3, 'id_match': 'Feb 5 2017', 'id_stadium': 'NRG Stadium', 'id_winner': 'New England Patriots', 'winner_points': 34, 'id_loser': 'Atlanta Falcons', 'loser_points': 28, 'mvp': 'Tom Brady'}, {'id': 4, 'id_match': 'Feb 7 2016', 'id_stadium': "Levi's Stadium", 'id_winner': 'Denver Broncos', 'winner_points': 24, 'id_loser': 'Carolina Panthers', 'loser_points': 10, 'mvp': 'Von Miller'}, {'id': 5, 'id_match': 'Feb 1 2015', 'id_stadium': 'University of Phoenix Stadium', 'id_winner': 'New England Patriots', 'winner_points': 28, 'id_loser': 'Seattle Seahawks', 'loser_points': 24, 'mvp': 'Tom Brady'}, {'id': 6, 'id_match': 'Feb 2 2014', 'id_stadium': 'MetLife Stadium', 'id_winner': 'Seattle Seahawks', 'winner_points': 43, 'id_loser': 'Denver Broncos', 'loser_points': 8, 'mvp': 'Malcolm Smith'}, {'id': 7, 'id_match': 'Feb 3 2013', 'id_stadium': 'Mercedes-Benz Superdome', 'id_winner': 'Baltimore Ravens', 'winner_points': 34, 'id_loser': 'San Francisco 49ers', 'loser_points': 31, 'mvp': 'Joe Flacco'}  ... displaying 10 of 54 total bound parameter sets ...  {'id': 52, 'id_match': 'Jan 14 1968', 'id_stadium': 'Orange Bowl', 'id_winner': 'Green Bay Packers', 'winner_points': 33, 'id_loser': 'Oakland Raiders', 'loser_points': 14, 'mvp': 'Bart Starr+'}, {'id': 53, 'id_match': 'Jan 15 1967', 'id_stadium': 'Memorial Coliseum', 'id_winner': 'Green Bay Packers', 'winner_points': 35, 'id_loser': 'Kansas City Chiefs', 'loser_points': 10, 'mvp': 'Bart Starr+'})]
(Background on this error at: http://sqlalche.me/e/9h9h)

In [23]:
# Table "STADIUM"
stadium_list = df[['Stadium', 'City', 'State']]
stadium_list.index.name = 'id'
stadium_list = stadium_list.rename(columns = {'Stadium': 'stadium_name', 'City':'city', 'State':'state'})
print(stadium_list)

                     stadium_name             city       state
id                                                            
0               Hard Rock Stadium    Miami Gardens     Florida
1           Mercedes-Benz Stadium          Atlanta     Georgia
2               U.S. Bank Stadium      Minneapolis   Minnesota
3                     NRG Stadium          Houston       Texas
4                  Levi's Stadium      Santa Clara  California
5   University of Phoenix Stadium         Glendale     Arizona
6                 MetLife Stadium  East Rutherford  New Jersey
7         Mercedes-Benz Superdome      New Orleans   Louisiana
8               Lucas Oil Stadium     Indianapolis     Indiana
9                 Cowboys Stadium        Arlington       Texas
10               Sun Life Stadium    Miami Gardens     Florida
11          Raymond James Stadium            Tampa     Florida
12  University of Phoenix Stadium         Glendale     Arizona
13                Dolphin Stadium    Miami Gardens     

In [24]:
stadium_list.to_sql('stadiums', engine, if_exists='append')

IntegrityError: (psycopg2.errors.UniqueViolation) duplicate key value violates unique constraint "stadiums_stadium_name_key"
DETAIL:  Key (stadium_name)=(University of Phoenix Stadium) already exists.

[SQL: INSERT INTO stadiums (id, stadium_name, city, state) VALUES (%(id)s, %(stadium_name)s, %(city)s, %(state)s)]
[parameters: ({'id': 0, 'stadium_name': 'Hard Rock Stadium', 'city': 'Miami Gardens', 'state': 'Florida'}, {'id': 1, 'stadium_name': 'Mercedes-Benz Stadium', 'city': 'Atlanta', 'state': 'Georgia'}, {'id': 2, 'stadium_name': 'U.S. Bank Stadium', 'city': 'Minneapolis', 'state': 'Minnesota'}, {'id': 3, 'stadium_name': 'NRG Stadium', 'city': 'Houston', 'state': 'Texas'}, {'id': 4, 'stadium_name': "Levi's Stadium", 'city': 'Santa Clara', 'state': 'California'}, {'id': 5, 'stadium_name': 'University of Phoenix Stadium', 'city': 'Glendale', 'state': 'Arizona'}, {'id': 6, 'stadium_name': 'MetLife Stadium', 'city': 'East Rutherford', 'state': 'New Jersey'}, {'id': 7, 'stadium_name': 'Mercedes-Benz Superdome', 'city': 'New Orleans', 'state': 'Louisiana'}  ... displaying 10 of 54 total bound parameter sets ...  {'id': 52, 'stadium_name': 'Orange Bowl', 'city': 'Miami', 'state': 'Florida'}, {'id': 53, 'stadium_name': 'Memorial Coliseum', 'city': 'Los Angeles', 'state': 'California'})]
(Background on this error at: http://sqlalche.me/e/gkpj)