In [1]:
from sqlalchemy import Column, Integer, String, Date, Text, VARCHAR
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import ForeignKey
from sqlalchemy import CheckConstraint


Base = declarative_base()

from sqlalchemy import create_engine

db_string = "postgres://postgres:admin@localhost:5432/AdvancedDatabases_lab4_5"

engine = create_engine(db_string)

In [2]:
import pandas as pd


df = pd.read_csv("superbowl.csv")
df

Unnamed: 0,Date,SB,Winner,Winner Pts,Loser,Loser Pts,MVP,Stadium,City,State
0,Feb 2 2020,LIV (54),Kansas City Chiefs,31,San Francisco 49ers,20,Patrick Mahomes,Hard Rock Stadium,Miami Gardens,Florida
1,Feb 3 2019,LIII (53),New England Patriots,13,Los Angeles Rams,3,Julian Edelman,Mercedes-Benz Stadium,Atlanta,Georgia
2,Feb 4 2018,LII (52),Philadelphia Eagles,41,New England Patriots,33,Nick Foles,U.S. Bank Stadium,Minneapolis,Minnesota
3,Feb 5 2017,LI (51),New England Patriots,34,Atlanta Falcons,28,Tom Brady,NRG Stadium,Houston,Texas
4,Feb 7 2016,50,Denver Broncos,24,Carolina Panthers,10,Von Miller,Levi's Stadium,Santa Clara,California
5,Feb 1 2015,XLIX (49),New England Patriots,28,Seattle Seahawks,24,Tom Brady,University of Phoenix Stadium,Glendale,Arizona
6,Feb 2 2014,XLVIII (48),Seattle Seahawks,43,Denver Broncos,8,Malcolm Smith,MetLife Stadium,East Rutherford,New Jersey
7,Feb 3 2013,XLVII (47),Baltimore Ravens,34,San Francisco 49ers,31,Joe Flacco,Mercedes-Benz Superdome,New Orleans,Louisiana
8,Feb 5 2012,XLVI (46),New York Giants,21,New England Patriots,17,Eli Manning,Lucas Oil Stadium,Indianapolis,Indiana
9,Feb 6 2011,XLV (45),Green Bay Packers,31,Pittsburgh Steelers,25,Aaron Rodgers,Cowboys Stadium,Arlington,Texas


In [3]:
#Create new tables in database

class Player(Base):
    __tablename__ = 'Player'
    __table_args__ = (
        CheckConstraint('LENGTH(player_name) > 0'),
        {'extend_existing' : True},
    )
    player_id = Column(Integer, primary_key=True)
    player_name = Column(String(50), nullable = False)
    
    def __repr__(self):
        return "<Player(player_id='{0}', player_name={1})>".format(self.player_id, self.player_name)
    
class State(Base):
    __tablename__ = 'State'
    __table_args__ = (
        CheckConstraint('LENGTH(state_name) > 0'),
        {'extend_existing' : True},
    )
    state_id = Column(Integer, primary_key=True)
    state_name = Column(String(50), nullable = False)
    
    def __repr__(self):
        return "<State(state_id='{0}', state_name={1})>".format(self.state_id, self.state_name)

class City(Base):
    __tablename__ = 'City'
    __table_args__ = (
        CheckConstraint('LENGTH(city_name) > 0'),
        {'extend_existing' : True},
    )
    city_id = Column(Integer, primary_key=True)
    city_name = Column(String(50), nullable = False)
    state_id = Column(Integer, ForeignKey('State.state_id'))
    
    def __repr__(self):
        return "<City(city_id='{0}', city_name={1}, state_id={2})>".format(self.city_id, self.city_name, self.state_id)
    
class Stadium(Base):
    __tablename__ = 'Stadium'
    __table_args__ = (
        CheckConstraint('LENGTH(stadium_name) > 0'),
        {'extend_existing' : True},
    )
    stadium_id = Column(Integer, primary_key=True)
    stadium_name = Column(String(50), nullable = False)
    city_id = Column(Integer, ForeignKey('City.city_id'))
    
    def __repr__(self):
        return "<Stadium(stadium_id='{0}', stadium_name={1}, city_id={2})>".format(self.stadium_id, self.stadium_name, self.city_id)

class Team(Base):
    __tablename__ = 'Team'
    __table_args__ = (
        CheckConstraint('LENGTH(team_name) > 0'),
        {'extend_existing' : True},
    )
    team_id = Column(Integer, primary_key=True)
    team_name = Column(String(50), nullable = False)
    
    def __repr__(self):
        return "<Team(team_id='{0}', team_name={1})>".format(self.team_id, self.team_name)
    
class SuperBowl(Base):
    __tablename__ = 'SuperBowl'
    __table_args__ = (
        CheckConstraint('LENGTH(superbowl_name) > 0'),
        {'extend_existing' : True},
    )
    superbowl_id = Column(Integer, primary_key=True)
    superbowl_name = Column(String(50), nullable = False)
    
    def __repr__(self):
        return "<SuperBowl(superbowl_id='{0}', superbowl_name={1})>".format(self.superbowl_id, self.superbowl_name)

class Match(Base):
    __tablename__ = 'Match'
    __table_args__ = (
        CheckConstraint('winner_pts >= 0'),
        CheckConstraint('loser_pts >= 0'),
        {'extend_existing' : True},
    )
    match_id = Column(Integer, primary_key=True)
    date = Column(Date, nullable = False)
    superbowl_id = Column(Integer, ForeignKey('SuperBowl.superbowl_id'))
    winner_id = Column(Integer, ForeignKey('Team.team_id'))
    winner_pts = Column(Integer, nullable = False)
    loser_id = Column(Integer, ForeignKey('Team.team_id'))
    loser_pts = Column(Integer, nullable = False)
    mvp_id = Column(Integer, ForeignKey('Player.player_id'))
    stadium_id = Column(Integer, ForeignKey('Stadium.stadium_id'))
    
    def __repr__(self):
        return "<Match(match_id='{0}', date={1}, superbowl_id={2}, winner_id={3}, winner_pts={4}, loser_id={5}, loser_pts={6}, mvp_id={7}, stadium_id={8})>".format(self.match_id, self.date, self.superbowl_id, self.winner_id, self.winner_pts, self.loser_id, self.loser_pts, self.mvp_id, self.stadium_id)

Base.metadata.create_all(engine)

In [4]:
players_list = pd.DataFrame(columns=['player_name'])

for elem in df["MVP"].unique():
    players_list = players_list.append({'player_name': elem}, ignore_index=True)

players_list.index.name = 'player_id'
players_list

Unnamed: 0_level_0,player_name
player_id,Unnamed: 1_level_1
0,Patrick Mahomes
1,Julian Edelman
2,Nick Foles
3,Tom Brady
4,Von Miller
5,Malcolm Smith
6,Joe Flacco
7,Eli Manning
8,Aaron Rodgers
9,Drew Brees


In [5]:
states_list = pd.DataFrame(columns=['state_name'])

for elem in df["State"].unique():
    states_list = states_list.append({'state_name': elem}, ignore_index=True)

states_list.index.name = 'state_id'
states_list

Unnamed: 0_level_0,state_name
state_id,Unnamed: 1_level_1
0,Florida
1,Georgia
2,Minnesota
3,Texas
4,California
5,Arizona
6,New Jersey
7,Louisiana
8,Indiana
9,Michigan


In [6]:
teams_set = set()

for elem in df["Winner"].unique():
    teams_set.add(elem)
    
for elem in df["Loser"].unique():
    teams_set.add(elem)

teams_list = pd.DataFrame(columns=['team_name'])
for elem in teams_set:
    teams_list = teams_list.append({'team_name': elem}, ignore_index=True)

teams_list.index.name = 'team_id'
teams_list

Unnamed: 0_level_0,team_name
team_id,Unnamed: 1_level_1
0,New York Giants
1,Tampa Bay Buccaneers
2,New Orleans Saints
3,Los Angeles Raiders
4,Indianapolis Colts
5,Arizona Cardinals
6,Seattle Seahawks
7,Pittsburgh Steelers
8,Cincinnati Bengals
9,Oakland Raiders


In [7]:
superbowl_list = pd.DataFrame(columns=['superbowl_name'])

for elem in df["SB"].unique():
    superbowl_list = superbowl_list.append({'superbowl_name': elem}, ignore_index=True)

superbowl_list.index.name = 'superbowl_id'

In [8]:
cities_set = set()
for index, city in enumerate(df['City']):
    cities_set.add((city, df['State'][index]))
    
def state_to_state_id(state):
    for state_name, state_id in zip(states_list['state_name'], states_list.index):
        if state_name == state:
            return state_id
    return 0

cities_list = pd.DataFrame(columns=['city_name', 'state_id'])
for elem in cities_set:
    cities_list = cities_list.append({'city_name': elem[0], 'state_id': state_to_state_id(elem[1])}, ignore_index=True)
    
cities_list.index.name = 'city_id'
cities_list

Unnamed: 0_level_0,city_name,state_id
city_id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,Minneapolis,2
1,East Rutherford,6
2,Santa Clara,4
3,Atlanta,1
4,Arlington,3
5,Pontiac,9
6,Pasadena,4
7,Tempe,5
8,Houston,3
9,New Orleans,7


In [9]:
stadiums_set = set()
for index, stadium in enumerate(df['Stadium']):
    stadiums_set.add((stadium, df['City'][index]))
    
def city_to_city_id(city):
    for city_name, city_id in zip(cities_list['city_name'], cities_list.index):
        if city_name == city:
            return city_id
    return 0

stadiums_list = pd.DataFrame(columns=['stadium_name', 'city_id'])
for elem in stadiums_set:
    stadiums_list = stadiums_list.append({'stadium_name': elem[0], 'city_id': city_to_city_id(elem[1])}, ignore_index=True)
    
stadiums_list.index.name = 'stadium_id'
stadiums_list

Unnamed: 0_level_0,stadium_name,city_id
stadium_id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,Ford Field,10
1,University of Phoenix Stadium,19
2,Levi's Stadium,2
3,NRG Stadium,8
4,Joe Robbie Stadium,17
5,Rice Stadium,8
6,Tulane Stadium,9
7,Mercedes-Benz Superdome,9
8,Metrodome,0
9,Alltel Stadium,13


In [10]:
def superbowl_to_superbowl_id(sb):
    for superbowl_name, superbowl_id in zip(superbowl_list['superbowl_name'], superbowl_list.index):
        if superbowl_name == sb:
            return superbowl_id
    return 0

def team_to_team_id(team):
    for team_name, team_id in zip(teams_list['team_name'], teams_list.index):
        if team_name == team:
            return team_id
    return 0

def player_to_player_id(player):
    for player_name, player_id in zip(players_list['player_name'], players_list.index):
        if player_name == player:
            return player_id
    return 0

def stadium_to_stadium_id(stadium):
    for stadium_name, stadium_id in zip(stadiums_list['stadium_name'], stadiums_list.index):
        if stadium_name == stadium:
            return stadium_id
    return 0

match_list = pd.DataFrame(columns=['date', 'superbowl_id', 'winner_id', 'loser_id', 'loser_pts', 'mvp_id', 'stadium_id'])
for date, sb, winner, winner_pts, loser, loser_pts, mvp, stadium in zip(df['Date'],
                                                                        df['SB'],
                                                                        df['Winner'],
                                                                        df['Winner Pts'],
                                                                        df['Loser'],
                                                                        df['Loser Pts'],
                                                                        df['MVP'],
                                                                        df['Stadium']):
    match_list = match_list.append({'date': date,
                                    'superbowl_id': superbowl_to_superbowl_id(sb),
                                    'winner_id': team_to_team_id(winner),
                                    'winner_pts': winner_pts,
                                    'loser_id': team_to_team_id(loser),
                                    'loser_pts': loser_pts,
                                    'mvp_id': player_to_player_id(mvp),
                                    'stadium_id': stadium_to_stadium_id(stadium)}, ignore_index=True)
match_list.index.name = 'match_id'
match_list

Unnamed: 0_level_0,date,superbowl_id,winner_id,loser_id,loser_pts,mvp_id,stadium_id,winner_pts
match_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,Feb 2 2020,0,20,28,20,0,19,31.0
1,Feb 3 2019,1,18,19,3,1,26,13.0
2,Feb 4 2018,2,27,18,33,2,15,41.0
3,Feb 5 2017,3,18,13,28,3,3,34.0
4,Feb 7 2016,4,30,17,10,4,2,24.0
5,Feb 1 2015,5,18,6,24,3,1,28.0
6,Feb 2 2014,6,6,30,8,5,20,43.0
7,Feb 3 2013,7,23,28,31,6,7,34.0
8,Feb 5 2012,8,0,18,17,7,14,21.0
9,Feb 6 2011,9,24,7,25,8,32,31.0


In [11]:
players_list.to_sql('Player', engine, if_exists='append')
states_list.to_sql('State', engine, if_exists='append')
teams_list.to_sql('Team', engine, if_exists='append')
superbowl_list.to_sql('Country', engine, if_exists='append')
cities_list.to_sql('City', engine, if_exists='append')
stadiums_list.to_sql('Stadium', engine, if_exists='append')
match_list.to_sql('Match', engine, if_exists='append')