In [1]:
import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
import pandas as pd
from os import environ
from os import walk
from os.path import join

In [2]:
from sqlalchemy.sql import exists
from sqlalchemy import insert

In [3]:
import re

In [4]:
# Build the connection URL from its components so that special characters in
# the credentials (e.g. "@", ":" or "/") are escaped instead of corrupting the
# URL. environ[...] raises a KeyError naming the missing variable, rather than
# the opaque TypeError that concatenating None to a string would produce.
engine = sqlalchemy.create_engine(
    sqlalchemy.engine.URL.create(
        drivername="mariadb+mariadbconnector",
        username=environ["USER"],
        password=environ["PSWD"],
        host="127.0.0.1",
        port=3306,
        database="nba",
    )
)

In [5]:
# Shared declarative base: the ORM classes in this notebook inherit from it,
# and Connector.__init__ creates their tables through Base.metadata.
Base = declarative_base()

In [6]:
class Players(Base):
    """ORM mapping for the ``Players`` table: an integer ID and a name."""

    __tablename__ = "Players"

    # Primary key of the row.
    ID = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
    # Name, stored as a string of at most 255 characters.
    Name = sqlalchemy.Column(sqlalchemy.String(255))

In [7]:
class Teams(Base):
    """ORM mapping for the ``Teams`` table: an integer ID and a name."""

    __tablename__ = "Teams"

    # Primary key of the row.
    ID = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
    # Name, stored as a string of at most 255 characters.
    Name = sqlalchemy.Column(sqlalchemy.String(255))

In [16]:
class Connector:
    """Loads player, team and per-season statistics from DataFrames into the database.

    Constructing a Connector creates any missing ORM tables and opens one
    session, which ``add_players`` / ``add_teams`` use for existence checks
    and inserts. The bulk loaders (``add_seasonal_performances`` and
    ``add_team_standings``) append through the module-level ``engine`` with
    ``DataFrame.to_sql``.
    """

    # Season-type prefix; the caller's year code is appended to form the season ID.
    _SEASON_PREFIXES = {
        "preseason": "001",
        "regular_season": "002",
        "all_star": "003",
        "playoffs": "004",
        "play_in": "005",
    }

    # CSV column names that are renamed before being written to the database.
    _DB_COLUMN_NAMES = {
        "MIN": "MINS",
        "WIN%": "WINP",
        "FG%": "FGP",
        "3PM": "PM3",
        "3PA": "PA3",
        "3P%": "P3P",
        "FT%": "FTP",
    }

    # Columns that are divided by 100 before being stored.
    _PERCENT_COLS = ["FG%", "3P%", "FT%"]

    # CSV columns (in output order) written by add_seasonal_performances.
    _PLAYER_STAT_COLS = [
        "PLAYER_ID", "TEAM_ID", "AGE", "GP", "W", "L", "MIN", "PTS", "FGM", "FGA",
        "FG%", "3PM", "3PA", "3P%", "FTM", "FTA", "FT%", "OREB", "DREB", "REB", "AST",
        "TOV", "STL", "BLK", "PF", "DD2", "TD3",
    ]

    # CSV columns (in output order) written by add_team_standings.
    _TEAM_STAT_COLS = [
        "TEAM_ID", "GP", "W", "L", "WIN%", "MIN", "PTS", "FGM", "FGA",
        "FG%", "3PM", "3PA", "3P%", "FTM", "FTA", "FT%", "OREB", "DREB", "REB", "AST",
        "TOV", "STL", "BLK", "BLKA", "PF", "PFD",
    ]

    def __init__(self):
        """Create the ORM tables if needed and open a session bound to ``engine``."""
        Base.metadata.create_all(engine)
        self.session = sqlalchemy.orm.sessionmaker(bind=engine)()

    def _insert_if_absent(self, model, row_id, name):
        """Insert and commit ``model(ID=row_id, Name=name)`` unless that ID already exists."""
        already_stored = self.session.query(exists().where(model.ID == row_id)).scalar()
        if not already_stored:
            self.session.add(model(ID=row_id, Name=name))
            self.session.commit()

    def _stats_for_db(self, df, source_cols):
        """Return a copy of ``df[source_cols]`` with percentages scaled and columns renamed.

        Works on a copy so the caller's DataFrame is left untouched.
        """
        stats = df[source_cols].copy()
        stats[self._PERCENT_COLS] = stats[self._PERCENT_COLS] / 100
        return stats.rename(columns=self._DB_COLUMN_NAMES)

    def add_players(self, df):
        """Insert every (PLAYER_ID, PLAYER) pair of ``df`` that is not stored yet.

        Args:
            df: DataFrame with ``PLAYER_ID`` and ``PLAYER`` columns.
        """
        for player_id, name in zip(df["PLAYER_ID"], df["PLAYER"]):
            self._insert_if_absent(Players, player_id, name)

    def add_teams(self, df):
        """Insert every team of ``df`` that is not stored yet.

        Args:
            df: DataFrame with ``TEAM_ID`` and ``TEAM`` columns.
        """
        # De-duplicate whole rows keyed on TEAM_ID so each ID stays paired with
        # a name from its own row; zipping two independent .unique() arrays
        # mis-pairs them as soon as the ID/name mapping is not one-to-one.
        teams = df[["TEAM_ID", "TEAM"]].drop_duplicates(subset="TEAM_ID")
        for team_id, name in teams.itertuples(index=False, name=None):
            self._insert_if_absent(Teams, int(team_id), name)

    def add_seasonal_performances(self, df, mode, year):
        """Append per-player season statistics to the ``Seasonal_performance`` table.

        Args:
            df: DataFrame containing at least the player statistics columns
                listed in ``_PLAYER_STAT_COLS``; it is not modified.
            mode: season type, one of the keys of ``_SEASON_PREFIXES``.
            year: string appended to the season-type prefix to form ``Season_ID``.

        Raises:
            KeyError: if ``mode`` is unknown or a required column is missing.
        """
        stats = self._stats_for_db(df, self._PLAYER_STAT_COLS)
        stats["Season_ID"] = self._SEASON_PREFIXES[mode] + year
        stats.to_sql("Seasonal_performance", engine, if_exists="append", index=False)

    def add_team_standings(self, df, mode, year):
        """Append per-team season statistics to the ``Team_standings`` table.

        Args:
            df: DataFrame containing at least the team statistics columns
                listed in ``_TEAM_STAT_COLS``; it is not modified.
            mode: season type, one of the keys of ``_SEASON_PREFIXES``.
            year: string appended to the season-type prefix to form ``SEASON_ID``.

        Raises:
            KeyError: if ``mode`` is unknown or a required column is missing.
        """
        stats = self._stats_for_db(df, self._TEAM_STAT_COLS)
        # SEASON_ID goes first in this table's column order.
        stats.insert(0, "SEASON_ID", self._SEASON_PREFIXES[mode] + year)
        stats.to_sql("Team_standings", engine, if_exists="append", index=False)

In [17]:
def players_csv_to_db(c):
    """Load the players of every CSV file found under ``../player_stats/``.

    Walks the directory tree, reads each file whose name ends in ``.csv`` and
    hands the resulting DataFrame to ``c.add_players``.

    Args:
        c: object exposing ``add_players(df)`` (e.g. a Connector).
    """
    for root, _, files in walk("../player_stats/"):
        for f in files:
            # Test the file name's suffix: a substring test on the whole path
            # would also accept "x.csv.bak" or any file below a directory
            # whose name contains ".csv".
            if f.endswith(".csv"):
                c.add_players(pd.read_csv(join(root, f)))

In [18]:
def teams_csv_to_db(c):
    """Load the teams of every regular-season CSV found under ``../player_stats/``.

    Walks the directory tree, reads each file whose name ends in
    ``regular_season.csv`` and hands the resulting DataFrame to ``c.add_teams``.

    Args:
        c: object exposing ``add_teams(df)`` (e.g. a Connector).
    """
    for root, _, files in walk("../player_stats/"):
        for f in files:
            # Test the file name's suffix: a substring test on the whole path
            # would also accept "regular_season.csv.bak" or any file below a
            # directory whose name contains the pattern.
            if f.endswith("regular_season.csv"):
                c.add_teams(pd.read_csv(join(root, f)))

In [19]:
def get_season_year_code(year):
    """Return the integer code of the year preceding ``year``.

    ``year`` is a string of digits. ``"00"`` wraps around to 99; any other
    value is converted to ``int`` and decremented by one.
    """
    return 99 if year == "00" else int(year) - 1

In [20]:
def seasonals_csv_to_db(c):
    """Load per-player season statistics from the CSV files under ``../player_stats/``.

    For every file whose path contains a ``<digits>-<digits>`` season span and
    whose name ends in ``regular_season.csv`` or ``playoffs.csv``, the file is
    read and passed to ``c.add_seasonal_performances`` together with the season
    type and the year code derived from the second number of the span
    (via ``get_season_year_code``, left-padded to two digits).

    Args:
        c: object exposing ``add_seasonal_performances(df, mode, year)``.
    """
    for root, _, files in walk("../player_stats/"):
        for f in files:
            path = join(root, f)
            # Raw string: "\d" in a normal literal is an invalid escape sequence.
            span = re.search(r"(\d+)-(\d+)", path)
            if not span:
                continue
            year = "{:0>2d}".format(get_season_year_code(span.group(2)))
            for mode in ("regular_season", "playoffs"):
                # Suffix test on the file name, so e.g. "playoffs.csv.bak" is skipped.
                if f.endswith(mode + ".csv"):
                    c.add_seasonal_performances(pd.read_csv(path), mode, year)
                    break

In [21]:
def standings_csv_to_db(c):
    """Load per-team season statistics from the CSV files under ``../team_standings/``.

    For every file whose path contains a ``<digits>-<digits>`` season span and
    whose name ends in ``regular_season.csv`` or ``playoffs.csv``, the file is
    read and passed to ``c.add_team_standings`` together with the season type
    and the year code derived from the second number of the span
    (via ``get_season_year_code``, left-padded to two digits).

    Args:
        c: object exposing ``add_team_standings(df, mode, year)``.

    Returns:
        Whatever the last ``c.add_team_standings`` call returned, or an empty
        dict when no file was loaded.
    """
    last_result = {}
    for root, _, files in walk("../team_standings/"):
        for f in files:
            path = join(root, f)
            # Raw string: "\d" in a normal literal is an invalid escape sequence.
            span = re.search(r"(\d+)-(\d+)", path)
            if not span:
                continue
            year = "{:0>2d}".format(get_season_year_code(span.group(2)))
            for mode in ("regular_season", "playoffs"):
                # Suffix test on the file name, so e.g. "playoffs.csv.bak" is skipped.
                if f.endswith(mode + ".csv"):
                    last_result = c.add_team_standings(pd.read_csv(path), mode, year)
                    break
    return last_result

In [22]:
# Instantiating Connector creates any missing ORM tables and opens the
# session used by the add_* methods.
c = Connector()

In [23]:
# Load the regular-season and playoff CSVs found under ../team_standings/
# through the connector.
standings_csv_to_db(c)