In [1]:
import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
import pandas as pd
from os import environ
from os import walk
from os.path import join

In [2]:
from sqlalchemy.sql import exists
from sqlalchemy import insert

In [3]:
engine = sqlalchemy.create_engine("mariadb+mariadbconnector://"\
                                  +environ.get("USER")+":"\
                                  +environ.get("PSWD")+"@127.0.0.1:3306/nba")

In [4]:
Base = declarative_base()

In [5]:
class Teams(Base):
    __tablename__ = "Teams"
    ID = sqlalchemy.Column(sqlalchemy.Integer, primary_key = True)
    Name = sqlalchemy.Column(sqlalchemy.String(length=255))

In [6]:
class Players(Base):
    __tablename__ = "Players"
    ID = sqlalchemy.Column(sqlalchemy.Integer, primary_key = True)
    Name = sqlalchemy.Column(sqlalchemy.String(length=255))

In [11]:
class Connector:
    def __init__(self):
        Base.metadata.create_all(engine)
        Session = sqlalchemy.orm.sessionmaker()
        Session.configure(bind=engine)
        self.session = Session()
        
    def addPlayers(self, df):
        
        ids = df["PLAYER_ID"]
        names = df["PLAYER"]
        for (i,p) in zip(ids,names):
            e = self.session.query(exists().where(Players.ID == i)).scalar()
            if(not e):
                player = Players(ID = i, Name = p)
                self.session.add(player)
                self.session.commit()
                
    def addTeams(self, df):
        
        ids = df["TEAM_ID"].unique()
        names = df["TEAM"].unique()
        for (i,t) in zip(ids,names):
            e = self.session.query(exists().where(Teams.ID == int(i))).scalar()
            if(not e):
                team = Teams(ID = int(i), Name = t)
                self.session.add(team)
                self.session.commit()

In [None]:
def players_csv_to_db(c):
    for root,_, files in walk("../player_stats/"):
        for f in files:
            path = join(root,f)
            if("regular_season.csv" in path):
                df = pd.read_csv(path)
                c.addPlayers(df)

In [12]:
def team_csv_to_db(c):
    for root,_, files in walk("../player_stats/"):
        for f in files:
            path = join(root,f)
            if("regular_season.csv" in path):
                df = pd.read_csv(path)
                c.addTeams(df)

In [13]:
c = Connector()

In [14]:
csv_to_db(c)

../player_stats/2014-15/regular_season.csv
../player_stats/2019-20/regular_season.csv
../player_stats/2007-08/regular_season.csv
../player_stats/2000-01/regular_season.csv
../player_stats/1999-00/regular_season.csv
../player_stats/1996-97/regular_season.csv
../player_stats/2004-05/regular_season.csv
../player_stats/1998-99/regular_season.csv
../player_stats/2013-14/regular_season.csv
../player_stats/2006-07/regular_season.csv
../player_stats/2017-18/regular_season.csv
../player_stats/2009-10/regular_season.csv
../player_stats/2010-11/regular_season.csv
../player_stats/2015-16/regular_season.csv
../player_stats/2001-02/regular_season.csv
../player_stats/2005-06/regular_season.csv
../player_stats/2002-03/regular_season.csv
../player_stats/1997-98/regular_season.csv
../player_stats/2018-19/regular_season.csv
../player_stats/2016-17/regular_season.csv
../player_stats/2008-09/regular_season.csv
../player_stats/2012-13/regular_season.csv
../player_stats/2003-04/regular_season.csv
../player_s

In [20]:
df = pd.read_csv("../player_stats/2007-08/regular_season.csv")

In [22]:
df.loc[df["PLAYER"] == "Kevin Durant"]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,PLAYER,TEAM,AGE,GP,W,L,MIN,PTS,...,TOV,STL,BLK,PF,FP,DD2,TD3,+/-,PLAYER_ID,TEAM_ID
27,27,28,Kevin Durant,SEA,19,80,19,61,34.6,20.3,...,2.9,1.0,0.9,1.5,32.0,1,0,-8.2,201142,1610612760


In [34]:
df["TEAM_ID"].unique()

array([1610612739, 1610612747, 1610612743, 1610612756, 1610612748,
       1610612758, 1610612742, 1610612749, 1610612751, 1610612761,
       1610612746, 1610612745, 1610612744, 1610612766, 1610612737,
       1610612764, 1610612762, 1610612740, 1610612750, 1610612753,
       1610612752, 1610612760, 1610612763, 1610612755, 1610612738,
       1610612754, 1610612759, 1610612757, 1610612741, 1610612765],
      dtype=int64)