### Import

In [19]:
import os
from urllib.parse import quote

import pandas as pd
import psycopg2
from sqlalchemy import create_engine
from sqlalchemy import text

## Add Data to Database
* Build one DataFrame per database table from the cleaned season CSVs
* Connect to the database, run the SQL script, and append each DataFrame to its table

In [20]:
# Seasons whose cleaned CSV exports are combined into a single frame.
years = [2021, 2022, 2023]

# Read every season once, then concatenate in a single call instead of
# re-copying an ever-growing frame on each loop iteration (and instead of
# seeding the concat with an empty DataFrame).
season_frames = [pd.read_csv(f'csv files/cleaned_{year}.csv') for year in years]

# Rows are stacked latest season first (2023, 2022, 2021). The order matters:
# the drop_duplicates() calls on `df` keep the first occurrence, so the most
# recent season's row wins when a key repeats.
df = (
    pd.concat(season_frames[::-1], ignore_index=True)
    if season_frames
    else pd.DataFrame()
)

In [21]:
# Player table: identity columns only, one row per player_id.
# When a player_id repeats, the first row found in `df` is the one kept.
player_columns = [
    'player_id',
    'player_id_ustat',
    'player_name_futbin',
    'player_name_ustat',
    'dob',
    'nation',
    'position',
]
player = df[player_columns].drop_duplicates(subset=['player_id'], keep='first')

# Club table: one row per club_id.
# Some rows carry a messed-up placeholder instead of a real club name
# (FIFA 23, will fix later). The literal below is the exact raw value,
# whitespace included.
bad_club_name = '\n                                           Controlled                                       '

club_rows = df[['club_id', 'club_name_futbin', 'club_name_ustat']]

# Drop the placeholder rows BEFORE de-duplicating. drop_duplicates() keeps the
# first row per club_id, so filtering afterwards would discard a club entirely
# whenever its first row happened to be a placeholder, even if other rows for
# the same club_id carry a valid name.
club = (
    club_rows[club_rows['club_name_futbin'] != bad_club_name]
    .drop_duplicates(subset=['club_id'])
)

# TODO: competition = df[...]  (competition table not built yet)

# Player statistics table: one row per (player, season, competition, club).
# When that key repeats, the first row found in `df` is the one kept.
player_stats_key = ['player_id', 'season', 'comp_id', 'club_id']
player_stats_measures = [
    'apps', 'minutes', 'goals', 'xG', 'assists', 'xA', 'shots',
    'key_passes', 'yellow_cards', 'red_cards', 'npg', 'npxG',
    'xGChain', 'xGBuildup',
]
player_stats = df[player_stats_key + player_stats_measures].drop_duplicates(
    subset=player_stats_key,
    keep='first',
)

# TODO: team_stats = df[['team_id', ...]]  (column list unfinished)

# TODO: comp_stats = df[...]  (competition stats table not built yet)

# Player ratings table: one row per (player_id, season).
# When that pair repeats, the first row found in `df` is the one kept, so only
# a single card_id survives per player and season.
# The column list is laid out one apparent attribute group per line purely for
# readability; the resulting column order is unchanged.
player_ratings_columns = [
    'player_id', 'season', 'card_id', 'rating',
    'pac', 'acceleration', 'sprint_speed',
    'sho', 'positioning', 'finishing', 'shot_power', 'long_shots', 'volleys', 'penalties',
    'pas', 'vision', 'crossing', 'fk_accuracy', 'short_passing', 'long_passing', 'curve',
    'dri', 'agility', 'balance', 'reactions', 'ball_control', 'dribbling', 'composure',
    'def', 'interceptions', 'heading_acc', 'def_awareness', 'standing_tackle', 'sliding_tackle',
    'phy', 'jumping', 'stamina', 'strength', 'aggression',
    'gk_diving', 'gk_handling', 'gk_kicking', 'gk_reflexes', 'gk_speed', 'gk_positioning',
]
player_ratings = df[player_ratings_columns].drop_duplicates(
    subset=['player_id', 'season'],
    keep='first',
)

In [22]:
# Connection settings are read from the standard libpq environment variables
# so real credentials never have to be committed with the notebook. The
# fallbacks are the previous hard-coded local-development values, so the cell
# behaves exactly as before when none of the variables are set.
user = os.environ.get('PGUSER', 'postgres')
password = os.environ.get('PGPASSWORD', 'password')
host = os.environ.get('PGHOST', 'localhost')
db = os.environ.get('PGDATABASE', 'postgres')

# Percent-encode user and password: characters such as '@', '/' or ':' would
# otherwise be parsed as URL delimiters and corrupt the connection string.
engine = create_engine(
    f"postgresql+psycopg2://{quote(user, safe='')}:{quote(password, safe='')}@{host}/{db}"
)

In [23]:
# Load the SQL script (presumably the schema DDL -- confirm against the file)
# before opening a connection, so a missing or unreadable file fails without
# holding a database connection. The encoding is explicit so the result does
# not depend on the platform default.
with open("sql files/SoccerDB.sql", encoding="utf-8") as file:
    query = text(file.read())

# execution_options() returns the connection that carries the option, so use
# its return value as the context manager instead of discarding it. With
# AUTOCOMMIT each statement takes effect immediately and no explicit commit
# is required.
with engine.connect().execution_options(isolation_level="AUTOCOMMIT") as con:
    con.execute(query)

In [24]:
# Frames and their destination tables, in matching order.
dfs = [player, club, player_stats, player_ratings]
tables = ['player', 'club', 'player_stats', 'player_ratings']

# Append every table inside ONE transaction: engine.begin() commits only if
# all inserts succeed and rolls back otherwise. Because the mode is 'append',
# a partial load followed by a re-run of this cell would insert the already
# loaded tables a second time.
with engine.begin() as connection:
    for table_name, frame in zip(tables, dfs):
        frame.to_sql(table_name, con=connection, if_exists='append', index=False)