In [9]:
import pandas as pd
import numpy as np
import hashlib
import sqlite3
import datetime as dt

In [11]:
def data_vault_hash(string_to_hash: str) -> str:
    """function takes a string, converts every character to uppercase, replaces blankspaces and calculates MD5 of the result"""
    cleaned_string = string_to_hash.upper().replace(" ", "").encode("utf8")
    md5hash = hashlib.md5(cleaned_string).hexdigest()
    return md5hash

In [12]:
def get_column_names(table: str, db) -> list:
    """returns a list of the column names of a table from a SQL database
    if index was created properly"""
    df_column = pd.read_sql(f'PRAGMA table_info({table});', con=db)
    return df_column['name'].to_list()

In [13]:
def get_table_names(db) -> list:
    """returns a list of all the names of the tables of a SQL database"""
    query = (
            'SELECT name FROM sqlite_master '
            'WHERE type IN ("table", "view") '
            'AND name NOT LIKE "sqlite_%"'
            ';'
            )
    table = pd.read_sql(query, con=db)
    return table['name'].to_list()

In [10]:
conn = sqlite3.connect("D:/Ania Kurse/Data Engineer/Projekt/database.sqlite")
print("Opened database successfully")
# attachDatabaseSQL = "ATTACH DATABASE ':memory:' AS memdb"
# cursor.execute(attachDatabaseSQL)
cursor = conn.cursor()

Opened database successfully


In [17]:
def Hub_TAB(HUB_TAB_Name, source_TAB, BK, RS, db):
    """Create a Hub table"""
    
    # drops a table
    sql_drop = f"""DROP TABLE IF EXISTS {HUB_TAB_Name};"""
    cursor.execute(sql_drop)

    # create a table
    cursor.execute(f'''CREATE TABLE IF NOT EXISTS {HUB_TAB_Name}
         (HK TEXT PRIMARY KEY    NOT NULL,
         {source_TAB}_ID    INT    NOT NULL,
         LDTS    Datetime,
         RS    Integer);''')
    print("Table created successfully")

    # insert a table
    #primary_key = get_primary_key(source_TAB, db)
    #print(primary_key)
    sql_insert = f"""INSERT INTO {HUB_TAB_Name} (HK, {source_TAB}_ID, LDTS, RS)
                SELECT {BK}||CAST(1 AS STR), {BK}, datetime('now'), {RS}
                FROM {source_TAB};"""

    cursor.execute(sql_insert)
    print ("Records created successfully")

    # HKs generate
    df_hub = pd.read_sql(f'Select * From {HUB_TAB_Name};', con=db)
    df_hub['HK'] = df_hub.iloc[:, 1].astype(str) +" "+ df_hub.iloc[:, 3].astype(str)
    df_hub['HK']= df_hub['HK'].apply(lambda w: data_vault_hash(w))  # data_vault_hash function
    
    # load a table to a data base
    df_hub.to_sql(f'{HUB_TAB_Name}', con=db, if_exists='replace',  index=False,
                       dtype={"HK": "TEXT NOT NULL PRIMARY KEY", f'{source_TAB}_ID':"TEXT", "LDTS": "DATETIME", "RS": "INTEGER"})
    conn.commit()

In [20]:
data = [['HUB_Player', 'Player', 'player_api_id',1],
        ['HUB_Team', 'Team', 'team_api_id',1],
        ['HUB_League', 'League', 'name', 1]]

hub = pd.DataFrame(data, columns = ['HUB_TAB_Name', 'source_TAB', 'BK', 'RS'])
        
for x in range(0, len(hub)):
    Hub_TAB(hub.iloc[x].loc['HUB_TAB_Name'], hub.iloc[x].loc['source_TAB'], hub.iloc[x].loc['BK'], hub.iloc[x].loc['RS'], conn)

Table created successfully
Records created successfully
Table created successfully
Records created successfully
Table created successfully
Records created successfully


In [21]:
pd.read_sql('Select * From HUB_Player;', con=conn).head()

Unnamed: 0,HK,Player_ID,LDTS,RS
0,2a6b82244c72e226480452c163136992,2625,2021-02-10 08:24:50,1
1,4021e8ad51eb17ec0f04bc29eb5ea19f,2752,2021-02-10 08:24:50,1
2,8daf9808509694b39ac7ad3b3bda71f3,2768,2021-02-10 08:24:50,1
3,8ab0af4568a5b31a931eaf0ae9a738c3,2770,2021-02-10 08:24:50,1
4,62ab3ce23f9884db423ce886eb945289,2790,2021-02-10 08:24:50,1


In [9]:
pd.read_sql('Select * From HUB_Team;', con=conn).head()

Unnamed: 0,HK,Team_ID,LDTS,RS
0,f58edd3a416399ee20c1346b8f2c7f59,1601,2021-02-09 19:50:41,1
1,e12660adc7b1a88a9df3f14dcde95197,1773,2021-02-09 19:50:41,1
2,7a8f692cebd289e81aebd39ecdecb945,1957,2021-02-09 19:50:41,1
3,c85196ee65db64d5955c31dd6c6d8690,2033,2021-02-09 19:50:41,1
4,d950e8c6b584dfac7d569aacf43519a7,2182,2021-02-09 19:50:41,1


In [10]:
pd.read_sql('Select * From HUB_League;', con=conn).head()

Unnamed: 0,HK,League_ID,LDTS,RS
0,31ef5be155e233d921bbc78121715718,Belgium Jupiler League,2021-02-09 19:50:42,1
1,9f0cb2b86952f91eeb1935a25fcab785,England Premier League,2021-02-09 19:50:42,1
2,3b1a5b761e4646cfc70f4456e5a81d12,France Ligue 1,2021-02-09 19:50:42,1
3,6e1be1596420396bd8e9940e100b0187,Germany 1. Bundesliga,2021-02-09 19:50:42,1
4,f23275353fb4efbeb3a3453565c8ed99,Italy Serie A,2021-02-09 19:50:42,1


In [11]:
get_table_names(conn)

['Player_Attributes',
 'Player',
 'Match',
 'League',
 'Country',
 'Team',
 'Team_Attributes',
 'HUB_Player',
 'HUB_Team',
 'HUB_League']

In [387]:
#conn.execute('''DROP TABLE  HSAT_Match''')

<sqlite3.Cursor at 0x20beac44c00>

In [12]:
get_column_names('Match',conn)

['id',
 'country_id',
 'league_id',
 'season',
 'stage',
 'date',
 'match_api_id',
 'home_team_api_id',
 'away_team_api_id',
 'home_team_goal',
 'away_team_goal',
 'home_player_X1',
 'home_player_X2',
 'home_player_X3',
 'home_player_X4',
 'home_player_X5',
 'home_player_X6',
 'home_player_X7',
 'home_player_X8',
 'home_player_X9',
 'home_player_X10',
 'home_player_X11',
 'away_player_X1',
 'away_player_X2',
 'away_player_X3',
 'away_player_X4',
 'away_player_X5',
 'away_player_X6',
 'away_player_X7',
 'away_player_X8',
 'away_player_X9',
 'away_player_X10',
 'away_player_X11',
 'home_player_Y1',
 'home_player_Y2',
 'home_player_Y3',
 'home_player_Y4',
 'home_player_Y5',
 'home_player_Y6',
 'home_player_Y7',
 'home_player_Y8',
 'home_player_Y9',
 'home_player_Y10',
 'home_player_Y11',
 'away_player_Y1',
 'away_player_Y2',
 'away_player_Y3',
 'away_player_Y4',
 'away_player_Y5',
 'away_player_Y6',
 'away_player_Y7',
 'away_player_Y8',
 'away_player_Y9',
 'away_player_Y10',
 'away_player

In [25]:
match = pd.read_sql('Select id, home_player_1, home_player_2, home_player_3, home_player_4, home_player_5, home_player_6, home_player_7, home_player_8, home_player_9, home_player_10, home_player_11 From Match;', con=conn).melt('id')
match

Unnamed: 0,id,variable,value
0,1,home_player_1,
1,2,home_player_1,
2,3,home_player_1,
3,4,home_player_1,
4,5,home_player_1,
...,...,...,...
285764,25975,home_player_11,195215.0
285765,25976,home_player_11,37257.0
285766,25977,home_player_11,289472.0
285767,25978,home_player_11,178142.0


In [14]:
Match_ID = match['id']
Player_ID =  match['value']

In [15]:
def Link_Tab(LINK_TAB_Name, source_TAB, ID, BK1, BK2, db):
    """Create a Link table"""
    
    # drops a table
    sql_drop = f"""DROP TABLE IF EXISTS {LINK_TAB_Name};"""
    cursor.execute(sql_drop)

    # create a table
    cursor.execute(f'''CREATE TABLE IF NOT EXISTS {LINK_TAB_Name}
         (HK TEXT PRIMARY KEY    NOT NULL,
         HK_{source_TAB}    TEXT    NOT NULL,
         HK_Player    TEXT     NOT NULL,
         LDTS    Datetime,
         RS    Integer);''')
    
    print("Table created successfully")
    
    # insert a table
    sql_insert = f"""INSERT INTO {LINK_TAB_Name} (HK, HK_{source_TAB}, HK_Player,LDTS, RS)
                SELECT {ID}||CAST(1 AS STR), {ID}, {ID}, datetime('now'), 1
                FROM {source_TAB};"""

    cursor.execute(sql_insert)
    print ("Records created successfully")

    # HKs generate
    df_link = pd.read_sql(f'Select * From {LINK_TAB_Name};', con=db)
    df_link.iloc[:, 1]= BK1.astype(str)
    df_link.iloc[:, 2]= BK2.astype(str)
    
    df_link['HK'] = df_link.iloc[:, 1] +" "+ df_link.iloc[:, 2] +" "+ df_link.iloc[:, 4].astype(str)
    df_link['HK']= df_link['HK'].apply(lambda w: data_vault_hash(w))  # data_vault_hash function
    
    df_link.iloc[:, 1]=df_link.iloc[:, 1].apply(lambda w: data_vault_hash(w))
    df_link.iloc[:, 2]=df_link.iloc[:, 2].apply(lambda w: data_vault_hash(w))
    
    # load a table to a data base
    df_link.to_sql(f'{LINK_TAB_Name}', con=db, if_exists='replace',  index=False,
                       dtype={"HK": "TEXT NOT NULL PRIMARY KEY", f'"HK_{source_TAB}"':"TEXT", f'"HK_Player"':"TEXT", "LDTS": "DATETIME", "RS": "INTEGER"})
    conn.commit()

In [16]:
Link_Tab('LINK_TAB_Match_Player', 'Match', 'id', Match_ID, Player_ID, conn)

Table created successfully
Records created successfully


In [17]:
pd.read_sql('Select * From LINK_TAB_Match_Player;', con=conn)

Unnamed: 0,HK,HK_Match,HK_Player,LDTS,RS
0,187c2ad8640a085f068ae7365b4f33eb,c4ca4238a0b923820dcc509a6f75849b,f3e78f3265a769c4ce90390e0f40be55,2021-02-09 19:51:14,1
1,006d911b6d537919546998f031f86e67,c81e728d9d4c2f636f067f89cc14862c,f3e78f3265a769c4ce90390e0f40be55,2021-02-09 19:51:14,1
2,45e54cfe3942603dfc9a4cbdbea4b21f,eccbc87e4b5ce2fe28308fd9f2a7baf3,f3e78f3265a769c4ce90390e0f40be55,2021-02-09 19:51:14,1
3,7ceaeb060d26b4280348a6fe90ccd04c,a87ff679a2f3e71d9181a67b7542122c,f3e78f3265a769c4ce90390e0f40be55,2021-02-09 19:51:14,1
4,587740140ff2072fc5cd3af1b02c503c,e4da3b7fbbce2345d7772b0674a318d5,f3e78f3265a769c4ce90390e0f40be55,2021-02-09 19:51:14,1
...,...,...,...,...,...
25974,07d5ad65b13b1cd7d96326363634664b,acaf74112ddf8ccdaa0811c7732cfabf,b7b42b039a0f25a8930ea0dd2c201b68,2021-02-09 19:51:14,1
25975,588e0b8b6e2cf2a7851f6b335afd7094,bf2a33ef0e8482f63f7ac6c759b74daa,981d2cc6cba5174b5279d85dc87a319e,2021-02-09 19:51:14,1
25976,42547f96e70b4053828d9ad8dc5789c5,eb95e630acb84f9cff9abfc310349381,e9ab7ba5dd52e15ce1e372c1361b5a26,2021-02-09 19:51:14,1
25977,97b8f3567318783d47c5e1c00bb46e88,937315cf79eb4801d8474c4bc6558eee,f3e78f3265a769c4ce90390e0f40be55,2021-02-09 19:51:14,1


In [18]:
def Link_Tab_5(LINK_TAB_Name, source_TAB, BK1, BK2, BK3, BK4, BK5, db):
    """Create a Link table"""
    
    # drops a table
    sql_drop = f"""DROP TABLE IF EXISTS {LINK_TAB_Name};"""
    cursor.execute(sql_drop)

    # create a table
    cursor.execute(f'''CREATE TABLE IF NOT EXISTS {LINK_TAB_Name}
         (HK TEXT PRIMARY KEY    NOT NULL,
         HK_{BK1}    TEXT    NOT NULL,
         HK_{BK2}    TEXT     NOT NULL,
         HK_{BK3}    TEXT     NOT NULL,
         HK_{BK4}    TEXT     NOT NULL,
         HK_{BK5}    TEXT     NOT NULL,
         LDTS    Datetime,
         RS    Integer);''')
         #FOREIGN KEY HK_{BK1} REFERENCES LINK_TAB_Match_Player(HK_Match)
    
    
    print("Table created successfully")
    
    # insert a table
    sql_insert = f"""INSERT INTO {LINK_TAB_Name} (HK, HK_{BK1}, HK_{BK2},HK_{BK3},HK_{BK4},HK_{BK5},LDTS, RS)
                SELECT {BK1}||CAST(1 AS STR), {BK1}, {BK2}, {BK3}, {BK4}, {BK5}, datetime('now'), 1
                FROM {source_TAB};"""

    cursor.execute(sql_insert)
    print ("Records created successfully")

    # HKs generate
    df_link = pd.read_sql(f'Select * From {LINK_TAB_Name};', con=db)
    
    
    df_link['HK'] = df_link.iloc[:, 1] +" "+ df_link.iloc[:, 2] +" "+ df_link.iloc[:, 3] +" "+ df_link.iloc[:, 4] +" "+ df_link.iloc[:, 5] +" "+ df_link.iloc[:, 7].astype(str)
    df_link['HK']= df_link['HK'].apply(lambda w: data_vault_hash(w))  # data_vault_hash function
    df_link.iloc[:, 1]=df_link.iloc[:, 1].apply(lambda w: data_vault_hash(w))
    df_link.iloc[:, 2]=df_link.iloc[:, 2].apply(lambda w: data_vault_hash(w))
    df_link.iloc[:, 3]=df_link.iloc[:, 3].apply(lambda w: data_vault_hash(w))
    df_link.iloc[:, 4]=df_link.iloc[:, 3].apply(lambda w: data_vault_hash(w))
    df_link.iloc[:, 5]=df_link.iloc[:, 3].apply(lambda w: data_vault_hash(w))
    
    # load a table to a data base
    df_link.to_sql(f'{LINK_TAB_Name}', con=db, if_exists='replace',  index=False,
                       dtype={"HK": "TEXT NOT NULL PRIMARY KEY", f'"HK_{BK1}"':"TEXT", f'"HK_{BK2}"':"TEXT", f'"HK_{BK3}"':"TEXT", f'"HK_{BK4}"':"TEXT", f'"HK_{BK5}"':"TEXT", "LDTS": "DATETIME", "RS": "INTEGER"})
    conn.commit()

In [19]:
Link_Tab_5('LINK_Match','Match', 'id','country_id', 'league_id', 'home_team_api_id','away_team_api_id',conn)

Table created successfully
Records created successfully


In [20]:
pd.read_sql('Select * From LINK_Match;', con=conn)

Unnamed: 0,HK,HK_id,HK_country_id,HK_league_id,HK_home_team_api_id,HK_away_team_api_id,LDTS,RS
0,21b74bb92c4f87e2e01a9dc42e478c71,c4ca4238a0b923820dcc509a6f75849b,c4ca4238a0b923820dcc509a6f75849b,c4ca4238a0b923820dcc509a6f75849b,511b0d5f341bddbd9a5348923b48d14c,511b0d5f341bddbd9a5348923b48d14c,2021-02-09 19:51:20,1
1,8358f7e74ed14e5d88a84485db1bc4b0,c81e728d9d4c2f636f067f89cc14862c,c4ca4238a0b923820dcc509a6f75849b,c4ca4238a0b923820dcc509a6f75849b,511b0d5f341bddbd9a5348923b48d14c,511b0d5f341bddbd9a5348923b48d14c,2021-02-09 19:51:20,1
2,fc46eb54d602aa7af5a6ebf516f33e12,eccbc87e4b5ce2fe28308fd9f2a7baf3,c4ca4238a0b923820dcc509a6f75849b,c4ca4238a0b923820dcc509a6f75849b,511b0d5f341bddbd9a5348923b48d14c,511b0d5f341bddbd9a5348923b48d14c,2021-02-09 19:51:20,1
3,08805e44ab3283e2475f5a47cd0dda34,a87ff679a2f3e71d9181a67b7542122c,c4ca4238a0b923820dcc509a6f75849b,c4ca4238a0b923820dcc509a6f75849b,511b0d5f341bddbd9a5348923b48d14c,511b0d5f341bddbd9a5348923b48d14c,2021-02-09 19:51:20,1
4,366e1be7aab48b4242818f817688c3e6,e4da3b7fbbce2345d7772b0674a318d5,c4ca4238a0b923820dcc509a6f75849b,c4ca4238a0b923820dcc509a6f75849b,511b0d5f341bddbd9a5348923b48d14c,511b0d5f341bddbd9a5348923b48d14c,2021-02-09 19:51:20,1
...,...,...,...,...,...,...,...,...
25974,c5d10c5b772dc952730af666c0f5c5cc,acaf74112ddf8ccdaa0811c7732cfabf,7b4f363a4a6eae200c5096791b87dcf2,7b4f363a4a6eae200c5096791b87dcf2,e1d2247ac791d06023aed4001f0a733b,e1d2247ac791d06023aed4001f0a733b,2021-02-09 19:51:20,1
25975,570bb568c0e2c188b0fcb1bfa1e08324,bf2a33ef0e8482f63f7ac6c759b74daa,7b4f363a4a6eae200c5096791b87dcf2,7b4f363a4a6eae200c5096791b87dcf2,e1d2247ac791d06023aed4001f0a733b,e1d2247ac791d06023aed4001f0a733b,2021-02-09 19:51:20,1
25976,50deb2e6b969c16b941acf3e83395690,eb95e630acb84f9cff9abfc310349381,7b4f363a4a6eae200c5096791b87dcf2,7b4f363a4a6eae200c5096791b87dcf2,e1d2247ac791d06023aed4001f0a733b,e1d2247ac791d06023aed4001f0a733b,2021-02-09 19:51:20,1
25977,65528aad057f0c0de7206881bb3ff389,937315cf79eb4801d8474c4bc6558eee,7b4f363a4a6eae200c5096791b87dcf2,7b4f363a4a6eae200c5096791b87dcf2,e1d2247ac791d06023aed4001f0a733b,e1d2247ac791d06023aed4001f0a733b,2021-02-09 19:51:20,1


In [7]:
def hsat_table(source_table: str, primary_key, db, *args):
    """takes source table, definiert a DF, create a new DF and save to SQlite as a HSAT table"""
    
    # converts DF
    df = pd.read_sql(f'Select * From {source_table};', con=db)
    
    # finds primary key
    #primary_key = get_primary_key(source_table, db)  # get_primary_key function
    
    # creates new DF from source df 
    df_2 = df[primary_key].to_frame(name=f"HK_{source_table}")
    
    # creates LDTS and EDTS(12 hours later) column
    now = dt.datetime.now()
    load_time = now.strftime("%Y/%m/%d %H:%M:%S")  # LDTS
    df_2["LDTS"] = load_time

    end = now + dt.timedelta(hours=12)
    end_time = end.strftime("%Y/%m/%d %H:%M:%S")  # EDTS
    df_2["EDTS"] = end_time
    
    # creates RS column
    df_2["RS"] = 1
    
    # creates HK_source_table with hash
    df_2[f"HK_{source_table}"] = df_2.apply(lambda row : str(row[f"HK_{source_table}"])+ '_' + str(row["RS"]), axis=1)
    df_2[f"HK_{source_table}"] = df_2.apply(lambda row: data_vault_hash(row[f"HK_{source_table}"]), axis=1)

    # creates HK column for HSAT with hash
    df_2["HK"] = df_2.apply(lambda row : str(row[f"HK_{source_table}"])+ '_' + str(row["RS"])+'_'+str(row["LDTS"]), axis=1)
    df_2["HK"] = df_2.apply(lambda row: data_vault_hash(row["HK"]), axis=1)
    
    # create attribute columns
    attribute_list = list(args)
    for attribute in attribute_list:
        df_2[f"{attribute}"] = df[attribute]
        
    # create Hash_Diff
    att_num = len(args)
    df_2["HD"] = ""
    
    for i in range(1, att_num):
        df_2["HD"] = str(df_2["HD"]) +"_"+ str(df_2.iloc[:,-i])        
    
    df_2["HD"] = df_2.apply(lambda row: data_vault_hash(row["HD"]), axis=1)

    # column allocation (HK -> 1, HD -> 5)
    first_col = df_2.pop("HK")
    df_2.insert(0, "HK", first_col)
    fifth_col = df_2.pop("HD")
    df_2.insert(5, "HD", fifth_col)
    
    # load df as a table to SQL
    df_2.to_sql(f'HSAT_{source_table}', con=db, if_exists='replace',  index=False,
                dtype={"HK": "TEXT NOT NULL PRIMARY KEY", f"HK_{source_table}":"TEXT", "LDTS": "DATETIME", "EDTS": "DATETIME",
                        "RS": "INTEGER", "HD": "TEXT", f"{args}":"TEXT"})
    
    df_HSAT = pd.read_sql(f'Select * From HSAT_{source_table};', con=db)
    return df_HSAT.head()

In [22]:
hsat_table("Team","id", conn, "team_fifa_api_id", "team_long_name")

Unnamed: 0,HK,HK_Team,LDTS,EDTS,RS,HD,team_fifa_api_id,team_long_name
0,5d3a4c0caeba2f0e76421264ec91a8a0,ec308451c1d095c528cfa3c009ea7235,2021/02/09 20:51:27,2021/02/10 08:51:27,1,4a6c5f66015f8686828ba1996fbd9551,673.0,KRC Genk
1,a06c57c36cbf849654716c7e0169047c,f9c340648e746ce4f8ea6dde4e3538f9,2021/02/09 20:51:27,2021/02/10 08:51:27,1,4a6c5f66015f8686828ba1996fbd9551,675.0,Beerschot AC
2,70c817c1205e7da4696adb618dfc30ca,f860ba666ed657944d19ca051e58cd2c,2021/02/09 20:51:27,2021/02/10 08:51:27,1,4a6c5f66015f8686828ba1996fbd9551,15005.0,SV Zulte-Waregem
3,0a4da1b69ba4d47efa366e0f750cb3b7,50f56cf872d90aa1c22a50bfce629cb6,2021/02/09 20:51:27,2021/02/10 08:51:27,1,4a6c5f66015f8686828ba1996fbd9551,2007.0,Sporting Lokeren
4,8caa8ce81d31b192dcfef844c9e0cef1,da2544386071b1b9c40be5d42cba47e9,2021/02/09 20:51:27,2021/02/10 08:51:27,1,4a6c5f66015f8686828ba1996fbd9551,1750.0,KSV Cercle Brugge


In [23]:
get_column_names('Team',conn)

['id', 'team_api_id', 'team_fifa_api_id', 'team_long_name', 'team_short_name']

In [24]:
hsat_table('Team_Attributes',"id", conn, 'buildUpPlaySpeed','buildUpPlaySpeedClass',
 'buildUpPlayDribbling',
 'buildUpPlayDribblingClass',
 'buildUpPlayPassing',
 'buildUpPlayPassingClass',
 'buildUpPlayPositioningClass',
 'chanceCreationPassing',
 'chanceCreationPassingClass',
 'chanceCreationCrossing',
 'chanceCreationCrossingClass',
 'chanceCreationShooting',
 'chanceCreationShootingClass',
 'chanceCreationPositioningClass',
 'defencePressure',
 'defencePressureClass',
 'defenceAggression',
 'defenceAggressionClass',
 'defenceTeamWidth',
 'defenceTeamWidthClass',
 'defenceDefenderLineClass')

Unnamed: 0,HK,HK_Team_Attributes,LDTS,EDTS,RS,HD,buildUpPlaySpeed,buildUpPlaySpeedClass,buildUpPlayDribbling,buildUpPlayDribblingClass,...,chanceCreationShooting,chanceCreationShootingClass,chanceCreationPositioningClass,defencePressure,defencePressureClass,defenceAggression,defenceAggressionClass,defenceTeamWidth,defenceTeamWidthClass,defenceDefenderLineClass
0,fd32b5ff47330b5c58f71fdcdc62b0ae,ec308451c1d095c528cfa3c009ea7235,2021/02/09 20:51:30,2021/02/10 08:51:30,1,13b1c5b8519c218b15e6b856ce1d6bcb,60,Balanced,,Little,...,55,Normal,Organised,50,Medium,55,Press,45,Normal,Cover
1,263ea8fffc13760ccffc8cb82867d84f,f9c340648e746ce4f8ea6dde4e3538f9,2021/02/09 20:51:30,2021/02/10 08:51:30,1,13b1c5b8519c218b15e6b856ce1d6bcb,52,Balanced,48.0,Normal,...,64,Normal,Organised,47,Medium,44,Press,54,Normal,Cover
2,65e53f2e2f6b76ace81852906083f926,f860ba666ed657944d19ca051e58cd2c,2021/02/09 20:51:30,2021/02/10 08:51:30,1,13b1c5b8519c218b15e6b856ce1d6bcb,47,Balanced,41.0,Normal,...,64,Normal,Organised,47,Medium,44,Press,54,Normal,Cover
3,b94c59703d28cf4ddfc59ca0cc20438f,50f56cf872d90aa1c22a50bfce629cb6,2021/02/09 20:51:30,2021/02/10 08:51:30,1,13b1c5b8519c218b15e6b856ce1d6bcb,70,Fast,,Little,...,70,Lots,Organised,60,Medium,70,Double,70,Wide,Cover
4,119eed56d3da4d58842294faef034180,da2544386071b1b9c40be5d42cba47e9,2021/02/09 20:51:30,2021/02/10 08:51:30,1,13b1c5b8519c218b15e6b856ce1d6bcb,47,Balanced,,Little,...,52,Normal,Organised,47,Medium,47,Press,52,Normal,Cover


In [25]:
get_column_names('Team_Attributes',conn)

['id',
 'team_fifa_api_id',
 'team_api_id',
 'date',
 'buildUpPlaySpeed',
 'buildUpPlaySpeedClass',
 'buildUpPlayDribbling',
 'buildUpPlayDribblingClass',
 'buildUpPlayPassing',
 'buildUpPlayPassingClass',
 'buildUpPlayPositioningClass',
 'chanceCreationPassing',
 'chanceCreationPassingClass',
 'chanceCreationCrossing',
 'chanceCreationCrossingClass',
 'chanceCreationShooting',
 'chanceCreationShootingClass',
 'chanceCreationPositioningClass',
 'defencePressure',
 'defencePressureClass',
 'defenceAggression',
 'defenceAggressionClass',
 'defenceTeamWidth',
 'defenceTeamWidthClass',
 'defenceDefenderLineClass']

In [26]:
hsat_table("Player","id", conn, 'player_name',
 'player_fifa_api_id',
 'birthday',
 'height',
 'weight')

Unnamed: 0,HK,HK_Player,LDTS,EDTS,RS,HD,player_name,player_fifa_api_id,birthday,height,weight
0,5f49efbe6a39b04b7521f3233822f96c,ec308451c1d095c528cfa3c009ea7235,2021/02/09 20:51:33,2021/02/10 08:51:33,1,26990982c770138f326d62f6e62bf418,Aaron Appindangoye,218353,1992-02-29 00:00:00,182.88,187
1,7bb39a3ecefed34cbd8f0155f5857ea2,f9c340648e746ce4f8ea6dde4e3538f9,2021/02/09 20:51:33,2021/02/10 08:51:33,1,26990982c770138f326d62f6e62bf418,Aaron Cresswell,189615,1989-12-15 00:00:00,170.18,146
2,2665e652a078fd016352e13a93299996,f860ba666ed657944d19ca051e58cd2c,2021/02/09 20:51:33,2021/02/10 08:51:33,1,26990982c770138f326d62f6e62bf418,Aaron Doran,186170,1991-05-13 00:00:00,170.18,163
3,6c6eaf67438f99bbb4003627babc0737,50f56cf872d90aa1c22a50bfce629cb6,2021/02/09 20:51:33,2021/02/10 08:51:33,1,26990982c770138f326d62f6e62bf418,Aaron Galindo,140161,1982-05-08 00:00:00,182.88,198
4,d610e9fdd6a28c45d18fd1eff09f7375,da2544386071b1b9c40be5d42cba47e9,2021/02/09 20:51:33,2021/02/10 08:51:33,1,26990982c770138f326d62f6e62bf418,Aaron Hughes,17725,1979-11-08 00:00:00,182.88,154


In [27]:
get_column_names('Player',conn)

['id',
 'player_api_id',
 'player_name',
 'player_fifa_api_id',
 'birthday',
 'height',
 'weight']

In [28]:
hsat_table("Player_Attributes","id", conn, 'overall_rating',
 'potential',
 'preferred_foot',
 'attacking_work_rate',
 'defensive_work_rate',
 'crossing',
 'finishing',
 'heading_accuracy',
 'short_passing',
 'volleys',
 'dribbling',
 'curve',
 'free_kick_accuracy',
 'long_passing',
 'ball_control',
 'acceleration',
 'sprint_speed',
 'agility',
 'reactions',
 'balance',
 'shot_power',
 'jumping',
 'stamina',
 'strength',
 'long_shots',
 'aggression',
 'interceptions',
 'positioning',
 'vision',
 'penalties',
 'marking',
 'standing_tackle',
 'sliding_tackle',
 'gk_diving',
 'gk_handling',
 'gk_kicking',
 'gk_positioning',
 'gk_reflexes')

Unnamed: 0,HK,HK_Player_Attributes,LDTS,EDTS,RS,HD,overall_rating,potential,preferred_foot,attacking_work_rate,...,vision,penalties,marking,standing_tackle,sliding_tackle,gk_diving,gk_handling,gk_kicking,gk_positioning,gk_reflexes
0,f0380e8929e9a1fbf183f3d17d14a4ba,ec308451c1d095c528cfa3c009ea7235,2021/02/09 20:51:41,2021/02/10 08:51:41,1,f586170015d97ac9d11c7deaa84ab52b,67.0,71.0,right,medium,...,54.0,48.0,65.0,69.0,69.0,6.0,11.0,10.0,8.0,8.0
1,8fa425d71d06aa67710ddb1cb1960c79,f9c340648e746ce4f8ea6dde4e3538f9,2021/02/09 20:51:41,2021/02/10 08:51:41,1,f586170015d97ac9d11c7deaa84ab52b,67.0,71.0,right,medium,...,54.0,48.0,65.0,69.0,69.0,6.0,11.0,10.0,8.0,8.0
2,d1eda215a804acd324d5cf498697896b,f860ba666ed657944d19ca051e58cd2c,2021/02/09 20:51:41,2021/02/10 08:51:41,1,f586170015d97ac9d11c7deaa84ab52b,62.0,66.0,right,medium,...,54.0,48.0,65.0,66.0,69.0,6.0,11.0,10.0,8.0,8.0
3,85e4c288dd4989aeb84f0ce7810ca5f4,50f56cf872d90aa1c22a50bfce629cb6,2021/02/09 20:51:41,2021/02/10 08:51:41,1,f586170015d97ac9d11c7deaa84ab52b,61.0,65.0,right,medium,...,53.0,47.0,62.0,63.0,66.0,5.0,10.0,9.0,7.0,7.0
4,2be2ceb6edfc6f0101da5f07394a9bb4,da2544386071b1b9c40be5d42cba47e9,2021/02/09 20:51:41,2021/02/10 08:51:41,1,f586170015d97ac9d11c7deaa84ab52b,61.0,65.0,right,medium,...,53.0,47.0,62.0,63.0,66.0,5.0,10.0,9.0,7.0,7.0


In [29]:
get_column_names('Player_Attributes',conn)

['id',
 'player_fifa_api_id',
 'player_api_id',
 'date',
 'overall_rating',
 'potential',
 'preferred_foot',
 'attacking_work_rate',
 'defensive_work_rate',
 'crossing',
 'finishing',
 'heading_accuracy',
 'short_passing',
 'volleys',
 'dribbling',
 'curve',
 'free_kick_accuracy',
 'long_passing',
 'ball_control',
 'acceleration',
 'sprint_speed',
 'agility',
 'reactions',
 'balance',
 'shot_power',
 'jumping',
 'stamina',
 'strength',
 'long_shots',
 'aggression',
 'interceptions',
 'positioning',
 'vision',
 'penalties',
 'marking',
 'standing_tackle',
 'sliding_tackle',
 'gk_diving',
 'gk_handling',
 'gk_kicking',
 'gk_positioning',
 'gk_reflexes']

In [30]:
hsat_table("League","id", conn, 'country_id', 'name')

Unnamed: 0,HK,HK_League,LDTS,EDTS,RS,HD,country_id,name
0,cc920a0c3f1b99af6e246b306495630b,ec308451c1d095c528cfa3c009ea7235,2021/02/09 20:52:08,2021/02/10 08:52:08,1,69c8fcf4ba48ea442ad7da3b69de3c5a,1,Belgium Jupiler League
1,1a36c137d093d40ac6336b006bf42589,0bdc65e4364db54d79b628670f347490,2021/02/09 20:52:08,2021/02/10 08:52:08,1,69c8fcf4ba48ea442ad7da3b69de3c5a,1729,England Premier League
2,ad99b135d0f17fad09d6fd2f752bfcda,248eeffc41601122c9cbcb4ff65441a6,2021/02/09 20:52:08,2021/02/10 08:52:08,1,69c8fcf4ba48ea442ad7da3b69de3c5a,4769,France Ligue 1
3,61fe8bc367cc10fe6af385269855621f,e0bd49f6b7486603d558fc3a81018a9c,2021/02/09 20:52:08,2021/02/10 08:52:08,1,69c8fcf4ba48ea442ad7da3b69de3c5a,7809,Germany 1. Bundesliga
4,1c733044a860be890e6dd371506bd556,2963f320cccf82d389a3b13ac26373a7,2021/02/09 20:52:08,2021/02/10 08:52:08,1,69c8fcf4ba48ea442ad7da3b69de3c5a,10257,Italy Serie A


In [31]:
get_column_names('League',conn)

['id', 'country_id', 'name']

In [8]:
hsat_table("Match","id", conn, 'home_team_api_id',
 'away_team_api_id',
 'home_team_goal',
 'away_team_goal',
 'goal',
 'shoton',
 'shotoff',
 'foulcommit',
 'card',
 'cross',
 'corner',
 'possession',
 'B365H',
 'B365D',
 'B365A',
 'BWH',
 'BWD',
 'BWA',
 'IWH',
 'IWD',
 'IWA',
 'LBH',
 'LBD',
 'LBA',
 'PSH',
 'PSD',
 'PSA',
 'WHH',
 'WHD',
 'WHA',
 'SJH',
 'SJD',
 'SJA',
 'VCH',
 'VCD',
 'VCA',
 'GBH',
 'GBD',
 'GBA',
 'BSH',
 'BSD',
 'BSA')

Unnamed: 0,HK,HK_Match,LDTS,EDTS,RS,HD,home_team_api_id,away_team_api_id,home_team_goal,away_team_goal,...,SJA,VCH,VCD,VCA,GBH,GBD,GBA,BSH,BSD,BSA
0,dbd16b79182b5f67aa92ef2b7f3e773a,ec308451c1d095c528cfa3c009ea7235,2021/02/10 08:57:30,2021/02/10 20:57:30,1,c4b37213e46df0227bdb553c01416ec8,9987,9993,1,1,...,4.0,1.65,3.4,4.5,1.78,3.25,4.0,1.73,3.4,4.2
1,2afbdb48f8578a29d04258850a84a095,f9c340648e746ce4f8ea6dde4e3538f9,2021/02/10 08:57:30,2021/02/10 20:57:30,1,c4b37213e46df0227bdb553c01416ec8,10000,9994,0,0,...,3.8,2.0,3.25,3.25,1.85,3.25,3.75,1.91,3.25,3.6
2,15a444b6a4f34c829c24f84008c08e36,f860ba666ed657944d19ca051e58cd2c,2021/02/10 08:57:30,2021/02/10 20:57:30,1,c4b37213e46df0227bdb553c01416ec8,9984,8635,0,3,...,2.5,2.35,3.25,2.65,2.5,3.2,2.5,2.3,3.2,2.75
3,5d7e664fe32a36d3e2111c13513bff36,50f56cf872d90aa1c22a50bfce629cb6,2021/02/10 08:57:30,2021/02/10 20:57:30,1,c4b37213e46df0227bdb553c01416ec8,9991,9998,5,0,...,7.5,1.45,3.75,6.5,1.5,3.75,5.5,1.44,3.75,6.5
4,ea8915756be93b0ea5b0436cf60abdeb,da2544386071b1b9c40be5d42cba47e9,2021/02/10 08:57:30,2021/02/10 20:57:30,1,c4b37213e46df0227bdb553c01416ec8,7947,9985,1,3,...,1.73,4.5,3.4,1.65,4.5,3.5,1.65,4.75,3.3,1.67


In [33]:
get_column_names('Match',conn)

['id',
 'country_id',
 'league_id',
 'season',
 'stage',
 'date',
 'match_api_id',
 'home_team_api_id',
 'away_team_api_id',
 'home_team_goal',
 'away_team_goal',
 'home_player_X1',
 'home_player_X2',
 'home_player_X3',
 'home_player_X4',
 'home_player_X5',
 'home_player_X6',
 'home_player_X7',
 'home_player_X8',
 'home_player_X9',
 'home_player_X10',
 'home_player_X11',
 'away_player_X1',
 'away_player_X2',
 'away_player_X3',
 'away_player_X4',
 'away_player_X5',
 'away_player_X6',
 'away_player_X7',
 'away_player_X8',
 'away_player_X9',
 'away_player_X10',
 'away_player_X11',
 'home_player_Y1',
 'home_player_Y2',
 'home_player_Y3',
 'home_player_Y4',
 'home_player_Y5',
 'home_player_Y6',
 'home_player_Y7',
 'home_player_Y8',
 'home_player_Y9',
 'home_player_Y10',
 'home_player_Y11',
 'away_player_Y1',
 'away_player_Y2',
 'away_player_Y3',
 'away_player_Y4',
 'away_player_Y5',
 'away_player_Y6',
 'away_player_Y7',
 'away_player_Y8',
 'away_player_Y9',
 'away_player_Y10',
 'away_player

In [None]:
hsat_table("Match","id", conn, 'season', 'date')

In [34]:
get_table_names(conn)

['Player_Attributes',
 'Player',
 'Match',
 'League',
 'Country',
 'Team',
 'Team_Attributes',
 'HUB_Player',
 'HUB_Team',
 'HUB_League',
 'LINK_TAB_Match_Player',
 'LINK_Match',
 'HSAT_Team',
 'HSAT_Team_Attributes',
 'HSAT_Player',
 'HSAT_Player_Attributes',
 'HSAT_League',
 'HSAT_Match']