In [1]:
import sqlite3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

pd.set_option('display.max_columns', None)

In [2]:
def make_query(query, filepath='database.sqlite'):
    """
    Execute a query on a SQLite database file and return the results as a pandas DataFrame.

    Parameters
    ----------
    query : str
        SQL query to be executed on the database file.
    filepath : str, optional
        Path to the database file, by default 'database.sqlite'.

    Returns
    -------
    pd.DataFrame
        A pandas DataFrame containing the results of the executed query.
    """
    with sqlite3.connect(filepath) as connection:
        cursor = connection.cursor()
        execution = cursor.execute(query)
        result = pd.DataFrame(execution.fetchall(), columns=[i[0] for i in execution.description])
        return result

In [3]:
player_attributes = make_query(""" SELECT * FROM player_attributes """)
player = make_query(""" SELECT * FROM player """)
match = make_query(""" SELECT * FROM match """)
team = make_query(""" SELECT * FROM team """)
team_attributes = make_query(""" SELECT * FROM team_attributes """)
league = make_query(""" SELECT * FROM league """)
country = make_query(""" SELECT * FROM country """)

In [22]:
match_df = pd.DataFrame()

ls = ['WHH', 'WHD', 'WHA']
for x in ls:
    match_df[x] = 100 / match[x]
match_df['total'] = match_df['WHH'] + match_df['WHD'] + match_df['WHA']
match_df

Unnamed: 0,WHH,WHD,WHA,total
0,58.823529,30.303030,23.094688,112.221248
1,54.644809,30.303030,27.777778,112.725617
2,40.000000,30.769231,41.666667,112.435897
3,69.444444,26.666667,16.666667,112.777778
4,23.809524,29.411765,58.823529,112.044818
...,...,...,...,...
25974,,,,
25975,,,,
25976,,,,
25977,,,,


In [30]:
match_test = match_df
lis = []
for row in match_df.itertuples():
    if row.WHD < row.WHH > row.WHA:
         lis.append('win')
    elif row.WHH < row.WHD > row.WHA:
        lis.append('draw')
    else:
        lis.append('defeat')
match_df['Result_2'] = lis
match_df.dropna()


Unnamed: 0,WHH,WHD,WHA,total,Result,Result_2
0,58.823529,30.303030,23.094688,112.221248,win,win
1,54.644809,30.303030,27.777778,112.725617,win,win
2,40.000000,30.769231,41.666667,112.435897,defeat,defeat
3,69.444444,26.666667,16.666667,112.777778,win,win
4,23.809524,29.411765,58.823529,112.044818,defeat,defeat
...,...,...,...,...,...,...
24552,61.728395,30.303030,14.285714,106.317140,win,win
24553,42.016807,32.258065,32.258065,106.532936,win,win
24554,63.694268,28.571429,14.285714,106.551410,win,win
24555,41.666667,32.258065,32.258065,106.182796,win,win


In [29]:
result = []
for row in match.itertuples():
    if row.home_team_goal == row.away_team_goal:
        result.append('draw')
    elif row.home_team_goal > row.away_team_goal:
        result.append('win')
    else:
        result.append('defeat')
result
match['Result'] = result
match.dropna(subset=['WHH', 'WHD', 'WHA'])

Unnamed: 0,id,country_id,league_id,season,stage,date,match_api_id,home_team_api_id,away_team_api_id,home_team_goal,away_team_goal,home_player_X1,home_player_X2,home_player_X3,home_player_X4,home_player_X5,home_player_X6,home_player_X7,home_player_X8,home_player_X9,home_player_X10,home_player_X11,away_player_X1,away_player_X2,away_player_X3,away_player_X4,away_player_X5,away_player_X6,away_player_X7,away_player_X8,away_player_X9,away_player_X10,away_player_X11,home_player_Y1,home_player_Y2,home_player_Y3,home_player_Y4,home_player_Y5,home_player_Y6,home_player_Y7,home_player_Y8,home_player_Y9,home_player_Y10,home_player_Y11,away_player_Y1,away_player_Y2,away_player_Y3,away_player_Y4,away_player_Y5,away_player_Y6,away_player_Y7,away_player_Y8,away_player_Y9,away_player_Y10,away_player_Y11,home_player_1,home_player_2,home_player_3,home_player_4,home_player_5,home_player_6,home_player_7,home_player_8,home_player_9,home_player_10,home_player_11,away_player_1,away_player_2,away_player_3,away_player_4,away_player_5,away_player_6,away_player_7,away_player_8,away_player_9,away_player_10,away_player_11,goal,shoton,shotoff,foulcommit,card,cross,corner,possession,B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,LBH,LBD,LBA,PSH,PSD,PSA,WHH,WHD,WHA,SJH,SJD,SJA,VCH,VCD,VCA,GBH,GBD,GBA,BSH,BSD,BSA,pro,Result
0,1,1,1,2008/2009,1,2008-08-17 00:00:00,492473,9987,9993,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.73,3.40,5.00,1.75,3.35,4.20,1.85,3.2,3.50,1.80,3.30,3.75,,,,1.70,3.30,4.33,1.90,3.3,4.00,1.65,3.40,4.50,1.78,3.25,4.00,1.73,3.40,4.20,30.303030,draw
1,2,1,1,2008/2009,1,2008-08-16 00:00:00,492474,10000,9994,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.95,3.20,3.60,1.80,3.30,3.95,1.90,3.2,3.50,1.90,3.20,3.50,,,,1.83,3.30,3.60,1.95,3.3,3.80,2.00,3.25,3.25,1.85,3.25,3.75,1.91,3.25,3.60,30.303030,draw
2,3,1,1,2008/2009,1,2008-08-16 00:00:00,492475,9984,8635,0,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.38,3.30,2.75,2.40,3.30,2.55,2.60,3.1,2.30,2.50,3.20,2.50,,,,2.50,3.25,2.40,2.63,3.3,2.50,2.35,3.25,2.65,2.50,3.20,2.50,2.30,3.20,2.75,30.769231,defeat
3,4,1,1,2008/2009,1,2008-08-17 00:00:00,492476,9991,9998,5,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.44,3.75,7.50,1.40,4.00,6.80,1.40,3.9,6.00,1.44,3.60,6.50,,,,1.44,3.75,6.00,1.44,4.0,7.50,1.45,3.75,6.50,1.50,3.75,5.50,1.44,3.75,6.50,26.666667,win
4,5,1,1,2008/2009,1,2008-08-16 00:00:00,492477,7947,9985,1,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.00,3.50,1.65,5.00,3.50,1.60,4.00,3.3,1.70,4.00,3.40,1.72,,,,4.20,3.40,1.70,4.50,3.5,1.73,4.50,3.40,1.65,4.50,3.50,1.65,4.75,3.30,1.67,29.411765,defeat
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24552,24553,21518,21518,2015/2016,9,2015-10-25 00:00:00,2030167,9906,10267,2,1,1.0,2.0,4.0,6.0,8.0,3.0,5.0,7.0,3.0,5.0,7.0,1.0,2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0,4.0,6.0,1.0,3.0,3.0,3.0,3.0,7.0,7.0,7.0,10.0,10.0,10.0,1.0,3.0,3.0,3.0,3.0,7.0,7.0,7.0,7.0,10.0,10.0,177126.0,38424.0,474589.0,56678.0,41167.0,33635.0,30871.0,184533.0,354467.0,51360.0,184138.0,489240.0,361757.0,184999.0,361710.0,391058.0,75307.0,40148.0,570432.0,361770.0,193869.0,428947.0,<goal><value><comment>n</comment><stats><goals...,<shoton><value><stats><shoton>1</shoton></stat...,<shotoff><value><stats><shotoff>1</shotoff></s...,<foulcommit><value><stats><foulscommitted>1</f...,<card><value><comment>y</comment><stats><ycard...,<cross><value><stats><crosses>1</crosses></sta...,<corner><value><stats><corners>1</corners></st...,<possession><value><comment>49</comment><stats...,1.57,3.80,6.50,1.57,4.00,6.50,1.65,3.7,4.90,1.57,3.75,6.50,1.58,4.05,7.06,1.62,3.30,7.00,,,,1.57,4.00,7.00,,,,,,,30.303030,win
24553,24554,21518,21518,2015/2016,9,2015-10-24 00:00:00,2030168,9864,9783,2,0,1.0,2.0,4.0,6.0,8.0,4.0,6.0,3.0,5.0,7.0,5.0,1.0,2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0,4.0,6.0,1.0,3.0,3.0,3.0,3.0,6.0,6.0,8.0,8.0,8.0,11.0,1.0,3.0,3.0,3.0,3.0,7.0,7.0,7.0,7.0,10.0,10.0,25563.0,104382.0,477471.0,41707.0,38842.0,39198.0,238841.0,261599.0,37450.0,215927.0,213653.0,32678.0,103905.0,191906.0,121872.0,33848.0,279173.0,51953.0,40668.0,474680.0,37653.0,194660.0,<goal><value><comment>n</comment><stats><goals...,<shoton><value><event_incident_typefk>876</eve...,<shotoff><value><stats><shotoff>1</shotoff></s...,<foulcommit><value><stats><foulscommitted>1</f...,<card><value><comment>y</comment><stats><ycard...,<cross><value><stats><crosses>1</crosses></sta...,<corner><value><stats><corners>1</corners></st...,<possession><value><comment>53</comment><stats...,2.25,3.25,3.40,2.35,3.10,3.10,2.20,3.2,3.20,2.25,3.20,3.25,2.36,3.36,3.36,2.38,3.10,3.10,,,,2.30,3.40,3.40,,,,,,,32.258065,win
24554,24555,21518,21518,2015/2016,9,2015-10-26 00:00:00,2030169,8315,9869,3,0,1.0,2.0,4.0,6.0,8.0,4.0,5.0,6.0,3.0,7.0,5.0,1.0,2.0,4.0,6.0,8.0,3.0,5.0,7.0,7.0,3.0,5.0,1.0,3.0,3.0,3.0,3.0,6.0,8.0,6.0,8.0,8.0,11.0,1.0,3.0,3.0,3.0,3.0,7.0,7.0,7.0,10.0,10.0,10.0,33764.0,181297.0,159881.0,411617.0,154938.0,182224.0,33871.0,45749.0,604105.0,96619.0,33028.0,102394.0,150480.0,384374.0,150764.0,239965.0,359191.0,202639.0,242477.0,541557.0,395154.0,206301.0,<goal><value><comment>n</comment><stats><goals...,<shoton><value><stats><blocked>1</blocked></st...,<shotoff><value><stats><shotoff>1</shotoff></s...,<foulcommit><value><stats><foulscommitted>1</f...,<card><value><comment>y</comment><stats><ycard...,<cross><value><stats><crosses>1</crosses></sta...,<corner><value><stats><corners>1</corners></st...,<possession><value><comment>49</comment><stats...,1.53,4.00,7.00,1.55,4.00,6.50,1.60,3.7,5.40,1.50,4.00,6.50,1.55,4.31,6.95,1.57,3.50,7.00,,,,1.55,4.20,7.00,,,,,,,28.571429,win
24555,24556,21518,21518,2015/2016,9,2015-10-24 00:00:00,2030170,7878,8603,1,1,1.0,2.0,4.0,6.0,8.0,3.0,5.0,7.0,3.0,5.0,7.0,1.0,2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0,4.0,6.0,1.0,3.0,3.0,3.0,3.0,7.0,7.0,7.0,10.0,10.0,10.0,1.0,3.0,3.0,3.0,3.0,7.0,7.0,7.0,7.0,10.0,10.0,104986.0,261609.0,108572.0,2752.0,239350.0,2805.0,170667.0,80492.0,534484.0,161291.0,43372.0,40648.0,239964.0,268273.0,27461.0,445907.0,37824.0,111801.0,535600.0,193226.0,114746.0,96652.0,<goal><value><comment>n</comment><stats><goals...,<shoton><value><stats><shoton>1</shoton></stat...,<shotoff><value><stats><shotoff>1</shotoff></s...,<foulcommit><value><stats><foulscommitted>1</f...,<card><value><comment>y</comment><stats><ycard...,<cross><value><stats><crosses>1</crosses></sta...,<corner><value><stats><corners>1</corners></st...,<possession><value><comment>50</comment><stats...,2.30,3.25,3.25,2.35,3.25,3.00,2.40,3.3,2.75,2.30,3.20,3.10,2.34,3.40,3.35,2.40,3.10,3.10,,,,2.30,3.40,3.30,,,,,,,32.258065,draw


In [33]:
match_eind = pd.DataFrame()

match_eind['Result'] = match['Result']
match_eind['Result_2'] = match_df['Result_2']
match_eind

Unnamed: 0,Result,Result_2
0,draw,win
1,draw,win
2,defeat,defeat
3,win,win
4,defeat,defeat
...,...,...
25974,win,defeat
25975,defeat,defeat
25976,win,defeat
25977,draw,defeat


In [38]:
ant = 0
for row in match_eind.itertuples():
    if row.Result == row.Result_2:
        ant = ant + 1
    else: 
        ant = ant + 0
ant

print(ant  / 25979)

0.49986527579968437
