In [1]:
from io import StringIO
import pandas as pd
from sqlalchemy import create_engine, text
engine = create_engine('sqlite://', echo=False)
import sqlite3
import contextlib

def df_to_database(df, name):
    """
       Deze functie maakt een database van een dataframe.
       Parameters:
                  df  = de dataframe waarvan je een database tabel wil maken.
                         
                  name = De naam die je de database wil geven.
                         Zorg ervoor dat deze parameter een string is.
    """
    df.to_sql(name, con=engine)
    
    display(df)

def string_to_df(string):
    """
       Deze functie maakt van een string een dataframe,
       zolang het standaard CSV formaat wordt aangehouden.
       Parameters:
                  string = de string waarvan je een dataframe wil maken.
    """
    return pd.read_csv(StringIO(string))

def show_query(query):
    """
       Deze functie print de output van je SQL query uit.
       Parameters:
                  query = Je SQL query.
                          Zorg ervoor dat deze parameter een string is.
    """

    query_output = engine.execute(query).fetchall()
    column_names = engine.execute(query)._metadata.keys
    return pd.DataFrame(query_output, columns = column_names)


sql_connect = sqlite3.connect('database.sqlite')
cursor = sql_connect.cursor()


query_r = "SELECT * FROM sqlite_sequence" # read query


sql_connect = sqlite3.connect('database.sqlite')
cursor = sql_connect.cursor()
df = pd.read_sql_query(query_r,sql_connect)


import contextlib
@contextlib.contextmanager
def db_read_only(file):
    sql_connect = sqlite3.connect('file:'+file+'?mode=ro',uri=True)
    cursor = sql_connect.cursor()
    yield sql_connect
    sql_connect.commit()
    sql_connect.close()
    
    
file = 'database.sqlite'
with db_read_only(file) as ro:
    data = pd.read_sql_query(query_r, ro)
    
    
df_list = []
with db_read_only(file) as ro:
    for table in data['name']:
        query_r = 'SELECT * FROM ' + str(table)
        data = pd.read_sql_query(query_r, ro)
        df_list.append(data)


In [2]:
team = df_list[0]
country = df_list[1]
league = df_list[2]
match = df_list[3]
player = df_list[4]
player_attributes = df_list[5]
team_attributes = df_list[6]

# OPDRACHT 1

In [3]:
variable = ["potential", "crossing", 'finishing', 'heading_accuracy',
       'short_passing', 'volleys', 'dribbling', 'curve', 'free_kick_accuracy',
       'long_passing', 'ball_control', 'acceleration', 'sprint_speed',
       'agility', 'reactions', 'balance', 'shot_power', 'jumping', 'stamina',
       'strength', 'long_shots', 'aggression', 'interceptions', 'positioning',
       'vision', 'penalties', 'marking', 'standing_tackle', 'sliding_tackle',
       'gk_diving', 'gk_handling', 'gk_kicking', 'gk_positioning',
       'gk_reflexes', ]


merge_on = player_attributes.merge(player, on = "player_fifa_api_id", how = 'left')
df = merge_on.groupby('player_name')[variable].mean()
df

row_count = df.sum(axis=1)
df['avg_rating'] = row_count / 34
avg_attributes = pd.DataFrame(df['avg_rating'])


avg_attributes

Unnamed: 0_level_0,avg_rating
player_name,Unnamed: 1_level_1
Aaron Appindangoye,48.670588
Aaron Cresswell,55.911765
Aaron Doran,53.321267
Aaron Galindo,50.122762
Aaron Hughes,50.808235
...,...
Zsolt Low,56.567227
Zurab Khizanishvili,53.922794
Zvjezdan Misimovic,61.082353
de Oliveira Cleber Monteiro,54.908497


# OPDRACHT 2

In [4]:
match2 = match[['match_api_id', 'home_player_1', 'home_player_2',
       'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
       'home_player_7', 'home_player_8', 'home_player_9',
       'home_player_10', 'home_player_11', 'away_player_1',
       'away_player_2', 'away_player_3', 'away_player_4', 'away_player_5',
       'away_player_6', 'away_player_7', 'away_player_8', 'away_player_9',
       'away_player_10', 'away_player_11']]
match2

Unnamed: 0,match_api_id,home_player_1,home_player_2,home_player_3,home_player_4,home_player_5,home_player_6,home_player_7,home_player_8,home_player_9,...,away_player_2,away_player_3,away_player_4,away_player_5,away_player_6,away_player_7,away_player_8,away_player_9,away_player_10,away_player_11
0,492473,,,,,,,,,,...,,,,,,,,,,
1,492474,,,,,,,,,,...,,,,,,,,,,
2,492475,,,,,,,,,,...,,,,,,,,,,
3,492476,,,,,,,,,,...,,,,,,,,,,
4,492477,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25974,1992091,42231.0,678384.0,95220.0,638592.0,413155.0,45780.0,171229.0,67333.0,119839.0,...,563066.0,8800.0,67304.0,158253.0,133126.0,186524.0,93223.0,121115.0,232110.0,289732.0
25975,1992092,33272.0,41621.0,25813.0,257845.0,114735.0,42237.0,113227.0,358156.0,32343.0,...,114792.0,150007.0,178119.0,27232.0,570830.0,260708.0,201704.0,36382.0,34082.0,95257.0
25976,1992093,157856.0,274779.0,177689.0,294256.0,42258.0,39979.0,173936.0,147959.0,451983.0,...,67349.0,202663.0,32597.0,114794.0,188114.0,25840.0,482200.0,95230.0,451335.0,275122.0
25977,1992094,,8881.0,173534.0,39646.0,282287.0,340790.0,393337.0,8893.0,614454.0,...,121080.0,197757.0,260964.0,231614.0,113235.0,41116.0,462608.0,42262.0,92252.0,194532.0


In [62]:
def adding_overall_rating(rating, column_name):
    player_att = player_attributes.groupby(["player_api_id"]).mean().round(0).reset_index()
    dict_zip = dict(zip(player_att["player_api_id"], player_att[rating]))
    colm = ['home_player_' + str(i) for i in range(1,12)] + ["away_player_" + str(i) for i in range(1,12)]
   
    for col in colm:
        col_name = col + column_name
        match[col_name] = [dict_zip.get(i, None) for i in match[col]]
    return match
af2 = pd.DataFrame(adding_overall_rating('overall_rating', '_rating'))


# OPDRACHT 3

In [123]:
s_home = af2[af2['season']== '2015/2016'].filter(like = 'home').filter(like = "rating")
rows_list = []
for i, row in s_home.iterrows():
    row_dict = {'Index': i, 'Home_players': row.mean()}
    rows_list.append(row_dict)

home= pd.DataFrame(rows_list)
display(df)

Unnamed: 0,Index,Away_players
0,1488,68.818182
1,1489,68.000000
2,1490,64.818182
3,1491,66.181818
4,1492,65.272727
...,...,...
3321,25974,60.545455
3322,25975,62.818182
3323,25976,64.909091
3324,25977,62.363636


In [129]:
s_home = af2[af2['season']== '2015/2016'].filter(like = 'home')
s_home

new_df = pd.DataFrame(s_home["home_team_api_id"])

Unnamed: 0,home_team_api_id,home_team_goal,home_player_X1,home_player_X2,home_player_X3,home_player_X4,home_player_X5,home_player_X6,home_player_X7,home_player_X8,...,home_player_2_rating,home_player_3_rating,home_player_4_rating,home_player_5_rating,home_player_6_rating,home_player_7_rating,home_player_8_rating,home_player_9_rating,home_player_10_rating,home_player_11_rating
1488,9997,2,1.0,2.0,4.0,6.0,8.0,3.0,5.0,7.0,...,65.0,70.0,56.0,64.0,60.0,61.0,67.0,67.0,65.0,69.0
1489,8571,2,1.0,2.0,4.0,6.0,8.0,2.0,4.0,6.0,...,62.0,67.0,63.0,67.0,65.0,68.0,67.0,67.0,68.0,60.0
1490,9987,3,1.0,2.0,4.0,6.0,8.0,4.0,6.0,3.0,...,66.0,73.0,71.0,68.0,62.0,69.0,72.0,68.0,66.0,73.0
1491,8573,3,1.0,2.0,4.0,6.0,8.0,3.0,5.0,7.0,...,65.0,67.0,74.0,65.0,70.0,63.0,65.0,69.0,70.0,68.0
1492,10000,3,1.0,2.0,4.0,6.0,8.0,4.0,6.0,3.0,...,62.0,63.0,68.0,66.0,64.0,68.0,67.0,68.0,61.0,69.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25974,10190,1,1.0,2.0,4.0,6.0,8.0,4.0,6.0,3.0,...,53.0,58.0,55.0,64.0,60.0,62.0,60.0,63.0,66.0,57.0
25975,9824,1,1.0,3.0,5.0,7.0,2.0,4.0,6.0,8.0,...,58.0,62.0,57.0,58.0,60.0,63.0,59.0,61.0,54.0,57.0
25976,9956,2,1.0,2.0,4.0,6.0,8.0,4.0,6.0,3.0,...,57.0,63.0,61.0,62.0,77.0,67.0,65.0,58.0,71.0,67.0
25977,7896,0,1.0,2.0,4.0,6.0,8.0,3.0,7.0,5.0,...,67.0,61.0,53.0,62.0,60.0,59.0,59.0,60.0,55.0,59.0


In [109]:
s_away = af2[af2['season']== '2015/2016'].filter(like = 'away').filter(like = "rating")
rows_list1 = []
for i, row in s_away.iterrows():
    row_dict = {'Index': i, 'Away_players': row.mean()}
    rows_list1.append(row_dict)

away = pd.DataFrame(rows_list1)
display(df_a)

Unnamed: 0,Index,Away_players
0,1488,68.818182
1,1489,68.000000
2,1490,64.818182
3,1491,66.181818
4,1492,65.272727
...,...,...
3321,25974,60.545455
3322,25975,62.818182
3323,25976,64.909091
3324,25977,62.363636


In [111]:
s_15_16 = home.merge(away, on = "Index")
s_15_16

Unnamed: 0,Index,Home_players,Away_players
0,1488,64.727273,68.818182
1,1489,65.545455,68.000000
2,1490,68.727273,64.818182
3,1491,67.454545,66.181818
4,1492,65.727273,65.272727
...,...,...,...
3321,25974,59.909091,60.545455
3322,25975,59.272727,62.818182
3323,25976,64.818182,64.909091
3324,25977,59.500000,62.363636


In [131]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.barplot(data = s_15_16, x = 'Home_players', y = 'Away_players', hue=None, order=None, hue_order=None, estimator='mean', errorbar=('ci', 95), n_boot=1000, units=None, seed=None, orient=None, color=None, palette=None, saturation=0.75, width=0.8, errcolor='.26', errwidth=None, capsize=None, dodge=True, ci='deprecated', ax=None)

TypeError: 'str' object is not callable