# import Libraries

In [405]:
import numpy as np
import pandas as pd

from sqlalchemy import create_engine
from sqlalchemy import URL
from sqlalchemy import text
from urllib.parse import quote_plus

# create sql URL

In [406]:
url_object = URL.create(
    "mysql+mysqlconnector",
    username="root",
    password="",
    host="localhost"
    )

# create sengine

In [407]:
engin = create_engine(url_object)

# Fetch the required DATA

In [408]:
query = ('''SELECT `club_full_name`, `club_id`,sum(PPG) as PPG, sum(goals) as goals,sum(own_goals) as own_goals,sum(yellow_cards) as yellow_cards,sum(second_yellow_cards) as second_yellow_cards,sum(red_cards) as red_cards,sum(clean_sheets) as clean_sheets, sum(goals_conceded) as goals_conceded, sum(assists) as assists,sum(minutes_per_goal) as minutes_per_goal FROM `player_statistics` 
WHERE season =2021 and appearances !=0 and competition in ('Premier League', 'Bundesliga', 'Serie A', 'Ligue 1', 'LaLiga') 
GROUP BY club_full_name;''') 

In [409]:
with engin.connect() as conn:
    conn.execute(text("USE quera_project1"))
    df = pd.read_sql(text(query), conn)
    df = df.fillna(0)

In [410]:
df.head(200)

Unnamed: 0,club_full_name,club_id,PPG,goals,own_goals,yellow_cards,second_yellow_cards,red_cards,clean_sheets,goals_conceded,assists,minutes_per_goal
0,Manchester City,281,64.740001,96.0,0.0,42.0,0.0,1.0,21.0,26.0,64.0,4367.0
1,Liverpool FC,31,66.250001,94.0,1.0,50.0,0.0,1.0,21.0,26.0,79.0,4877.0
2,Chelsea FC,631,53.810000,75.0,1.0,63.0,0.0,1.0,16.0,33.0,60.0,7154.0
3,West Ham United,379,33.090000,58.0,2.0,48.0,2.0,1.0,8.0,51.0,49.0,5009.0
4,Manchester United,985,41.640000,56.0,3.0,75.0,1.0,1.0,8.0,57.0,47.0,3607.0
...,...,...,...,...,...,...,...,...,...,...,...,...
62,SC Freiburg,60,38.210000,55.0,1.0,34.0,0.0,0.0,10.0,46.0,46.0,3721.0
63,1.FSV Mainz 05,39,42.290000,49.0,1.0,58.0,2.0,2.0,10.0,45.0,40.0,6175.0
64,1.FC Köln,3,38.760000,52.0,2.0,66.0,1.0,0.0,4.0,49.0,45.0,4904.0
65,FC Augsburg,167,27.600000,39.0,2.0,74.0,0.0,0.0,9.0,56.0,32.0,2901.0


# Normalize columns


In [411]:


import pandas as pd
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

numeric_columns = df.select_dtypes(include=np.number).columns.tolist()
df_normalized = pd.DataFrame(scaler.fit_transform(df[numeric_columns]), columns=numeric_columns)

df_final = pd.merge(df['club_full_name'], df_normalized, left_index=True, right_index=True)

df_final.head()

Unnamed: 0,club_full_name,club_id,PPG,goals,own_goals,yellow_cards,second_yellow_cards,red_cards,clean_sheets,goals_conceded,assists,minutes_per_goal
0,Manchester City,0.011669,0.652637,1.0,0.0,0.123077,0.0,0.166667,1.0,0.0,0.724138,0.468028
1,Liverpool FC,0.001175,0.676988,0.96875,0.166667,0.246154,0.0,0.166667,1.0,0.0,0.982759,0.565375
2,Chelsea FC,0.026361,0.476375,0.671875,0.166667,0.446154,0.0,0.166667,0.705882,0.132075,0.655172,1.0
3,West Ham United,0.015783,0.142235,0.40625,0.333333,0.215385,0.333333,0.166667,0.235294,0.471698,0.465517,0.590571
4,Manchester United,0.041221,0.280116,0.375,0.5,0.630769,0.166667,0.166667,0.235294,0.584906,0.431034,0.322962


In [412]:
pos_attack_metrics = [ "goals", "assists", "PPG"]
pos_attack_weight = [ 70, 30,10 ]

neg_attack_metrics = ["minutes_per_goal"]
neg_attack_weight = [20]

pos_defence_metrics = [ "clean_sheets" ]
pos_defence_weight = [ 50 ]
neg_defence_metrics = ["own_goals", "yellow_cards", "second_yellow_cards", "red_cards", "goals_conceded"]
neg_defence_weight = [10, 10, 10, 30, 40]


def attack_metric(row):
    row['attack_metric'] = sum([pos_attack_weight[i] * row[pos_attack_metrics[i]] for i in range(len(pos_attack_metrics))]) - sum([neg_attack_weight[i] * row[neg_attack_metrics[i]] for i in range(len(neg_attack_metrics))])

    total_weight = sum(pos_attack_weight) + sum(neg_attack_weight)
    row['attack_metric'] = row['attack_metric']/total_weight
    return row['attack_metric']

def defence_metric(row):
    row['defence_metric'] = sum([pos_defence_weight[i] * row[pos_defence_metrics[i]] for i in range(len(pos_defence_metrics))]) - sum([neg_defence_weight[i] * row[neg_defence_metrics[i]] for i in range(len(neg_defence_metrics))])
    total_weight = sum(pos_defence_weight) + sum(neg_defence_weight)
    row['defence_metric'] = row['defence_metric']/total_weight
    return row['defence_metric']



In [413]:
attack_metric(df_final)
defence_metric(df_final)
df_final.head(5)


Unnamed: 0,club_full_name,club_id,PPG,goals,own_goals,yellow_cards,second_yellow_cards,red_cards,clean_sheets,goals_conceded,assists,minutes_per_goal,attack_metric,defence_metric
0,Manchester City,0.011669,0.652637,1.0,0.0,0.123077,0.0,0.166667,1.0,0.0,0.724138,0.468028,0.244989,0.336058
1,Liverpool FC,0.001175,0.676988,0.96875,0.166667,0.246154,0.0,0.166667,1.0,0.0,0.982759,0.565375,0.226322,0.317949
2,Chelsea FC,0.026361,0.476375,0.671875,0.166667,0.446154,0.0,0.166667,0.705882,0.132075,0.655172,1.0,-0.135952,0.162136
3,West Ham United,0.015783,0.142235,0.40625,0.333333,0.215385,0.333333,0.166667,0.235294,0.471698,0.465517,0.590571,-0.072532,-0.116067
4,Manchester United,0.041221,0.280116,0.375,0.5,0.630769,0.166667,0.166667,0.235294,0.584906,0.431034,0.322962,0.046124,-0.170331


In [414]:
df_final[['club_full_name', 'attack_metric', 'defence_metric']].sort_values(by=['attack_metric', 'defence_metric'],
                             ascending=[False, True])
# df_final['defence_attack_diff'] =  df_final['attack_metric'] -  df_final['defence_metric']

# df_final[['club_full_name', 'defence_attack_diff']].sort_values(by=['defence_attack_diff'],
#                              ascending=[False])


Unnamed: 0,club_full_name,attack_metric,defence_metric
0,Manchester City,0.244989,0.336058
56,Bayer 04 Leverkusen,0.234239,-0.091751
1,Liverpool FC,0.226322,0.317949
53,RB Leipzig,0.214210,0.077685
25,Olympique Marseille,0.209740,0.078671
...,...,...,...
15,Brentford FC,-0.287615,-0.130094
37,ESTAC Troyes,-0.289410,-0.196552
55,Eintracht Frankfurt,-0.302841,-0.131304
12,Wolverhampton Wanderers,-0.319183,0.019896


In [415]:
df_final[['club_full_name', 'defence_attack_diff']].to_csv('player_request.csv', index=False)

KeyError: "['defence_attack_diff'] not in index"

In [None]:
!jupytext --to py -o PlayerRequest.py PlayerRequest.ipynb

[jupytext] Reading PlayerRequest.ipynb in format ipynb
[jupytext] Updating the timestamp of PlayerRequest.py
