In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory,
# walking the tree in os.walk's order.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import warnings
import json

import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.express as px

import squarify
from sklearn import manifold

In [None]:
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
        
warnings.filterwarnings('ignore')

In [None]:
# Every table is a CSV stored in the same Kaggle input folder.
UCL_DIR = "/kaggle/input/ucl-202122-uefa-champions-league"

# One DataFrame per CSV, unpacked in the same order as the file stems below.
# 'distributon' is spelled exactly as in the original path; presumably that is
# the file's real name in the dataset; verify before "correcting" it.
ks, gl, dfn, atkg, gk, dsp, atm, dis = (
    pd.read_csv(f"{UCL_DIR}/{stem}.csv")
    for stem in (
        "key_stats",     # -> ks
        "goals",         # -> gl
        "defending",     # -> dfn
        "attacking",     # -> atkg
        "goalkeeping",   # -> gk
        "disciplinary",  # -> dsp
        "attempts",      # -> atm
        "distributon",   # -> dis
    )
)

# 4. Key Stats

**4.1. Descriptive analysis**

In [None]:
# Preview the first five rows of the key-stats table.
ks.head(n=5)

In [None]:
# Summary statistics of the numeric columns, transposed so each column of `ks`
# becomes one row of the output.
ks.describe().transpose()

In [None]:
# Summary of the object-dtype (text) columns, transposed so each column of `ks`
# becomes one row of the output.
ks.describe(include=['object']).transpose()

**4.2. Number of goals scored per club**

In [None]:
# Total goals per club, highest-scoring clubs first. `tks` is also read by the
# treemap cell that follows.
tks = (
    ks.groupby('club', as_index=False)['goals']
    .sum()
    .sort_values(by='goals', ascending=False)
)

# Wide, short canvas; tick styling is applied before the bars are drawn.
sns.set(rc={"figure.figsize": (20, 5)})
plt.xticks(rotation='vertical', fontsize=12)

p = sns.barplot(data=tks, x='club', y='goals', palette='plasma', capsize=.2)
p.set_title('\nTotal Goals Scored By Club', fontsize=30)

In [None]:
# Treemap of the per-club goal totals held in `tks`: one tile per club, sized
# by its `goals` value.
fig = px.treemap(
    data_frame=tks,
    path=['club'],
    values='goals',
    title='\nTotal Goals Scored By Club',
    width=1200,
    height=400,
)
fig.update_layout(margin={'t': 50, 'l': 25, 'r': 25, 'b': 25})
fig.show()

**4.3. Average playing time per player per club**

In [None]:
# Mean `minutes_played` over the rows of each club, largest mean first.
tks = (
    ks.groupby('club', as_index=False)['minutes_played']
    .mean()
    .sort_values(by='minutes_played', ascending=False)
)

sns.set(rc={"figure.figsize": (20, 5)})
plt.xticks(rotation='vertical', fontsize=12)

p = sns.barplot(data=tks, x='club', y='minutes_played', palette='summer', capsize=.2)
p.set_title('\nAverage playing time per player per club\n', fontsize=30)

**4.4. Top 10 goal scorers**

In [None]:
# Re-order the whole key-stats table by goals (descending); `ks` stays sorted
# for the cells that follow. The first ten rows are the top scorers.
ks = ks.sort_values('goals', ascending=False)
tks = ks.head(10)

sns.set(rc={"figure.figsize": (20, 5)})
plt.xticks(rotation='vertical', fontsize=12)

p = sns.barplot(data=tks, x='player_name', y='goals', palette='cividis', capsize=.2)
p.set_title('\nTop 10 Goal Scorers\n', fontsize=30)

**4.5. Unsupervised exploration with t-SNE (coloured by goals)**

In [None]:
# '-' is used as the missing-value marker in the table; map it to 0 so the
# feature columns can be cast to numbers.
ks = ks.replace(to_replace='-', value=0)

# TSNE class, 2 components for easy visualization; the fixed random_state keeps
# the layout reproducible between runs.
tsne = manifold.TSNE(n_components=2, random_state=42)

# Embed the three workload columns. The explicit float cast makes the
# text -> number conversion visible (any column that held '-' was read as
# object dtype) instead of leaving it implicit inside scikit-learn.
features = ks[['minutes_played', 'match_played', 'distance_covered']].astype(float)
transformed_data = tsne.fit_transform(features)

# Row i of the embedding corresponds to row i of `ks`, so goals are attached
# positionally. The column is assigned directly (not through
# `.loc[:, 'goals'] = ...`, which keeps the existing float dtype on
# pandas >= 2.0) so the hue/legend labels are integers.
tsne_df = pd.DataFrame(transformed_data, columns=['x', 'y'])
tsne_df['goals'] = ks['goals'].to_numpy().astype(int)

# plotting TSNE transformed data
sns.set(rc={'figure.figsize': (15, 10)})
# `height` is the supported name of the facet-size argument; the older `size`
# keyword is rejected by current seaborn releases.
grid = sns.FacetGrid(tsne_df, hue='goals', height=8, palette='binary')
grid.map(plt.scatter, 'x', 'y').add_legend()

# Reference lines at the 90th percentile of each embedding axis. They are
# computed from the `x` / `y` columns because a quantile of `goals` is not in
# the units of either axis.
# NOTE(review): confirm this is the intended meaning of the two lines.
grid.refline(x=tsne_df['x'].quantile(.9), color='blue')
grid.refline(y=tsne_df['y'].quantile(.9), color='red')

# 5. Goals Analysis

**5.1. Top 15 teams in terms of goals**

In [None]:
# Goals summed per club (stored in a column named 'count'), sorted descending
# and cut to the 15 highest totals.
g1 = (
    ks.groupby('club')['goals']
    .sum()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
    .head(15)
)

sns.set(rc={"figure.figsize": (20, 5)})
plt.xticks(rotation='vertical', fontsize=12)

p = sns.barplot(data=g1, x='club', y='count', palette='hot', capsize=.2)
p.set_title('\nTop 15 Teams in terms of goals\n', fontsize=30)

**5.2. Goal types analysis (how scored)**

In [None]:
# Column totals over all rows of `gl` for the four "how scored" columns, as a
# one-column frame (column label 0) indexed by column name. That is the shape
# px.pie reads below.
how_scored_cols = ['right_foot', 'left_foot', 'headers', 'others']
gl_sum = gl.sum(numeric_only=True).loc[how_scored_cols].to_frame()

fig = px.pie(
    gl_sum,
    names=gl_sum.index,
    values=0,
    title='Goal Types analysis(How Scored)',
    color_discrete_sequence=px.colors.sequential.RdBu,
)
fig.show()

**5.3. Goal types analysis (from where scored)**

In [None]:
# Column totals over all rows of `gl` for the three "scored from" columns, as a
# one-column frame (column label 0) indexed by column name.
where_scored_cols = ['inside_area', 'outside_areas', 'penalties']
gl_sum = gl.sum(numeric_only=True).loc[where_scored_cols].to_frame()

fig = px.pie(
    gl_sum,
    names=gl_sum.index,
    values=0,
    title='Goal types analysis (From where scored)',
)
fig.show()

**5.4. Goal types analysis (player position)**

In [None]:
# Bar per playing position showing how many rows of `gl` have that position,
# most frequent position first.
# NOTE(review): this counts rows of goals.csv, not the sum of goals; confirm
# that is what the chart is meant to show.
sns.set(rc={'figure.figsize': (15, 5)})
plt.xticks(fontsize=15)

# value_counts() already returns positions ordered by descending frequency.
position_order = gl['position'].value_counts().index

# The column is passed as `x=` with `data=`; a bare positional Series is read
# as `data` by seaborn >= 0.12. `hue_order` is omitted because no `hue` is
# used (it was previously handed a GroupBy object, which is not a valid order).
p = sns.countplot(data=gl, x='position', order=position_order, palette='Set2')
p.axes.set_title('Goal types analysis (player position)', fontsize=30)

# 6. Attacking Analysis

**6.1. Top 15 Dribblers**

In [None]:
# Re-order the attacking table by dribbles (descending); `atkg` stays sorted
# for the cells that follow. The first 15 rows are plotted.
atkg = atkg.sort_values('dribbles', ascending=False)
t = atkg.head(15)

sns.set(rc={"figure.figsize": (20, 5)})
plt.xticks(rotation='vertical', fontsize=15)

p = sns.barplot(data=t, x='player_name', y='dribbles', palette='cividis', capsize=.2)
p.set_title('\nTop 15 Dribblers\n', fontsize=20)

**6.2. Team Dribbles**

In [None]:
# Dribbles summed per club (stored in a column named 'count'), sorted
# descending and cut to the 15 highest totals.
d1 = (
    atkg.groupby('club')['dribbles']
    .sum()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
    .head(15)
)

sns.set(rc={"figure.figsize": (20, 5)})
plt.xticks(rotation='vertical', fontsize=15)

p = sns.barplot(data=d1, x='club', y='count', palette='summer', capsize=.2)
p.set_title('\nTop 15 Dribbling Clubs\n', fontsize=20)

**6.3. Top 15 corner Takers**

In [None]:
# Re-order the attacking table by corners taken (descending); `atkg` stays
# sorted for the cells that follow. The first 15 rows are plotted.
atkg = atkg.sort_values('corner_taken', ascending=False)
t = atkg.head(15)

sns.set(rc={"figure.figsize": (20, 5)})
plt.xticks(rotation='vertical', fontsize=15)

p = sns.barplot(data=t, x='player_name', y='corner_taken', palette='cividis', capsize=.2)
p.set_title('\nTop 15 Corner Takers\n', fontsize=20)

**6.4. Top Corners Taken by club**

In [None]:
# Corners summed per club (stored in a column named 'count'), sorted descending.
# The aggregate gets its own name so the player-level `atkg` table is not
# replaced: this cell, and the other attacking cells, stay re-runnable.
corners_by_club = (
    atkg.groupby('club')['corner_taken']
    .agg('sum')
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)
t = corners_by_club[:15]

sns.set(rc={'figure.figsize': (20, 5)})
plt.xticks(fontsize=15, rotation='vertical')
p = sns.barplot(x='club', y='count', data=t, palette='summer', capsize=.2)
p.axes.set_title('\nTop 15 Corner Taking Clubs\n', fontsize=20)

# 7. Defense Analysis

**7.1. Ball Recovery analysis by playing position**

In [None]:
# Balls recovered summed per playing position (stored in a column named
# 'count'), largest share first. 'balls_recoverd' is the column's spelling in
# the data and must stay as is.
g1 = (
    dfn.groupby('position')['balls_recoverd']
    .agg('sum')
    .reset_index(name='count')
    .sort_values('count', ascending=False)
)

# No seaborn figure-size call here: seaborn/matplotlib rc settings do not
# affect a Plotly figure.
fig = px.pie(g1, values="count", names=g1.position, title='Ball recovery analysis')
# The figure must be shown explicitly; a cell that ends in an assignment
# renders nothing.
fig.show()

**7.2. Tackles won/lost ratio**

In [None]:
# Column totals over all rows of `dfn` for tackles won and lost, as a
# one-column frame (column label 0) indexed by column name.
# (`gl_sum` is the same variable name the goal pies use; it holds defending
# totals here.)
tackle_cols = ['t_won', 't_lost']
gl_sum = dfn.sum(numeric_only=True).loc[tackle_cols].to_frame()

fig = px.pie(
    gl_sum,
    names=gl_sum.index,
    values=0,
    title='Tackle Won vs Lost',
    color_discrete_sequence=px.colors.sequential.YlOrRd,
)
fig.show()

**7.3. Clearance Per club**

In [None]:
# Clearances attempted summed per club (stored in a column named 'count'),
# sorted descending; every club is plotted.
g1 = (
    dfn.groupby('club')['clearance_attempted']
    .sum()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
)

sns.set(rc={"figure.figsize": (20, 5)})
plt.xticks(rotation='vertical', fontsize=15)

p = sns.barplot(data=g1, x='club', y='count', palette='magma', capsize=.2)
p.set_title('\nTeams by clearance attempted\n')

**7.4. Clubs by successful tackles**

In [None]:
# Tackles won summed per club (stored in a column named 'count'), sorted
# descending. The aggregate gets its own name so the player-level `dfn` table
# is not replaced: re-running this cell works, and cells that need the
# per-player defending columns can still read them from `dfn`.
tackles_by_club = (
    dfn.groupby('club')['t_won']
    .agg('sum')
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)

sns.set(rc={'figure.figsize': (20, 5)})
plt.xticks(fontsize=15, rotation='vertical')
p = sns.barplot(x='club', y='count', data=tackles_by_club, palette='viridis', capsize=.2)
p.axes.set_title('\nClubs by successful tackles\n')

**7.5. Top 15 defenders by ranking**

**Ranking system:**
* (tackles by defender / highest tackles) * 0.3 +
* (balls_recoverd by defender / highest balls_recoverd) * 0.3 +
* (tackles won by defender / highest tackles won) * 0.2 +
* (tackles won by defender / tackles attempted by defender) * 0.1 +
* (clearances attempted by defender / highest clearances attempted) * 0.1

In [None]:
# DISABLED CELL: every line below is commented out, so nothing runs here.
# When enabled it would add a weighted `ranking` column to `dfn` (weights
# .3 / .3 / .2 / .1 / .1, matching the "Ranking system" markdown above), sort
# by it, and draw a bar chart of the first 15 players.
# NOTE(review): the expression reads the per-player columns `balls_recoverd`,
# `tackles`, `t_won`, `clearance_attempted` and `player_name` from `dfn`, so it
# can only run while `dfn` still holds the rows of defending.csv rather than a
# per-club aggregate; check what `dfn` is bound to before re-enabling.
# NOTE(review): `dfn['t_won']/dfn['tackles']` is presumably NaN for players
# with zero tackles; confirm how those rows should rank.
#dfn['ranking'] = (dfn['balls_recoverd']/dfn['balls_recoverd'].max())*.3 + (dfn['tackles']/dfn['tackles'].max())*.3 + (dfn['t_won']/dfn['t_won'].max())*.2 + (dfn['t_won']/dfn['tackles'])*.1 + (dfn['clearance_attempted']/dfn['clearance_attempted'].max())*.1
#dfnx = dfn.sort_values(by = 'ranking', ascending = False)
#dfnx = dfnx[:15]
#plt.xticks(fontsize = 15, rotation = 'vertical')
#p = sns.barplot(x = 'player_name', y = 'ranking', data = dfnx, palette = 'inferno', capsize = .2)
#p.axes.set_title('\n Top 15 Defenders by ranking\n', fontsize = 30)

# 8. Attacking Attempts analysis

**8.1. Attacking attempts analysis- success rate**

In [None]:
# Column totals over all rows of `atm` for the three attempt outcomes, as a
# one-column frame (column label 0) indexed by column name.
# (`gl_sum` is the same variable name the goal pies use; it holds attempt
# totals here.)
outcome_cols = ['on_target', 'off_target', 'blocked']
gl_sum = atm.sum(numeric_only=True).loc[outcome_cols].to_frame()

fig = px.pie(
    gl_sum,
    names=gl_sum.index,
    values=0,
    title='Attacking attempts analysis - success rate: on target vs off_target vs blocked',
    color_discrete_sequence=px.colors.sequential.YlOrRd,
)
fig.show()

**8.2. Club ranking by total attacking attempts**

In [None]:
# Total attempts summed per club (stored in a column named 'count'), sorted
# descending; every club is plotted.
g1 = (
    atm.groupby('club')['total_attempts']
    .sum()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
)

sns.set(rc={"figure.figsize": (20, 5)})
plt.xticks(rotation='vertical', fontsize=15)

p = sns.barplot(data=g1, x="club", y="count", palette="viridis", capsize=2)
# Trailing semicolon keeps the returned Text object out of the cell output.
p.set_title("\n Club ranking by total attacking attempts\n", fontsize=30);

**8.3. Player position ranking by total attacking attempts**

In [None]:
# Total attempts summed per playing position (stored in a column named
# 'count'), sorted descending.
g1 = (
    atm.groupby('position')['total_attempts']
    .agg('sum')
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)

sns.set(rc={'figure.figsize': (20, 5)})
plt.xticks(fontsize=15, rotation='vertical')
p = sns.barplot(x="position", y="count", data=g1, palette="viridis", capsize=2)
# The bars are positions, not individual players, so the title says so.
p.axes.set_title("\n Player position ranking by total attacking attempts\n", fontsize=30)

**8.4. Top 15 attackers by ranking on attacking attempts**

**Ranking system:**
* (total_attempts/highest total_attempts)* 0.5 +
* (on_target/highest on_target)* 0.3 +
* (on_target/total_attempts )* 0.4 -
* (off_target/total_attempts)* 0.2

In [None]:
# Weighted attacking score per row of `atm`, built from four named terms that
# are combined in the same left-to-right order as the "Ranking system" list in
# the markdown above. The score is stored on `atm` as a new `ranking` column.
attempts = atm['total_attempts']
on_target = atm['on_target']

volume_term = (attempts / attempts.max()) * 0.5         # share of the highest attempt count
on_target_term = (on_target / on_target.max()) * 0.3    # share of the highest on-target count
accuracy_term = (on_target / attempts) * 0.4            # on-target fraction of own attempts
off_target_term = (atm['off_target'] / attempts) * 0.2  # off-target fraction, subtracted

atm['ranking'] = volume_term + on_target_term + accuracy_term - off_target_term

# The 15 highest-ranked rows.
atmx = atm.sort_values('ranking', ascending=False).head(15)

sns.set(rc={"figure.figsize": (20, 5)})
plt.xticks(rotation='vertical', fontsize=15)

p = sns.barplot(data=atmx, x="player_name", y="ranking", palette="viridis", capsize=2)
p.set_title("\n Top 15 attackers by ranking on attacking attempts\n", fontsize=30)

# 9. Goal Keeper performance analysis

**9.1. Overall gk performance**

In [None]:
# Column totals over all rows of `gk` for saves and goals conceded, as a
# one-column frame (column label 0) indexed by column name.
# (`gl_sum` is the same variable name the goal pies use; it holds goalkeeping
# totals here.)
keeper_cols = ['saved', 'conceded']
gl_sum = gk.sum(numeric_only=True).loc[keeper_cols].to_frame()

fig = px.pie(
    gl_sum,
    names=gl_sum.index,
    values=0,
    title='Overall GK Performance',
    color_discrete_sequence=px.colors.sequential.Reds,
)
fig.show()

**9.2. Club ranking by gk saves**

In [None]:
# Saves summed per club (stored in a column named 'count'), sorted descending.
# The aggregate gets its own name so the goalkeeper-level `gk` table is not
# replaced: this cell and the overall-performance cell before it stay
# re-runnable.
saves_by_club = (
    gk.groupby('club')['saved']
    .agg('sum')
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)

sns.set(rc={'figure.figsize': (20, 5)})
plt.xticks(fontsize=15, rotation='vertical')
p = sns.barplot(x='club', y='count', data=saves_by_club, palette="winter", capsize=2)
# Trailing semicolon keeps the returned Text object out of the cell output.
p.axes.set_title("\n Club ranking by GK Saves\n", fontsize=30);