# Exploratory Data Analysis for the three seasons of 2017-18, 2018-19 and 2019-20

In [1]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd 
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import plotly.express as pex
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

  import pandas.util.testing as tm


In [2]:
# Load the dataset that every chart in this notebook is built from. The path
# is relative, so the CSV must be in the current working directory.
# The cells below read the columns 'Season', 'Position', 'nationality', 'age',
# 'Appearances', 'mod name', 'value_eur', 'overall', 'Clean sheets', 'Saves'
# and 'Arial Saves' from this frame.
data = pd.read_csv("EDA_data.csv")

## Distribution of Players by Country, Age, and Position

In [35]:
# Pie chart: one slice per nationality, sized by the summed 'Appearances'
# over every row of `data` (no season filter is applied in this cell).
df = data
fig = px.pie(
    data_frame=df,
    names="nationality",
    values="Appearances",
    title="Countries represented in the EPL by number of appearances",
)
# Draw the label and the percentage inside each slice.
fig.update_traces(textinfo="percent+label", textposition="inside")
iplot(fig)

In [47]:
# Sunburst restricted to rows whose 'Season' equals '2019-20'.
# The hierarchy goes Position -> nationality, and sectors are sized by
# 'Appearances'.
df = data.loc[data["Season"] == "2019-20"]
fig = pex.sunburst(
    data_frame=df,
    values="Appearances",
    path=["Position", "nationality"],
    title="Countries represented in the EPL by number of appearances and by players position for the 2019-20 season",
)
iplot(fig)

In [48]:
# Same Position -> nationality sunburst as the per-season one, but built from
# the whole of `data` (no season filter), with sectors sized by 'Appearances'.
df = data
fig = pex.sunburst(
    data_frame=df,
    values="Appearances",
    path=["Position", "nationality"],
    title="Countries represented in the EPL by number of appearances and by players position for all three seasons",
)
iplot(fig)

In [43]:
# Pie chart: one slice per player age, sized by the summed 'Appearances'
# over every row of `data` (no season filter is applied in this cell).
df = data
fig = px.pie(df,
             values='Appearances',
             names='age',
             # The slices are ages, not countries; the previous title was
             # carried over from the nationality pie and mislabelled the chart.
             title='Share of appearances in the EPL by players age',
             )
# Draw the label and the percentage inside each slice.
fig.update_traces(textposition='inside', textinfo='percent+label')
iplot(fig)

In [34]:
# Horizontal bars: player age on the y axis, 'Appearances' on the x axis,
# coloured by 'Position'. Hovering a segment also shows its 'mod name' value.
df = data
fig = px.bar(
    data_frame=df,
    x="Appearances",
    y="age",
    orientation="h",
    color="Position",
    hover_data=["mod name"],
    title="Number of appearances distributed by Players Age",
)
iplot(fig)

In [61]:
# Vertical bars: one bar per 'Position' with 'Appearances' as its height,
# coloured by nationality. Hovering a segment also shows its 'mod name' value.
df = data
fig = px.bar(df, x="Position", y="Appearances", hover_data=["mod name"], color='nationality',
             height=600,
             # Title wording repaired ("Looks like more the" / "Note quite").
             title='Players appearance by position (Looks more like the traditional 4-4-2? Not quite, but close)')
iplot(fig)

## Players Age distribution by position

In [10]:
# One violin per 'Position' showing the distribution of 'age';
# box=True overlays a box plot inside each violin.
df = data
fig = px.violin(
    data_frame=df,
    x="Position",
    y="age",
    box=True,
    title="Players Age distribution by position",
)
iplot(fig)

## Market Value distribution by Players Age for the 2019-20 season

In [38]:
# 2019-20 rows only. Horizontal bars with player age on the y axis and
# 'value_eur' on the x axis, coloured by 'Position'; hover also shows
# 'mod name'.
df = data.loc[data["Season"] == "2019-20"]
fig = px.bar(
    data_frame=df,
    x="value_eur",
    y="age",
    orientation="h",
    color="Position",
    hover_data=["mod name"],
    title="Market Value distribution by Players Age for the 2019-20 season",
)
iplot(fig)

## Overall Rating distribution by Players Age for the 2019-20 season

In [39]:
# 2019-20 rows only. Horizontal bars with player age on the y axis and the
# 'overall' column on the x axis, coloured by 'Position'; hover also shows
# 'mod name'.
df = data.loc[data["Season"] == "2019-20"]
fig = px.bar(
    data_frame=df,
    x="overall",
    y="age",
    orientation="h",
    color="Position",
    hover_data=["mod name"],
    title="Overall Rating distribution by Players Age for the 2019-20 season",
)
iplot(fig)

## Top stats for goalkeepers in the season 2019-20

In [60]:
from plotly.subplots import make_subplots

# Top-`head` goalkeepers of the 2019-20 season for three stats, drawn as three
# horizontal bar charts stacked in a single column.
data_1920 = data[data['Season'] == '2019-20']
head = 5

# Stat columns to rank by; each string is used both as the column key and as
# the subplot title.
# NOTE(review): 'Arial Saves' is presumably the spelling used in the CSV
# header. Do not "correct" it here without renaming the column as well.
goalkeeper_stats = ('Clean sheets', 'Saves', 'Arial Saves')

# Filter the goalkeepers once instead of repeating the same mask per stat.
goalkeepers = data_1920[data_1920['Position'] == "Goalkeeper"]

# df1/df2/df3 stay bound as notebook globals: the `head` rows with the
# highest value of each stat, in the order listed in `goalkeeper_stats`.
df1, df2, df3 = (
    goalkeepers.sort_values(by=stat, ascending=False).head(head)
    for stat in goalkeeper_stats
)

fig = make_subplots(
    rows=len(goalkeeper_stats), cols=1,
    subplot_titles=goalkeeper_stats)

# One bar trace per stat, placed on its own subplot row (rows are 1-based).
for row, (stat, top) in enumerate(zip(goalkeeper_stats, (df1, df2, df3)), start=1):
    fig.add_trace(
        go.Bar(
            y=top["mod name"],
            x=top[stat],
            hovertext=top['nationality'],
            orientation='h'),
        row=row, col=1)

# All layout settings in a single call: centred title, no legend, and a
# fixed 800x1200 canvas.
fig.update_layout(
    title_text='Top stats for goalkeepers in the season 2019-20',
    title_x=0.5,
    showlegend=False,
    autosize=False,
    width=800,
    height=1200,
)
fig.show()