# English Premier League Football Players Analysis
The English Premier League is the top level of the English football league system. Its players come from many different countries and play in different positions.
The main objective is to analyze the data of all the players in the league and to group them by country and by playing position.


## Procedure

1. Web scrape the player data from the Premier League website.
2. Compare the players in the league by the country they come from, using a pie chart.
3. Compare the players in the league by playing position (forward, midfielder, defender, goalkeeper), again using a pie chart.


## Libraries Required:
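- Puppeteer: drives a browser to scroll the players page and save its HTML.
- Pickle: stores the extracted player data on disk.
- BeautifulSoup: parses the saved HTML file.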

### Puppeteer is used here because it was not possible to get all the players' data using Beautiful Soup alone.
### This code scrolls automatically to the end of the page and then saves the page's HTML to a file.


const puppeteer = require('puppeteer');
const fs = require("fs");

// Entry point: open the Premier League players page in a visible browser,
// scroll to the bottom, then save the rendered HTML and a full-page screenshot.
// NOTE(review): the output file is named 2016-2017 while the parsing step reads
// a 2019-2020 file; the season is presumably chosen by the `se` query parameter.
// Confirm that the file name matches the season actually scraped.
(async () => {
    const browser = await puppeteer.launch({
        headless: false
    });
    try {
        const page = await browser.newPage();
        // Set the viewport before navigating so the page is laid out once at
        // its final size instead of being resized after it has loaded.
        await page.setViewport({
            width: 1200,
            height: 800
        });
        await page.goto('https://www.premierleague.com/players?se=274&cl=-1');

        await autoScroll(page);

        const html = await page.content();

        fs.writeFileSync("premireLeague_2016-2017.html", html);
        await page.screenshot({
            path: 'premireLeagueScreenshot.png',
            fullPage: true
        });
    } finally {
        // Always close the browser, even when a step above throws, so no
        // browser process is left running.
        await browser.close();
    }
})().catch((err) => {
    // Report the failure and signal it through the exit code instead of
    // leaving an unhandled promise rejection.
    console.error(err);
    process.exitCode = 1;
});

/**
 * Scroll the page downwards in fixed steps until the total distance scrolled
 * reaches the height of the document body.
 *
 * @param {object} page - Puppeteer page (any object exposing `evaluate`).
 * @param {object} [options] - Optional scroll tuning.
 * @param {number} [options.distance=100] - Pixels scrolled on every step.
 * @param {number} [options.intervalMs=100] - Delay between steps, in milliseconds.
 * @returns {Promise<void>} Resolves once the bottom of the page has been reached.
 */
async function autoScroll(page, { distance = 100, intervalMs = 100 } = {}) {
    // The callback runs inside the page, so it cannot close over variables of
    // this function; the settings are handed over as an explicit argument.
    await page.evaluate(
        ({ step, delay }) =>
            new Promise((resolve) => {
                let totalHeight = 0;
                const timer = setInterval(() => {
                    // Read the height on every tick: it can change while
                    // the page is being scrolled.
                    const scrollHeight = document.body.scrollHeight;
                    window.scrollBy(0, step);
                    totalHeight += step;

                    if (totalHeight >= scrollHeight) {
                        clearInterval(timer);
                        resolve();
                    }
                }, delay);
            }),
        { step: distance, delay: intervalMs }
    );
}

### The HTML file is read, the player data is extracted from it, and the result is stored in a pickle file.

In [None]:
from bs4 import BeautifulSoup
import pickle
# Parse the saved HTML page and collect one record (name, position, country)
# per table row, then store all records in a pickle file.

# Raw string: "\p" is not a valid escape sequence, so the plain literal only
# worked by accident and triggers a warning on newer Python versions.
# The `with` block makes sure the file handle is closed after parsing.
with open(r"C:\premireLeague_2019-2020.html", encoding="utf8") as html_file:
    soup = BeautifulSoup(html_file, "html.parser")

# Named `rows` rather than `list` so the built-in `list` is not shadowed.
rows = soup.find_all('tr')
playersList = {
    "data": []
}
count = 0
# The first <tr> is skipped (presumably the table header - confirm).
for row in rows[1:]:
    name_tag = row.find('a', class_="playerName")
    position_tag = row.find('td', class_="hide-s")
    country_tag = row.find('span', class_="playerCountry")
    # Rows without all three cells are not player rows; skip them instead of
    # failing with AttributeError on `.text`.
    if name_tag is None or position_tag is None or country_tag is None:
        continue

    count += 1
    print(count)
    print("            ")
    playerName = name_tag.text
    print(playerName)
    position = position_tag.text
    print(position)
    playerCountry = country_tag.text
    print(playerCountry)
    print("            ")
    details = {
        "Name": playerName,
        "Position": position,
        "Player Country": playerCountry,
    }
    playersList['data'].append(details)

with open('players_list_2019-2020.pkl', 'wb') as f:
    pickle.dump(playersList, f, protocol=pickle.HIGHEST_PROTOCOL)

### This code takes the player data for the 2019-20 season and separates the players according to their nationality. We use collections and matplotlib to show the results in the form of a pie chart.

In [None]:
import pickle
import collections
import matplotlib.pyplot as plt

# Load the 2019-20 player records and draw a pie chart of players per country.
# Countries with fewer than `threshold` players are merged into one 'Others' slice.
with open('players_list_2019-2020.pkl', 'rb') as f:
    my_data2 = pickle.load(f)
valueData = my_data2.get('data')

threshold = 15
countryList = [player.get('Player Country') for player in valueData]
country_counts = collections.Counter(countryList)

# Counter keeps first-seen order, so labels and values stay aligned.
labels = [country for country, total in country_counts.items() if total >= threshold]
values = [total for total in country_counts.values() if total >= threshold]
others = sum(total for total in country_counts.values() if total < threshold)

labels.append('Others')
values.append(others)
print(labels)
print(values)

plt.pie(values, labels=labels, autopct='%1.1f%%', startangle=140)

# Equal axis scaling keeps the pie circular.
plt.axis('equal')
plt.show()

![Nationalitypiechart](Figure_1.png)

### This code takes the player data for the 2019-20 season and separates the players according to their playing positions. We use collections and matplotlib to show the results in the form of a pie chart.

In [None]:
import pickle
import collections
import matplotlib.pyplot as plt

# Load the 2019-20 player records and draw a pie chart of players per position.
with open('players_list_2019-2020.pkl', 'rb') as f:
    my_data2 = pickle.load(f)
valueData = my_data2.get('data')

# Despite its name, `countryList` holds each player's position in this cell.
countryList = [player.get('Position') for player in valueData]
position_counts = collections.Counter(countryList)

# One slice per distinct position, in first-seen order.
labels = [position for position in position_counts]
values = [position_counts[position] for position in labels]

# Plot
plt.pie(values, labels=labels, autopct='%1.1f%%', startangle=140)

# Equal axis scaling keeps the pie circular.
plt.axis('equal')
plt.show()



![positionpiechart](Figure_2.png)

### This code takes the player data for the last ten seasons and shows how many players playing in the league are of English nationality

In [None]:
import pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Count the players whose country is 'England' in each of the ten seasons from
# 2010-11 to 2019-20 and draw the counts as a horizontal bar chart.
labels = []
values = []
for start_year in range(2010, 2020):
    end_year = start_year + 1
    # NOTE: pickle.load can execute arbitrary code; only load files that were
    # produced by this project's own extraction step.
    with open('players_list_{}-{}.pkl'.format(start_year, end_year), 'rb') as f:
        season_data = pickle.load(f)

    countries = [player.get('Player Country') for player in season_data.get('data')]
    # Season label in the short form used on the chart, e.g. '2010-11'.
    labels.append('{}-{:02d}'.format(start_year, end_year % 100))
    values.append(countries.count('England'))

print(labels)
print(values)

# Index the series by season so the y tick labels come from the data itself.
freq_series = pd.Series(values, index=labels)

# Plot the figure.
plt.figure(figsize=(44, 44))
ax = freq_series.plot(kind='barh')
ax.set_title('Number of players from England playing in the English Premier League')
ax.set_xlabel('Number of English Players')
ax.set_ylabel('Season')
ax.set_xlim(0, 400)

# Write each count just to the right of the end of its bar.
for rect in ax.patches:
    x_value = rect.get_width()
    y_value = rect.get_y() + rect.get_height() / 2

    ax.annotate(
        "{:.0f}".format(x_value),   # counts are whole numbers
        (x_value, y_value),         # anchor at the end of the bar
        xytext=(5, 0),              # shift the text 5 points to the right
        textcoords="offset points",
        va='center',
        ha='left')

plt.savefig("image.png")


![englishplayersbargraph](Figure_23png.png)