# PREDIKSI HASIL PERTANDINGAN ENGLISH PREMIER LEAGUE

# 1. Pengambilan Data

- Mengambil data dari website http://www.football-data.co.uk, yang menyediakan berbagai data pertandingan sepak bola.
- Data di website terpisah berdasarkan musimnya (per musim) dan tersedia dalam format CSV. 
- Diambil data English Premier League mulai musim 2010/2011 sampai 2018/2019 (9 musim).

In [1]:
# Impor/Setup library

import pandas as pd   # membaca data
import requests   # meminta data dari url
from bs4 import BeautifulSoup   # menarik data
import time  # akses waktu pengambilan data

In [2]:
# Season codes as they appear in the football-data.co.uk download paths
epl_seasons = ['1011', '1112', '1213', '1314', '1415', '1516', '1617', '1718', '1819']

# Empty list that will hold one DataFrame per season
epl = []

# Match columns to keep: home team, away team, match result, goals,
# shots, shots on target and corners
match_columns = ['HomeTeam', 'AwayTeam', 'FTR',
                 'FTHG', 'FTAG', 'HS', 'AS', 'HST',
                 'AST', 'HC', 'AC']
season_url = 'http://www.football-data.co.uk/mmz4281/{}/E0.csv'

# Download every season, keep only the columns above and tag the rows
# with the numeric season code
for season in epl_seasons:
    season_matches = pd.read_csv(season_url.format(season))[match_columns]
    season_matches['Season'] = int(season[:4])
    epl.append(season_matches)

In [3]:
# Show the match data of the first loaded season
epl[0]

Unnamed: 0,HomeTeam,AwayTeam,FTR,FTHG,FTAG,HS,AS,HST,AST,HC,AC,Season
0,Aston Villa,West Ham,H,3,0,23,12,11,2,16,7,1011
1,Blackburn,Everton,H,1,0,7,17,2,12,1,3,1011
2,Bolton,Fulham,D,0,0,13,12,9,7,4,8,1011
3,Chelsea,West Brom,H,6,0,18,10,13,4,3,1,1011
4,Sunderland,Birmingham,D,2,2,6,13,2,7,3,6,1011
...,...,...,...,...,...,...,...,...,...,...,...,...
375,Newcastle,West Brom,D,3,3,15,13,10,7,7,6,1011
376,Stoke,Wigan,A,0,1,11,11,5,9,5,3,1011
377,Tottenham,Birmingham,H,2,1,22,7,16,3,7,5,1011
378,West Ham,Sunderland,A,0,3,17,17,12,12,4,6,1011


In [4]:
# Create (save) one match CSV file per season

# The 1415 download contains an empty row (missing data). Remove rows that
# carry no match information instead of blindly dropping the last row, so
# that re-running this cell can never delete a real match.
epl[4].dropna(subset=['HomeTeam', 'AwayTeam', 'FTR'], how='all', inplace=True)

# File names follow the season codes: season-1011.csv ... season-1819.csv
for season_code, season_frame in zip(epl_seasons, epl):
    season_frame.to_csv('season-' + season_code + '.csv')

- Mengambil data dari website https://www.pesmaster.com, yang menyediakan data rating berdasarkan game PES.
- Data di website tersedia dalam tampilan tabel. 
- Diambil data rating tim English Premier League dari PES 11 (musim 2010/2011) sampai PES 19 (musim 2018/2019).

In [5]:
# Dictionary used to rename teams.
# Team names from the PES data are changed to match the Football-Data names:
# the key (left) is the PES name, the value (right) is the Football-Data name.

change_team_name = {'Burnley FC': 'Burnley',
                    'Southampton FC': 'Southampton',
                    'West Bromwich Albion': 'West Brom',
                    'Manchester United': 'Man United',
                    'Manchester City': 'Man City',
                    'Newcastle United': 'Newcastle',
                    'West Ham United': 'West Ham',
                    'Tottenham Hotspur': 'Tottenham',
                    'Bolton Wanderers': 'Bolton',
                    'Blackburn Rovers': 'Blackburn',
                    'Wigan Athletic': 'Wigan',
                    'Brighton & Hove Albion': 'Brighton',
                    'Huddersfield Town': 'Huddersfield',
                    'Cardiff City': 'Cardiff',
                    'Hull City': 'Hull',
                    'Norwich City': 'Norwich',
                    'Stoke City': 'Stoke',
                    'Birmingham City': 'Birmingham',
                    'Swansea City': 'Swansea',
                    'Leicester City': 'Leicester',
                    'Wolverhampton Wanderers': 'Wolves'}

In [6]:
# Empty list that collects one ratings DataFrame per PES edition
teams_ratings = []

# Rating categories in the order of the table cells that follow the team name:
# overall, defence, midfield, forward, physique, speed
rating_stats = ['Ovr', 'Def', 'Mid', 'Fwd', 'Phy', 'Spd']
home_columns = ['H' + stat + 'PES' for stat in rating_stats]
away_columns = ['A' + stat + 'PES' for stat in rating_stats]

# Loop over every edition, PES 11 - PES 19
for year in range(2011, 2020):
    url = 'https://www.pesmaster.com/english-league/pes-' + str(year) + '/league/9/'
    # The timeout keeps the cell from hanging forever when the server does not answer
    res = requests.get(url, headers={'User-agent': 'slsl'}, timeout=30)

    if res.status_code != 200:
        print('Status not 200', res.status_code)
        break

    soup = BeautifulSoup(res.content, 'lxml')
    table = soup.find('table', {'id': 'search-result-table'})

    teams = []

    # The first <tr> is skipped (presumably the header row); rows without
    # <td> cells are ignored
    for row in table.find_all('tr')[1:]:
        td = row.find_all('td')
        if not td:
            continue

        result = {'Team': td[0].text, 'PES': year - 2000}
        # Each rating is stored twice, once under the home (H...) and once
        # under the away (A...) column name
        for position, (home_col, away_col) in enumerate(zip(home_columns, away_columns), start=1):
            result[home_col] = td[position].text
            result[away_col] = td[position].text
        teams.append(result)

    # Wait 3 seconds between requests
    time.sleep(3)

    teams_data = pd.DataFrame(teams, columns=['PES', 'Team'] + home_columns + away_columns)
    teams_data = teams_data.set_index('Team')
    # Align the PES team names with the Football-Data names
    teams_data.rename(index=change_team_name, inplace=True)
    teams_data.sort_index(inplace=True)

    # Add this edition to the list
    teams_ratings.append(teams_data)

In [7]:
# Show the PES rating data of the first scraped edition
teams_ratings[0]

Unnamed: 0_level_0,PES,HOvrPES,HDefPES,HMidPES,HFwdPES,HPhyPES,HSpdPES,AOvrPES,ADefPES,AMidPES,AFwdPES,APhyPES,ASpdPES
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Arsenal,11,77,76,74,82,78,80,77,76,74,82,78,80
Aston Villa,11,74,73,72,77,78,79,74,73,72,77,78,79
Birmingham,11,73,77,71,73,78,77,73,77,71,73,78,77
Blackburn,11,71,75,68,70,80,76,71,75,68,70,80,76
Blackpool,11,69,71,68,69,77,76,69,71,68,69,77,76
Bolton,11,72,75,74,68,79,77,72,75,74,68,79,77
Chelsea,11,77,82,76,76,79,78,77,82,76,76,79,78
Everton,11,72,76,71,69,79,77,72,76,71,69,79,77
Fulham,11,74,76,73,74,79,76,74,76,73,74,79,76
Liverpool,11,75,74,76,73,80,76,75,74,76,73,80,76


In [8]:
# Create (save) one PES rating CSV file per season

rating_tags = ['1011', '1112', '1213', '1314', '1415', '1516', '1617', '1718', '1819']

# Missing forward (Fwd) ratings, keyed by position in teams_ratings.
# Each entry is (row position, estimated value); the estimates were obtained by
# averaging the ratings of the team's forwards directly from the website.
#   1112: row 11 = Norwich, row 13 = Stoke
#   1213: row 11 = Reading, row 13 = Stoke
estimated_fwd = {
    1: ((11, 65), (13, 71)),
    2: ((11, 66), (13, 72)),
}

for position, tag in enumerate(rating_tags):
    ratings = teams_ratings[position]

    # Fill in the estimates (home column first, then away) before saving
    for row_position, estimate in estimated_fwd.get(position, ()):
        for fwd_column in ('HFwdPES', 'AFwdPES'):
            ratings.iloc[row_position, ratings.columns.get_loc(fwd_column)] = estimate

    ratings.to_csv('rating-' + tag + 'PES.csv')