In [1]:
import pandas as pd

# Data (Web Scraping)
##### data web (https://football-data.co.uk/englandm.php)

In [6]:
#selecting data
data = pd.read_csv('E0.csv')

#clearing data
columns_name = ['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']
data_clear = data.copy()
data_clear = data_clear[data_clear.columns.intersection(columns_name)]

# reaname columns
data_clear.rename(columns={
    'HomeTeam':'home_team',
    'AwayTeam':'away_team',
    'FTHG':'home_goals',
    'FTAG':'away_goals'
})

Unnamed: 0,home_team,away_team,home_goals,away_goals
0,Crystal Palace,Arsenal,0,2
1,Fulham,Liverpool,2,2
2,Bournemouth,Aston Villa,2,0
3,Leeds,Wolves,2,1
4,Newcastle,Nott'm Forest,2,0
...,...,...,...,...
141,West Ham,Leicester,0,2
142,Newcastle,Chelsea,1,0
143,Wolves,Arsenal,0,2
144,Brighton,Aston Villa,1,2


# Make Clasement League

In [9]:
def convert_data(data_input):
    teams = list(data_input.HomeTeam.unique())
    result = []
    for team in teams:
        raw_data_home = data_input.loc[data_input.HomeTeam==team][['FTHG', 'FTAG']]
        raw_data_away = data_input.loc[data_input.AwayTeam==team][['FTHG', 'FTAG']]
        play = len(raw_data_home) + len(raw_data_away)
        win = (sum((raw_data_home['FTHG'] > raw_data_home['FTAG'])) + sum((raw_data_away['FTHG'] < raw_data_away['FTAG']))) or 0
        draw = (sum((raw_data_home['FTHG'] == raw_data_home['FTAG'])) + sum((raw_data_away['FTHG'] == raw_data_away['FTAG']))) or 0
        lose = (play - (win + draw))
        gf = ((raw_data_home.FTHG.sum() + raw_data_away.FTAG.sum()) or 0)
        ga = ((raw_data_home.FTAG.sum() + raw_data_away.FTHG.sum()) or 0)
        gd = gf - ga

        point = (win * 3 ) + draw

        data = [team, play, win, draw, lose, gf, ga, gd, point]
        result.append(data)

    return pd.DataFrame(result, columns = [
        'Team', 'Play', 'Win', 'Draw', 'Lose', 
        'Goals For (GF)', 'Goals Against (GA)', 
        'Goal Difference (GD)', 'Point'])

In [None]:
premier_league = convert_data(data_clear)
premier_league.to_csv('clasement_premier_league.csv', index=False)

In [143]:
# preview data
premier_league.sort_values('Point', ascending=False)

Unnamed: 0,Team,Play,Win,Draw,Lose,Goals For (GF),Goals Against (GA),Goal Difference (GD),Point
11,Arsenal,14,12,1,1,33,11,22,37
13,Man City,14,10,2,2,40,14,26,32
4,Newcastle,15,8,6,1,29,11,18,30
5,Tottenham,15,9,2,4,31,21,10,29
8,Man United,14,8,2,4,20,20,0,26
19,Liverpool,14,6,4,4,28,17,11,22
18,Chelsea,14,6,3,5,17,17,0,21
12,Brighton,14,6,3,5,23,19,4,21
1,Fulham,15,5,4,6,24,26,-2,19
16,Brentford,15,4,7,4,23,25,-2,19


# Make Clasement with Prediction
###### total match = 19 * 2

to do:
make:
    1. avg goal
    2. win rate
    3. prediction win
    

In [137]:
def convert_data_with_prdict(data_input):
    teams = list(data_input.HomeTeam.unique())
    result = []
    for team in teams:
        overallmatch = 38
        raw_data_home = data_input.loc[data_input.HomeTeam==team][['FTHG', 'FTAG']]
        raw_data_away = data_input.loc[data_input.AwayTeam==team][['FTHG', 'FTAG']]
        play = len(raw_data_home) + len(raw_data_away)
        win = (sum((raw_data_home['FTHG'] > raw_data_home['FTAG'])) + sum((raw_data_away['FTHG'] < raw_data_away['FTAG']))) or 0
        draw = (sum((raw_data_home['FTHG'] == raw_data_home['FTAG'])) + sum((raw_data_away['FTHG'] == raw_data_away['FTAG']))) or 0
        lose = (play - (win + draw))
        gf = ((raw_data_home.FTHG.sum() + raw_data_away.FTAG.sum()) or 0)
        ga = ((raw_data_home.FTAG.sum() + raw_data_away.FTHG.sum()) or 0)
        gd = gf - ga
        point = (win * 3 ) + draw
        avggf = format((gf/play), '.2f')
        avgga = format((ga/play), '.2f')
        winratio = format((win/play), '.1%')
        drawratio = format((draw/play), '.1%')
        loseratio = format((lose/play), '.1%')

        data = [team, play, win, draw, lose, gf, ga, gd, point, avggf, avgga, winratio, drawratio, loseratio]
        result.append(data)

    return pd.DataFrame(result, columns = [
        'Team', 'Play', 'Win', 'Draw', 'Lose', 
        'Goals For (GF)', 'Goals Against (GA)', 
        'Goal Difference (GD)', 'Point', 'AVG-GF', 'AVG-GA', 'Win-Ratio', 'Draw-Ratio' ,'Lose-Ratio'])

In [138]:
premier_league_with_predict = convert_data_with_prdict(data_clear)
premier_league_with_predict.to_csv('clasement_premier_league_with_predict.csv', index=False)

In [139]:
# preview data
premier_league_with_predict.sort_values('Point', ascending=False)

Unnamed: 0,Team,Play,Win,Draw,Lose,Goals For (GF),Goals Against (GA),Goal Difference (GD),Point,AVG-GF,AVG-GA,Win-Ratio,Draw-Ratio,Lose-Ratio
11,Arsenal,14,12,1,1,33,11,22,37,2.36,0.79,85.7%,7.1%,7.1%
13,Man City,14,10,2,2,40,14,26,32,2.86,1.0,71.4%,14.3%,14.3%
4,Newcastle,15,8,6,1,29,11,18,30,1.93,0.73,53.3%,40.0%,6.7%
5,Tottenham,15,9,2,4,31,21,10,29,2.07,1.4,60.0%,13.3%,26.7%
8,Man United,14,8,2,4,20,20,0,26,1.43,1.43,57.1%,14.3%,28.6%
19,Liverpool,14,6,4,4,28,17,11,22,2.0,1.21,42.9%,28.6%,28.6%
18,Chelsea,14,6,3,5,17,17,0,21,1.21,1.21,42.9%,21.4%,35.7%
12,Brighton,14,6,3,5,23,19,4,21,1.64,1.36,42.9%,21.4%,35.7%
1,Fulham,15,5,4,6,24,26,-2,19,1.6,1.73,33.3%,26.7%,40.0%
16,Brentford,15,4,7,4,23,25,-2,19,1.53,1.67,26.7%,46.7%,26.7%
