In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score

### Importing & cleaning data

In [2]:
matches = pd.read_csv('matches.csv', index_col=0)

In [3]:
matches.head()

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,match report,notes,sh,sot,dist,fk,pk,pkatt,season,team
1,2022-08-07,16:30,Premier League,Matchweek 1,Sun,Away,W,2,0,West Ham,...,Match Report,,13.0,1.0,18.7,1.0,1.0,1.0,2023,Manchester City
2,2022-08-13,15:00,Premier League,Matchweek 2,Sat,Home,W,4,0,Bournemouth,...,Match Report,,19.0,7.0,17.5,0.0,0.0,0.0,2023,Manchester City
3,2022-08-21,16:30,Premier League,Matchweek 3,Sun,Away,D,3,3,Newcastle Utd,...,Match Report,,21.0,10.0,16.2,1.0,0.0,0.0,2023,Manchester City
4,2022-08-27,15:00,Premier League,Matchweek 4,Sat,Home,W,4,2,Crystal Palace,...,Match Report,,18.0,5.0,14.1,0.0,0.0,0.0,2023,Manchester City
5,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,6,0,Nott'ham Forest,...,Match Report,,17.0,9.0,14.8,0.0,0.0,0.0,2023,Manchester City


In [4]:
matches.shape

(1520, 27)

In [5]:
matches['date'] = pd.to_datetime(matches['date'])

In [6]:
matches.dtypes

date            datetime64[ns]
time                    object
comp                    object
round                   object
day                     object
venue                   object
result                  object
gf                       int64
ga                       int64
opponent                object
xg                     float64
xga                    float64
poss                   float64
attendance             float64
captain                 object
formation               object
referee                 object
match report            object
notes                  float64
sh                     float64
sot                    float64
dist                   float64
fk                     float64
pk                     float64
pkatt                  float64
season                   int64
team                    object
dtype: object

### Creating predictors for machine learning:

In [7]:
matches['venue_code'] = matches['venue'].astype('category').cat.codes

In [8]:
matches.head()

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,notes,sh,sot,dist,fk,pk,pkatt,season,team,venue_code
1,2022-08-07,16:30,Premier League,Matchweek 1,Sun,Away,W,2,0,West Ham,...,,13.0,1.0,18.7,1.0,1.0,1.0,2023,Manchester City,0
2,2022-08-13,15:00,Premier League,Matchweek 2,Sat,Home,W,4,0,Bournemouth,...,,19.0,7.0,17.5,0.0,0.0,0.0,2023,Manchester City,1
3,2022-08-21,16:30,Premier League,Matchweek 3,Sun,Away,D,3,3,Newcastle Utd,...,,21.0,10.0,16.2,1.0,0.0,0.0,2023,Manchester City,0
4,2022-08-27,15:00,Premier League,Matchweek 4,Sat,Home,W,4,2,Crystal Palace,...,,18.0,5.0,14.1,0.0,0.0,0.0,2023,Manchester City,1
5,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,6,0,Nott'ham Forest,...,,17.0,9.0,14.8,0.0,0.0,0.0,2023,Manchester City,1


In [9]:
matches['opp_code'] = matches['opponent'].astype('category').cat.codes

In [10]:
matches.head()

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,sh,sot,dist,fk,pk,pkatt,season,team,venue_code,opp_code
1,2022-08-07,16:30,Premier League,Matchweek 1,Sun,Away,W,2,0,West Ham,...,13.0,1.0,18.7,1.0,1.0,1.0,2023,Manchester City,0,21
2,2022-08-13,15:00,Premier League,Matchweek 2,Sat,Home,W,4,0,Bournemouth,...,19.0,7.0,17.5,0.0,0.0,0.0,2023,Manchester City,1,2
3,2022-08-21,16:30,Premier League,Matchweek 3,Sun,Away,D,3,3,Newcastle Utd,...,21.0,10.0,16.2,1.0,0.0,0.0,2023,Manchester City,0,15
4,2022-08-27,15:00,Premier League,Matchweek 4,Sat,Home,W,4,2,Crystal Palace,...,18.0,5.0,14.1,0.0,0.0,0.0,2023,Manchester City,1,7
5,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,6,0,Nott'ham Forest,...,17.0,9.0,14.8,0.0,0.0,0.0,2023,Manchester City,1,17


In [11]:
matches['hour'] = matches['time'].str.replace(":.+", '', regex=True).astype('int')

In [12]:
matches.head()

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,sot,dist,fk,pk,pkatt,season,team,venue_code,opp_code,hour
1,2022-08-07,16:30,Premier League,Matchweek 1,Sun,Away,W,2,0,West Ham,...,1.0,18.7,1.0,1.0,1.0,2023,Manchester City,0,21,16
2,2022-08-13,15:00,Premier League,Matchweek 2,Sat,Home,W,4,0,Bournemouth,...,7.0,17.5,0.0,0.0,0.0,2023,Manchester City,1,2,15
3,2022-08-21,16:30,Premier League,Matchweek 3,Sun,Away,D,3,3,Newcastle Utd,...,10.0,16.2,1.0,0.0,0.0,2023,Manchester City,0,15,16
4,2022-08-27,15:00,Premier League,Matchweek 4,Sat,Home,W,4,2,Crystal Palace,...,5.0,14.1,0.0,0.0,0.0,2023,Manchester City,1,7,15
5,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,6,0,Nott'ham Forest,...,9.0,14.8,0.0,0.0,0.0,2023,Manchester City,1,17,19


In [13]:
matches['day_code'] = matches['date'].dt.dayofweek

In [14]:
matches.head()

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,dist,fk,pk,pkatt,season,team,venue_code,opp_code,hour,day_code
1,2022-08-07,16:30,Premier League,Matchweek 1,Sun,Away,W,2,0,West Ham,...,18.7,1.0,1.0,1.0,2023,Manchester City,0,21,16,6
2,2022-08-13,15:00,Premier League,Matchweek 2,Sat,Home,W,4,0,Bournemouth,...,17.5,0.0,0.0,0.0,2023,Manchester City,1,2,15,5
3,2022-08-21,16:30,Premier League,Matchweek 3,Sun,Away,D,3,3,Newcastle Utd,...,16.2,1.0,0.0,0.0,2023,Manchester City,0,15,16,6
4,2022-08-27,15:00,Premier League,Matchweek 4,Sat,Home,W,4,2,Crystal Palace,...,14.1,0.0,0.0,0.0,2023,Manchester City,1,7,15,5
5,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,6,0,Nott'ham Forest,...,14.8,0.0,0.0,0.0,2023,Manchester City,1,17,19,2


In [15]:
matches['target'] = (matches['result'] == 'W').astype('int')

In [16]:
matches.head()

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,fk,pk,pkatt,season,team,venue_code,opp_code,hour,day_code,target
1,2022-08-07,16:30,Premier League,Matchweek 1,Sun,Away,W,2,0,West Ham,...,1.0,1.0,1.0,2023,Manchester City,0,21,16,6,1
2,2022-08-13,15:00,Premier League,Matchweek 2,Sat,Home,W,4,0,Bournemouth,...,0.0,0.0,0.0,2023,Manchester City,1,2,15,5,1
3,2022-08-21,16:30,Premier League,Matchweek 3,Sun,Away,D,3,3,Newcastle Utd,...,1.0,0.0,0.0,2023,Manchester City,0,15,16,6,0
4,2022-08-27,15:00,Premier League,Matchweek 4,Sat,Home,W,4,2,Crystal Palace,...,0.0,0.0,0.0,2023,Manchester City,1,7,15,5,1
5,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,6,0,Nott'ham Forest,...,0.0,0.0,0.0,2023,Manchester City,1,17,19,2,1


In [17]:
rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=1)

In [18]:
train = matches[matches['date'] < '2023-01-01']

In [19]:
test = matches[matches['date'] > '2023-01-01']

In [20]:
predictors = ['venue_code', 'opp_code', 'hour', 'day_code']

In [21]:
rf.fit(train[predictors], train['target'])

In [22]:
predictions = rf.predict(test[predictors])

In [23]:
acc = accuracy_score(test['target'], predictions)

In [24]:
acc

0.5981308411214953

In [25]:
combined = pd.DataFrame(dict(actual=test['target'], prediction=predictions))

In [26]:
pd.crosstab(index=combined['actual'], columns=combined['prediction'])

prediction,0,1
actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,203,60
1,112,53


In [27]:
precision_score(test['target'], predictions)

0.4690265486725664

Precision score is telling us that when we predict that a team will win, we are only accurate 46.9% of the time.

### Creating more predictors to improve the accuracy of the model:

In [28]:
grouped_matches = matches.groupby('team')

In [29]:
group = grouped_matches.get_group('Chelsea')

In [30]:
def rolling_avg(group, cols, new_cols):
    group = group.sort_values('date')
    rolling_stats = group[cols].rolling(3, closed='left').mean()
    group[new_cols] = rolling_stats
    group = group.dropna(subset=new_cols)
    return group

In [31]:
cols = ['gf', 'ga', 'sh', 'sot', 'dist', 'fk', 'pk', 'pkatt']
new_cols = [f'{c}_rolling' for c in cols]
new_cols

['gf_rolling',
 'ga_rolling',
 'sh_rolling',
 'sot_rolling',
 'dist_rolling',
 'fk_rolling',
 'pk_rolling',
 'pkatt_rolling']

In [32]:
rolling_avg(group, cols, new_cols)

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,day_code,target,gf_rolling,ga_rolling,sh_rolling,sot_rolling,dist_rolling,fk_rolling,pk_rolling,pkatt_rolling
4,2021-09-11,17:30,Premier League,Matchweek 4,Sat,Home,W,3,0,Aston Villa,...,5,1,2.000000,0.333333,13.666667,4.666667,16.500000,1.333333,0.0,0.0
6,2021-09-19,16:30,Premier League,Matchweek 5,Sun,Away,W,3,0,Tottenham,...,6,1,2.000000,0.333333,13.333333,4.000000,17.033333,0.000000,0.0,0.0
8,2021-09-25,12:30,Premier League,Matchweek 6,Sat,Home,L,0,1,Manchester City,...,5,0,2.333333,0.333333,12.666667,5.333333,16.533333,0.000000,0.0,0.0
10,2021-10-02,15:00,Premier League,Matchweek 7,Sat,Home,W,3,1,Southampton,...,5,1,2.000000,0.333333,12.333333,4.333333,21.666667,0.333333,0.0,0.0
11,2021-10-16,17:30,Premier League,Matchweek 8,Sat,Away,W,1,0,Brentford,...,5,1,2.000000,0.666667,15.000000,6.000000,18.400000,0.333333,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45,2023-05-06,15:00,Premier League,Matchweek 35,Sat,Away,W,3,1,Bournemouth,...,5,1,0.666667,2.333333,10.000000,3.333333,17.533333,0.000000,0.0,0.0
46,2023-05-13,15:00,Premier League,Matchweek 36,Sat,Home,D,2,2,Nott'ham Forest,...,5,0,1.333333,2.000000,11.000000,4.333333,16.866667,0.000000,0.0,0.0
47,2023-05-21,16:00,Premier League,Matchweek 37,Sun,Away,L,0,1,Manchester City,...,6,0,2.000000,2.000000,10.666667,5.000000,17.166667,0.333333,0.0,0.0
48,2023-05-25,20:00,Premier League,Matchweek 32,Thu,Away,L,1,4,Manchester Utd,...,3,0,1.666667,1.333333,12.666667,5.666667,16.666667,0.666667,0.0,0.0


In [33]:
matches_rolling = matches.groupby('team').apply(lambda x: rolling_avg(x, cols, new_cols))

In [34]:
matches_rolling

Unnamed: 0_level_0,Unnamed: 1_level_0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,day_code,target,gf_rolling,ga_rolling,sh_rolling,sot_rolling,dist_rolling,fk_rolling,pk_rolling,pkatt_rolling
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Arsenal,4,2021-09-11,15:00,Premier League,Matchweek 4,Sat,Home,W,1,0,Norwich City,...,5,1,0.000000,3.000000,9.666667,2.333333,14.833333,0.333333,0.000000,0.000000
Arsenal,5,2021-09-18,15:00,Premier League,Matchweek 5,Sat,Away,W,1,0,Burnley,...,5,1,0.333333,2.333333,12.333333,3.000000,14.133333,0.333333,0.000000,0.000000
Arsenal,7,2021-09-26,16:30,Premier League,Matchweek 6,Sun,Home,W,3,1,Tottenham,...,6,1,0.666667,1.666667,14.666667,3.000000,14.800000,0.666667,0.000000,0.000000
Arsenal,8,2021-10-02,17:30,Premier League,Matchweek 7,Sat,Away,D,0,0,Brighton,...,5,0,1.666667,0.333333,18.333333,5.333333,18.433333,0.666667,0.000000,0.000000
Arsenal,9,2021-10-18,20:00,Premier League,Matchweek 8,Mon,Home,D,2,2,Crystal Palace,...,0,0,1.333333,0.333333,11.000000,4.000000,19.833333,0.666667,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wolverhampton Wanderers,39,2023-04-29,15:00,Premier League,Matchweek 34,Sat,Away,L,0,6,Brighton,...,5,0,1.666667,0.666667,11.666667,4.666667,18.700000,0.666667,0.333333,0.333333
Wolverhampton Wanderers,40,2023-05-06,15:00,Premier League,Matchweek 35,Sat,Home,W,1,0,Aston Villa,...,5,1,1.000000,2.666667,11.333333,2.333333,18.800000,0.666667,0.333333,0.333333
Wolverhampton Wanderers,41,2023-05-13,15:00,Premier League,Matchweek 36,Sat,Away,L,0,2,Manchester Utd,...,5,0,1.000000,2.000000,8.000000,2.000000,17.766667,0.000000,0.333333,0.333333
Wolverhampton Wanderers,42,2023-05-20,15:00,Premier League,Matchweek 37,Sat,Home,D,1,1,Everton,...,5,0,0.333333,2.666667,7.000000,1.333333,15.600000,0.000000,0.000000,0.000000


In [35]:
matches_rolling = matches_rolling.droplevel('team')

In [36]:
matches_rolling

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,day_code,target,gf_rolling,ga_rolling,sh_rolling,sot_rolling,dist_rolling,fk_rolling,pk_rolling,pkatt_rolling
4,2021-09-11,15:00,Premier League,Matchweek 4,Sat,Home,W,1,0,Norwich City,...,5,1,0.000000,3.000000,9.666667,2.333333,14.833333,0.333333,0.000000,0.000000
5,2021-09-18,15:00,Premier League,Matchweek 5,Sat,Away,W,1,0,Burnley,...,5,1,0.333333,2.333333,12.333333,3.000000,14.133333,0.333333,0.000000,0.000000
7,2021-09-26,16:30,Premier League,Matchweek 6,Sun,Home,W,3,1,Tottenham,...,6,1,0.666667,1.666667,14.666667,3.000000,14.800000,0.666667,0.000000,0.000000
8,2021-10-02,17:30,Premier League,Matchweek 7,Sat,Away,D,0,0,Brighton,...,5,0,1.666667,0.333333,18.333333,5.333333,18.433333,0.666667,0.000000,0.000000
9,2021-10-18,20:00,Premier League,Matchweek 8,Mon,Home,D,2,2,Crystal Palace,...,0,0,1.333333,0.333333,11.000000,4.000000,19.833333,0.666667,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39,2023-04-29,15:00,Premier League,Matchweek 34,Sat,Away,L,0,6,Brighton,...,5,0,1.666667,0.666667,11.666667,4.666667,18.700000,0.666667,0.333333,0.333333
40,2023-05-06,15:00,Premier League,Matchweek 35,Sat,Home,W,1,0,Aston Villa,...,5,1,1.000000,2.666667,11.333333,2.333333,18.800000,0.666667,0.333333,0.333333
41,2023-05-13,15:00,Premier League,Matchweek 36,Sat,Away,L,0,2,Manchester Utd,...,5,0,1.000000,2.000000,8.000000,2.000000,17.766667,0.000000,0.333333,0.333333
42,2023-05-20,15:00,Premier League,Matchweek 37,Sat,Home,D,1,1,Everton,...,5,0,0.333333,2.666667,7.000000,1.333333,15.600000,0.000000,0.000000,0.000000


In [37]:
matches_rolling.index = range(matches_rolling.shape[0])

In [38]:
matches_rolling

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,day_code,target,gf_rolling,ga_rolling,sh_rolling,sot_rolling,dist_rolling,fk_rolling,pk_rolling,pkatt_rolling
0,2021-09-11,15:00,Premier League,Matchweek 4,Sat,Home,W,1,0,Norwich City,...,5,1,0.000000,3.000000,9.666667,2.333333,14.833333,0.333333,0.000000,0.000000
1,2021-09-18,15:00,Premier League,Matchweek 5,Sat,Away,W,1,0,Burnley,...,5,1,0.333333,2.333333,12.333333,3.000000,14.133333,0.333333,0.000000,0.000000
2,2021-09-26,16:30,Premier League,Matchweek 6,Sun,Home,W,3,1,Tottenham,...,6,1,0.666667,1.666667,14.666667,3.000000,14.800000,0.666667,0.000000,0.000000
3,2021-10-02,17:30,Premier League,Matchweek 7,Sat,Away,D,0,0,Brighton,...,5,0,1.666667,0.333333,18.333333,5.333333,18.433333,0.666667,0.000000,0.000000
4,2021-10-18,20:00,Premier League,Matchweek 8,Mon,Home,D,2,2,Crystal Palace,...,0,0,1.333333,0.333333,11.000000,4.000000,19.833333,0.666667,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1446,2023-04-29,15:00,Premier League,Matchweek 34,Sat,Away,L,0,6,Brighton,...,5,0,1.666667,0.666667,11.666667,4.666667,18.700000,0.666667,0.333333,0.333333
1447,2023-05-06,15:00,Premier League,Matchweek 35,Sat,Home,W,1,0,Aston Villa,...,5,1,1.000000,2.666667,11.333333,2.333333,18.800000,0.666667,0.333333,0.333333
1448,2023-05-13,15:00,Premier League,Matchweek 36,Sat,Away,L,0,2,Manchester Utd,...,5,0,1.000000,2.000000,8.000000,2.000000,17.766667,0.000000,0.333333,0.333333
1449,2023-05-20,15:00,Premier League,Matchweek 37,Sat,Home,D,1,1,Everton,...,5,0,0.333333,2.666667,7.000000,1.333333,15.600000,0.000000,0.000000,0.000000


### Retraining ML Model:

In [39]:
def make_predictions(data, predictors):
    train = data[data['date'] < '2023-01-01']
    test = data[data['date'] > '2023-01-01']
    rf.fit(train[predictors], train['target'])
    predictions = rf.predict(test[predictors])
    combined = pd.DataFrame(dict(actual=test['target'], prediction=predictions), index=test.index)
    precision = precision_score(test['target'], predictions)
    return combined, precision

In [40]:
combined, precision = make_predictions(matches_rolling, predictors + new_cols)

In [41]:
precision

0.5384615384615384

In [42]:
combined

Unnamed: 0,actual,prediction
51,0,1
52,1,0
53,1,1
54,0,1
55,0,1
...,...,...
1446,0,0
1447,1,0
1448,0,0
1449,0,0


In [43]:
combined = combined.merge(matches_rolling[['date', 'team', 'opponent', 'result']], left_index=True, right_index=True)

In [44]:
combined

Unnamed: 0,actual,prediction,date,team,opponent,result
51,0,1,2023-01-03,Arsenal,Newcastle Utd,D
52,1,0,2023-01-15,Arsenal,Tottenham,W
53,1,1,2023-01-22,Arsenal,Manchester Utd,W
54,0,1,2023-02-04,Arsenal,Everton,L
55,0,1,2023-02-11,Arsenal,Brentford,D
...,...,...,...,...,...,...
1446,0,0,2023-04-29,Wolverhampton Wanderers,Brighton,L
1447,1,0,2023-05-06,Wolverhampton Wanderers,Aston Villa,W
1448,0,0,2023-05-13,Wolverhampton Wanderers,Manchester Utd,L
1449,0,0,2023-05-20,Wolverhampton Wanderers,Everton,D


Fixing inconsistencies between team names in 'team' and 'opponent' columns.

In [45]:
class MissingDict(dict):
    __missing__ = lambda self, key: key

map_values = {"Brighton and Hove Albion": "Brighton", 
              "Manchester United": "Manchester Utd", 
              "Newcastle United": "Newcastle Utd", 
              "Tottenham Hotspur": "Tottenham", 
              "West Ham United": "West Ham", 
              "Wolverhampton Wanderers": "Wolves"} 

mapping = MissingDict(**map_values)

In [47]:
mapping['Arsenal']

'Arsenal'

In [48]:
mapping['Wolverhampton Wanderers']

'Wolves'

In [51]:
combined["new_team"] = combined["team"].map(mapping)

In [52]:
merged = combined.merge(combined, left_on=['date','new_team'], right_on=['date', 'opponent'])

In [53]:
merged

Unnamed: 0,actual_x,prediction_x,date,team_x,opponent_x,result_x,new_team_x,actual_y,prediction_y,team_y,opponent_y,result_y,new_team_y
0,0,1,2023-01-03,Arsenal,Newcastle Utd,D,Arsenal,0,0,Newcastle United,Arsenal,D,Newcastle Utd
1,1,0,2023-01-15,Arsenal,Tottenham,W,Arsenal,0,0,Tottenham Hotspur,Arsenal,L,Tottenham
2,1,1,2023-01-22,Arsenal,Manchester Utd,W,Arsenal,0,1,Manchester United,Arsenal,L,Manchester Utd
3,0,1,2023-02-04,Arsenal,Everton,L,Arsenal,1,0,Everton,Arsenal,W,Everton
4,0,1,2023-02-11,Arsenal,Brentford,D,Arsenal,0,0,Brentford,Arsenal,D,Brentford
...,...,...,...,...,...,...,...,...,...,...,...,...,...
402,0,0,2023-04-29,Wolverhampton Wanderers,Brighton,L,Wolves,1,1,Brighton and Hove Albion,Wolves,W,Brighton
403,1,0,2023-05-06,Wolverhampton Wanderers,Aston Villa,W,Wolves,0,1,Aston Villa,Wolves,L,Aston Villa
404,0,0,2023-05-13,Wolverhampton Wanderers,Manchester Utd,L,Wolves,1,1,Manchester United,Wolves,W,Manchester Utd
405,0,0,2023-05-20,Wolverhampton Wanderers,Everton,D,Wolves,0,0,Everton,Wolves,D,Everton


In [57]:
merged[(merged["prediction_x"] == 1) & (merged["prediction_y"] ==0)]["actual_x"].value_counts()

1    50
0    39
Name: actual_x, dtype: int64

In [58]:
50/89

0.5617977528089888