In [1]:
import pandas as pd

In [2]:
matches = pd.read_csv("matches.csv", index_col = 0)

In [3]:
matches.head()

Unnamed: 0,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,Opponent,...,Match Report,Notes,Sh,SoT,Dist,FK,PK,PKatt,Season,Team
0,2023-08-13,16:30,Premier League,Matchweek 1,Sun,Away,D,1.0,1.0,Chelsea,...,Match Report,,13.0,1.0,17.8,0.0,0,0,2022,Liverpool
1,2023-08-19,15:00,Premier League,Matchweek 2,Sat,Home,W,3.0,1.0,Bournemouth,...,Match Report,,25.0,9.0,16.8,1.0,0,1,2022,Liverpool
2,2023-08-27,16:30,Premier League,Matchweek 3,Sun,Away,W,2.0,1.0,Newcastle Utd,...,Match Report,,9.0,4.0,17.2,1.0,0,0,2022,Liverpool
3,2023-09-03,14:00,Premier League,Matchweek 4,Sun,Home,W,3.0,0.0,Aston Villa,...,Match Report,,17.0,4.0,14.7,0.0,0,0,2022,Liverpool
4,2023-09-16,12:30,Premier League,Matchweek 5,Sat,Away,W,3.0,1.0,Wolves,...,Match Report,,16.0,5.0,15.8,0.0,0,0,2022,Liverpool


In [4]:
matches.shape

(2368, 27)

## Investigating missing data

In [5]:
matches["Team"].value_counts()

Liverpool                   119
Brighton and Hove Albion    119
Nottingham Forest           119
Everton                     119
Brentford                   119
Crystal Palace              119
Manchester City             119
Wolverhampton Wanderers     119
Fulham                      119
Newcastle United            119
West Ham United             119
Manchester United           119
Aston Villa                 119
Arsenal                     119
Chelsea                     116
Bournemouth                 116
Tottenham Hotspur           116
Burnley                      81
Sheffield United             81
Luton Town                   78
Leicester City               38
Leeds United                 38
Southampton                  38
Name: Team, dtype: int64

In [6]:
matches["Round"].value_counts()

Matchweek 1     80
Matchweek 14    80
Matchweek 27    80
Matchweek 25    80
Matchweek 24    80
Matchweek 23    80
Matchweek 22    80
Matchweek 21    80
Matchweek 2     80
Matchweek 19    80
Matchweek 18    80
Matchweek 16    80
Matchweek 15    80
Matchweek 20    80
Matchweek 13    80
Matchweek 7     80
Matchweek 12    80
Matchweek 4     80
Matchweek 5     80
Matchweek 6     80
Matchweek 3     80
Matchweek 8     80
Matchweek 10    80
Matchweek 11    80
Matchweek 9     80
Matchweek 26    74
Matchweek 17    74
Matchweek 28    20
Matchweek 32    20
Matchweek 37    20
Matchweek 36    20
Matchweek 35    20
Matchweek 31    20
Matchweek 34    20
Matchweek 33    20
Matchweek 30    20
Matchweek 29    20
Matchweek 38    20
Name: Round, dtype: int64

## Cleaning data

In [7]:
matches.dtypes

Date             object
Time             object
Comp             object
Round            object
Day              object
Venue            object
Result           object
GF              float64
GA              float64
Opponent         object
xG              float64
xGA             float64
Poss            float64
Attendance      float64
Captain          object
Formation        object
Referee          object
Match Report     object
Notes           float64
Sh              float64
SoT             float64
Dist            float64
FK              float64
PK                int64
PKatt             int64
Season            int64
Team             object
dtype: object

In [25]:
del matches["Comp"]

In [26]:
del matches["Notes"]

In [27]:
matches["Date"] = pd.to_datetime(matches["Date"])

## Creating Predictors for ML

In [28]:
#Turning the venue column to numeric 
matches["venue_code"] = matches["Venue"].astype("category").cat.codes

In [29]:
matches["opp_code"] = matches["Opponent"].astype("category").cat.codes

In [30]:
matches["hour"] = matches["Time"].str.replace(":.+","",regex = True).astype("int") 

In [31]:
matches["day_code"] = matches["Date"].dt.dayofweek

In [32]:
matches["target"] = (matches["Result"] == "W").astype("int")

In [33]:
matches

Unnamed: 0,Date,Time,Round,Day,Venue,Result,GF,GA,Opponent,xG,...,FK,PK,PKatt,Season,Team,venue_code,opp_code,hour,day_code,target
0,2023-08-13,16:30,Matchweek 1,Sun,Away,D,1.0,1.0,Chelsea,1.3,...,0.0,0,0,2022,Liverpool,0,6,16,6,0
1,2023-08-19,15:00,Matchweek 2,Sat,Home,W,3.0,1.0,Bournemouth,3.0,...,1.0,0,1,2022,Liverpool,1,2,15,5,1
2,2023-08-27,16:30,Matchweek 3,Sun,Away,W,2.0,1.0,Newcastle Utd,0.9,...,1.0,0,0,2022,Liverpool,0,16,16,6,1
3,2023-09-03,14:00,Matchweek 4,Sun,Home,W,3.0,0.0,Aston Villa,2.5,...,0.0,0,0,2022,Liverpool,1,1,14,6,1
4,2023-09-16,12:30,Matchweek 5,Sat,Away,W,3.0,1.0,Wolves,2.5,...,0.0,0,0,2022,Liverpool,0,22,12,5,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42,2023-04-30,14:00,Matchweek 34,Sun,Away,L,1.0,3.0,Newcastle Utd,0.8,...,0.0,0,0,2021,Southampton,0,16,14,6,0
43,2023-05-08,20:00,Matchweek 35,Mon,Away,L,3.0,4.0,Nott'ham Forest,3.1,...,0.0,1,1,2021,Southampton,0,17,20,0,0
44,2023-05-13,15:00,Matchweek 36,Sat,Home,L,0.0,2.0,Fulham,0.4,...,0.0,0,0,2021,Southampton,1,9,15,5,0
45,2023-05-21,14:00,Matchweek 37,Sun,Away,L,1.0,3.0,Brighton,0.6,...,1.0,0,0,2021,Southampton,0,4,14,6,0


## Creating the ML Model

In [34]:
from sklearn.ensemble import RandomForestClassifier

In [35]:
rf = RandomForestClassifier(n_estimators = 50, min_samples_split = 10, random_state = 1)

In [40]:
train = matches[matches["Date"] < '2023-01-01']

In [41]:
test = matches[matches["Date"] > '2023-01-01']

In [42]:
predictors = ["venue_code", "opp_code", "hour", "day_code"]

In [43]:
rf.fit(train[predictors], train["target"])

In [44]:
preds= rf.predict(test[predictors])

In [45]:
from sklearn.metrics import accuracy_score

In [46]:
acc = accuracy_score(test["target"], preds)

In [47]:
acc

0.6041257367387033

In [48]:
#In what situation our accuracy was good
combined = pd.DataFrame(dict(actual = test["target"], prediction = preds))

In [49]:
pd.crosstab(index = combined["actual"], columns = combined["prediction"])

prediction,0,1
actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,979,247
1,559,251


In [50]:
from sklearn.metrics import precision_score

In [51]:
precision_score(test["target"], preds)

0.5040160642570282

## Creating more predictors to increase the accuracy of the model

In [52]:
grouped_matches = matches.groupby("Team")

In [53]:
group = grouped_matches.get_group("Liverpool")

In [56]:
group.head()

Unnamed: 0,Date,Time,Round,Day,Venue,Result,GF,GA,Opponent,xG,...,FK,PK,PKatt,Season,Team,venue_code,opp_code,hour,day_code,target
0,2023-08-13,16:30,Matchweek 1,Sun,Away,D,1.0,1.0,Chelsea,1.3,...,0.0,0,0,2022,Liverpool,0,6,16,6,0
1,2023-08-19,15:00,Matchweek 2,Sat,Home,W,3.0,1.0,Bournemouth,3.0,...,1.0,0,1,2022,Liverpool,1,2,15,5,1
2,2023-08-27,16:30,Matchweek 3,Sun,Away,W,2.0,1.0,Newcastle Utd,0.9,...,1.0,0,0,2022,Liverpool,0,16,16,6,1
3,2023-09-03,14:00,Matchweek 4,Sun,Home,W,3.0,0.0,Aston Villa,2.5,...,0.0,0,0,2022,Liverpool,1,1,14,6,1
4,2023-09-16,12:30,Matchweek 5,Sat,Away,W,3.0,1.0,Wolves,2.5,...,0.0,0,0,2022,Liverpool,0,22,12,5,1


In [55]:
def rolling_avg(group, cols, new_cols):
    group = group.sort_values("Date")
    rolling_stats = group[cols].rolling(3, closed = "left").mean()
    group[new_cols] = rolling_stats
    group = group.dropna(subset=new_cols)
    return group

In [58]:
cols = ["GF", "GA", "Sh", "SoT"	,"Dist","FK","PK","PKatt"]
new_cols = [f"{c}_rolling" for c in cols]

In [59]:
new_cols

['GF_rolling',
 'GA_rolling',
 'Sh_rolling',
 'SoT_rolling',
 'Dist_rolling',
 'FK_rolling',
 'PK_rolling',
 'PKatt_rolling']

In [60]:
rolling_avg(group, cols, new_cols)

Unnamed: 0,Date,Time,Round,Day,Venue,Result,GF,GA,Opponent,xG,...,day_code,target,GF_rolling,GA_rolling,Sh_rolling,SoT_rolling,Dist_rolling,FK_rolling,PK_rolling,PKatt_rolling
4,2022-08-27,15:00,Matchweek 4,Sat,Home,W,9.0,0.0,Bournemouth,3.3,...,5,1,1.333333,1.666667,17.333333,4.000000,14.733333,0.000000,0.0,0.0
5,2022-08-31,20:00,Matchweek 5,Wed,Home,W,2.0,1.0,Newcastle Utd,1.5,...,2,1,3.666667,1.000000,20.000000,7.000000,14.466667,0.000000,0.0,0.0
6,2022-09-03,12:30,Matchweek 6,Sat,Away,D,0.0,0.0,Everton,2.1,...,5,0,4.000000,1.000000,19.666667,7.666667,14.600000,0.000000,0.0,0.0
9,2022-10-01,15:00,Matchweek 9,Sat,Home,D,3.0,3.0,Brighton,1.5,...,5,0,3.666667,0.333333,21.666667,8.666667,14.166667,0.000000,0.0,0.0
11,2022-10-09,16:30,Matchweek 10,Sun,Away,L,2.0,3.0,Arsenal,1.1,...,6,0,1.666667,1.333333,20.333333,7.000000,15.900000,0.333333,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38,2024-02-21,19:30,Matchweek 26,Wed,Home,W,4.0,1.0,Luton Town,3.3,...,2,1,4.000000,1.000000,19.666667,9.666667,16.233333,0.666667,0.0,0.0
38,2024-02-21,19:30,Matchweek 26,Wed,Home,W,4.0,1.0,Luton Town,3.3,...,2,1,4.000000,1.000000,24.333333,11.333333,16.566667,0.333333,0.0,0.0
41,2024-03-02,15:00,Matchweek 27,Sat,Away,W,1.0,0.0,Nott'ham Forest,2.0,...,5,1,4.000000,1.000000,29.000000,13.000000,16.900000,0.000000,0.0,0.0
41,2024-03-02,15:00,Matchweek 27,Sat,Away,W,1.0,0.0,Nott'ham Forest,2.0,...,5,1,3.000000,0.666667,26.666667,9.000000,16.133333,0.000000,0.0,0.0


In [72]:
matches_rolling = matches.groupby("Team").apply(lambda x: rolling_avg(x, cols, new_cols))

In [73]:
matches_rolling

Unnamed: 0_level_0,Unnamed: 1_level_0,Date,Time,Round,Day,Venue,Result,GF,GA,Opponent,xG,...,day_code,target,GF_rolling,GA_rolling,Sh_rolling,SoT_rolling,Dist_rolling,FK_rolling,PK_rolling,PKatt_rolling
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Arsenal,3,2022-08-27,17:30,Matchweek 4,Sat,Home,W,2.0,1.0,Fulham,2.6,...,5,1,3.000000,0.666667,14.333333,5.000000,14.133333,0.333333,0.0,0.0
Arsenal,4,2022-08-31,19:30,Matchweek 5,Wed,Home,W,2.0,1.0,Aston Villa,2.4,...,2,1,3.000000,1.000000,18.333333,7.000000,14.433333,0.333333,0.0,0.0
Arsenal,5,2022-09-04,16:30,Matchweek 6,Sun,Away,L,1.0,3.0,Manchester Utd,1.3,...,6,0,2.333333,0.666667,19.333333,7.333333,15.533333,0.666667,0.0,0.0
Arsenal,7,2022-09-18,12:00,Matchweek 8,Sun,Away,W,3.0,0.0,Brentford,1.5,...,6,1,1.666667,1.666667,20.000000,6.333333,16.800000,1.000000,0.0,0.0
Arsenal,8,2022-10-01,12:30,Matchweek 9,Sat,Home,W,3.0,1.0,Tottenham,2.4,...,5,1,2.000000,1.333333,17.000000,6.000000,17.700000,0.666667,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wolverhampton Wanderers,30,2024-02-25,13:30,Matchweek 26,Sun,Home,W,1.0,0.0,Sheffield Utd,1.3,...,6,1,1.666667,0.666667,12.333333,5.333333,15.466667,0.000000,0.0,0.0
Wolverhampton Wanderers,30,2024-02-25,13:30,Matchweek 26,Sun,Home,W,1.0,0.0,Sheffield Utd,1.3,...,6,1,1.333333,0.333333,12.666667,3.666667,15.033333,0.000000,0.0,0.0
Wolverhampton Wanderers,32,2024-03-02,15:00,Matchweek 27,Sat,Away,L,0.0,3.0,Newcastle Utd,0.8,...,5,0,1.000000,0.000000,13.000000,2.000000,14.600000,0.000000,0.0,0.0
Wolverhampton Wanderers,32,2024-03-02,15:00,Matchweek 27,Sat,Away,L,0.0,3.0,Newcastle Utd,0.8,...,5,0,0.666667,1.000000,12.666667,2.333333,15.033333,0.000000,0.0,0.0


In [74]:
matches_rolling = matches_rolling.droplevel("Team")

In [75]:
matches_rolling

Unnamed: 0,Date,Time,Round,Day,Venue,Result,GF,GA,Opponent,xG,...,day_code,target,GF_rolling,GA_rolling,Sh_rolling,SoT_rolling,Dist_rolling,FK_rolling,PK_rolling,PKatt_rolling
3,2022-08-27,17:30,Matchweek 4,Sat,Home,W,2.0,1.0,Fulham,2.6,...,5,1,3.000000,0.666667,14.333333,5.000000,14.133333,0.333333,0.0,0.0
4,2022-08-31,19:30,Matchweek 5,Wed,Home,W,2.0,1.0,Aston Villa,2.4,...,2,1,3.000000,1.000000,18.333333,7.000000,14.433333,0.333333,0.0,0.0
5,2022-09-04,16:30,Matchweek 6,Sun,Away,L,1.0,3.0,Manchester Utd,1.3,...,6,0,2.333333,0.666667,19.333333,7.333333,15.533333,0.666667,0.0,0.0
7,2022-09-18,12:00,Matchweek 8,Sun,Away,W,3.0,0.0,Brentford,1.5,...,6,1,1.666667,1.666667,20.000000,6.333333,16.800000,1.000000,0.0,0.0
8,2022-10-01,12:30,Matchweek 9,Sat,Home,W,3.0,1.0,Tottenham,2.4,...,5,1,2.000000,1.333333,17.000000,6.000000,17.700000,0.666667,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30,2024-02-25,13:30,Matchweek 26,Sun,Home,W,1.0,0.0,Sheffield Utd,1.3,...,6,1,1.666667,0.666667,12.333333,5.333333,15.466667,0.000000,0.0,0.0
30,2024-02-25,13:30,Matchweek 26,Sun,Home,W,1.0,0.0,Sheffield Utd,1.3,...,6,1,1.333333,0.333333,12.666667,3.666667,15.033333,0.000000,0.0,0.0
32,2024-03-02,15:00,Matchweek 27,Sat,Away,L,0.0,3.0,Newcastle Utd,0.8,...,5,0,1.000000,0.000000,13.000000,2.000000,14.600000,0.000000,0.0,0.0
32,2024-03-02,15:00,Matchweek 27,Sat,Away,L,0.0,3.0,Newcastle Utd,0.8,...,5,0,0.666667,1.000000,12.666667,2.333333,15.033333,0.000000,0.0,0.0


In [76]:
matches_rolling.index = range(matches_rolling.shape[0])

In [77]:
matches_rolling

Unnamed: 0,Date,Time,Round,Day,Venue,Result,GF,GA,Opponent,xG,...,day_code,target,GF_rolling,GA_rolling,Sh_rolling,SoT_rolling,Dist_rolling,FK_rolling,PK_rolling,PKatt_rolling
0,2022-08-27,17:30,Matchweek 4,Sat,Home,W,2.0,1.0,Fulham,2.6,...,5,1,3.000000,0.666667,14.333333,5.000000,14.133333,0.333333,0.0,0.0
1,2022-08-31,19:30,Matchweek 5,Wed,Home,W,2.0,1.0,Aston Villa,2.4,...,2,1,3.000000,1.000000,18.333333,7.000000,14.433333,0.333333,0.0,0.0
2,2022-09-04,16:30,Matchweek 6,Sun,Away,L,1.0,3.0,Manchester Utd,1.3,...,6,0,2.333333,0.666667,19.333333,7.333333,15.533333,0.666667,0.0,0.0
3,2022-09-18,12:00,Matchweek 8,Sun,Away,W,3.0,0.0,Brentford,1.5,...,6,1,1.666667,1.666667,20.000000,6.333333,16.800000,1.000000,0.0,0.0
4,2022-10-01,12:30,Matchweek 9,Sat,Home,W,3.0,1.0,Tottenham,2.4,...,5,1,2.000000,1.333333,17.000000,6.000000,17.700000,0.666667,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2294,2024-02-25,13:30,Matchweek 26,Sun,Home,W,1.0,0.0,Sheffield Utd,1.3,...,6,1,1.666667,0.666667,12.333333,5.333333,15.466667,0.000000,0.0,0.0
2295,2024-02-25,13:30,Matchweek 26,Sun,Home,W,1.0,0.0,Sheffield Utd,1.3,...,6,1,1.333333,0.333333,12.666667,3.666667,15.033333,0.000000,0.0,0.0
2296,2024-03-02,15:00,Matchweek 27,Sat,Away,L,0.0,3.0,Newcastle Utd,0.8,...,5,0,1.000000,0.000000,13.000000,2.000000,14.600000,0.000000,0.0,0.0
2297,2024-03-02,15:00,Matchweek 27,Sat,Away,L,0.0,3.0,Newcastle Utd,0.8,...,5,0,0.666667,1.000000,12.666667,2.333333,15.033333,0.000000,0.0,0.0


## Retraning the ML Model

In [82]:
def make_preds(data, predictors):
    train = data[data["Date"] < '2023-01-01']
    test = data[data["Date"] > '2023-01-01']
    rf.fit(train[predictors], train["target"])
    preds = rf.predict(test[predictors])
    combined = pd.DataFrame(dict(actual=test["target"], predicted=preds), index=test.index)
    precision = precision_score(test["target"], preds)
    return combined, precision

In [83]:
combined, precision = make_preds(matches_rolling, predictors + new_cols)

In [84]:
precision

0.5480427046263345

In [86]:
combined = combined.merge(matches_rolling[["Date", "Team", "Opponent", "Result"]], left_index = True, right_index = True)

In [87]:
combined

Unnamed: 0,actual,predicted,Date,Team,Opponent,Result
13,0,1,2023-01-03,Arsenal,Newcastle Utd,D
14,1,1,2023-01-15,Arsenal,Tottenham,W
15,1,1,2023-01-22,Arsenal,Manchester Utd,W
16,0,0,2023-02-04,Arsenal,Everton,L
17,0,1,2023-02-11,Arsenal,Brentford,D
...,...,...,...,...,...,...
2294,1,1,2024-02-25,Wolverhampton Wanderers,Sheffield Utd,W
2295,1,1,2024-02-25,Wolverhampton Wanderers,Sheffield Utd,W
2296,0,0,2024-03-02,Wolverhampton Wanderers,Newcastle Utd,L
2297,0,0,2024-03-02,Wolverhampton Wanderers,Newcastle Utd,L


## Combining home and away predictions

In [90]:
class MissingDict(dict):
    __missing__ = lambda self, key: key
    
map_values={
    "Brighton and Hove Albion":"Brighton",
    "Manchester United":"Manchester Utd",
    "Newcastle United":"Newcastle Utd",
    "Tottenham Hotspur":"Tottenham",
    "West Ham United":"West Ham",
    "Wolverhampton Wanderers":"Wolves"
}
mapping = MissingDict(**map_values)

In [92]:
mapping["West Ham United"]


'West Ham'

In [93]:
combined["new_team"] = combined["Team"].map(mapping)

In [94]:
combined

Unnamed: 0,actual,predicted,Date,Team,Opponent,Result,new_team
13,0,1,2023-01-03,Arsenal,Newcastle Utd,D,Arsenal
14,1,1,2023-01-15,Arsenal,Tottenham,W,Arsenal
15,1,1,2023-01-22,Arsenal,Manchester Utd,W,Arsenal
16,0,0,2023-02-04,Arsenal,Everton,L,Arsenal
17,0,1,2023-02-11,Arsenal,Brentford,D,Arsenal
...,...,...,...,...,...,...,...
2294,1,1,2024-02-25,Wolverhampton Wanderers,Sheffield Utd,W,Wolves
2295,1,1,2024-02-25,Wolverhampton Wanderers,Sheffield Utd,W,Wolves
2296,0,0,2024-03-02,Wolverhampton Wanderers,Newcastle Utd,L,Wolves
2297,0,0,2024-03-02,Wolverhampton Wanderers,Newcastle Utd,L,Wolves


In [95]:
merged = combined.merge(combined, left_on = ["Date", "new_team"], right_on = ["Date", "Opponent"])

In [96]:
merged

Unnamed: 0,actual_x,predicted_x,Date,Team_x,Opponent_x,Result_x,new_team_x,actual_y,predicted_y,Team_y,Opponent_y,Result_y,new_team_y
0,0,1,2023-01-03,Arsenal,Newcastle Utd,D,Arsenal,0,0,Newcastle United,Arsenal,D,Newcastle Utd
1,1,1,2023-01-15,Arsenal,Tottenham,W,Arsenal,0,0,Tottenham Hotspur,Arsenal,L,Tottenham
2,1,1,2023-01-22,Arsenal,Manchester Utd,W,Arsenal,0,0,Manchester United,Arsenal,L,Manchester Utd
3,0,0,2023-02-04,Arsenal,Everton,L,Arsenal,1,0,Everton,Arsenal,W,Everton
4,0,1,2023-02-11,Arsenal,Brentford,D,Arsenal,0,0,Brentford,Arsenal,D,Brentford
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4695,0,0,2024-03-02,Wolverhampton Wanderers,Newcastle Utd,L,Wolves,1,0,Newcastle United,Wolves,W,Newcastle Utd
4696,0,0,2024-03-02,Wolverhampton Wanderers,Newcastle Utd,L,Wolves,1,1,Newcastle United,Wolves,W,Newcastle Utd
4697,0,0,2024-03-02,Wolverhampton Wanderers,Newcastle Utd,L,Wolves,1,0,Newcastle United,Wolves,W,Newcastle Utd
4698,0,0,2024-03-02,Wolverhampton Wanderers,Newcastle Utd,L,Wolves,1,0,Newcastle United,Wolves,W,Newcastle Utd


In [97]:
merged[(merged["predicted_x"] ==1) & (merged["predicted_y"] ==0)]["actual_x"].value_counts()

1    649
0    411
Name: actual_x, dtype: int64