# Model Predicting the Winner before the Game with Team Features

In [1]:
import pandas as pd
import numpy as np
import dataframe_image as dfi
from sklearn.inspection import permutation_importance
from sklearn.feature_selection import SelectKBest, chi2, SelectFromModel
from sklearn.model_selection import train_test_split, cross_validate, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.naive_bayes import BernoulliNB, CategoricalNB, ComplementNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score, confusion_matrix, classification_report
pd.set_option('display.max_columns', None)   # Print all columns

In [2]:
# Load raw data
# Read the raw CSV (path is relative to the notebook's working directory) and
# preview the first rows to see which columns are available.
r_data = pd.read_csv('./data/data.csv')
r_data.head()

Unnamed: 0.1,Unnamed: 0,id,series,start_datetime,timezone,ground,longitude,latitude,altitude,match_length_overs,match_length_balls,max_balls_team1,max_balls_team2,team1,team2,neutral_venue,first_innings,toss_winner,toss_choice,dbMktPriceTeam1,dbMktPriceTeam2,victor,balls_team1,balls_team2,byes_team1,byes_team2,leg_byes_team1,leg_byes_team2,no_balls_team1,no_balls_team2,runs_team1,runs_team2,target_team1,target_team2,wickets_team1,wickets_team2,wides_team1,wides_team2
0,1,6690,Big Bash League 2014/15,2015-01-28 08:40:00,Australia/Sydney,Manuka Oval,149.134592,-35.318131,555,20,120,120,120,Perth Scorchers,Sydney Sixers,1,Sydney Sixers,Sydney Sixers,Bat,1.893,2.02,Perth Scorchers,120,120,0,0,2,3,2,0,148,147,148.0,,6,5,1,5
1,2,6696,Big Bash League 2014/15,2014-12-18 08:40:00,Australia/Adelaide,Adelaide Oval,138.596111,-34.915556,19,20,120,120,120,Adelaide Strikers,Melbourne Stars,0,Melbourne Stars,Melbourne Stars,Bat,,,Adelaide Strikers,75,120,1,0,2,1,0,0,149,148,149.0,,2,7,1,4
2,3,6700,Big Bash League 2014/15,2014-12-19 08:40:00,Australia/Sydney,Sydney Cricket Ground,151.224771,-33.891645,30,20,120,120,120,Sydney Sixers,Melbourne Renegades,0,Melbourne Renegades,Sydney Sixers,Bowl,,,Sydney Sixers,113,120,0,0,4,1,0,0,158,154,155.0,,2,7,2,6
3,4,6702,Big Bash League 2014/15,2014-12-20 08:10:00,Australia/Melbourne,Melbourne Cricket Ground,144.983449,-37.819967,12,20,120,120,120,Melbourne Stars,Hobart Hurricanes,0,Hobart Hurricanes,Melbourne Stars,Bowl,,,Hobart Hurricanes,120,120,0,0,2,1,0,3,138,190,191.0,,9,5,1,2
4,5,6705,Big Bash League 2014/15,2015-01-25 08:10:00,Australia/Perth,W.A.C.A. Ground,115.879576,-31.959793,7,20,120,120,120,Perth Scorchers,Melbourne Stars,0,Perth Scorchers,Perth Scorchers,Bat,1.775,2.17,Perth Scorchers,120,119,0,0,2,4,0,0,144,126,,145.0,7,10,3,4


In [3]:
# Save as an image
# Keep only the pre-game columns used later in this notebook and take the first rows
# as the content of the figure.
df = r_data[['start_datetime', 'ground', 'team1', 'team2', 'neutral_venue', 'first_innings', 'toss_winner', 'toss_choice', 'dbMktPriceTeam1', 'dbMktPriceTeam2', 'victor']].head()
# The export itself is disabled; un-comment to regenerate 'rawdata.png'.
# dfi.export(df, 'rawdata.png', fontsize=16, max_cols=-1, max_rows=-1)

# Preprocessing

## General preprocessing

**[Logic]**
- Only 2 games start in the p.m., so the starting time was not considered as a feature.
- Derby and month features were added.
- When victor is tie or NA, the games were removed.

In [4]:
# 1. Extract necessary columns
# Take an explicit copy: later cells add columns to `data`, and assigning into a bare
# column selection of `r_data` is what triggers pandas' SettingWithCopyWarning.
data = r_data[['start_datetime', 'ground', 'team1', 'team2', 'neutral_venue', 'first_innings', 'toss_winner', 'toss_choice', 'dbMktPriceTeam1', 'dbMktPriceTeam2', 'victor']].copy()
data.head()

Unnamed: 0,start_datetime,ground,team1,team2,neutral_venue,first_innings,toss_winner,toss_choice,dbMktPriceTeam1,dbMktPriceTeam2,victor
0,2015-01-28 08:40:00,Manuka Oval,Perth Scorchers,Sydney Sixers,1,Sydney Sixers,Sydney Sixers,Bat,1.893,2.02,Perth Scorchers
1,2014-12-18 08:40:00,Adelaide Oval,Adelaide Strikers,Melbourne Stars,0,Melbourne Stars,Melbourne Stars,Bat,,,Adelaide Strikers
2,2014-12-19 08:40:00,Sydney Cricket Ground,Sydney Sixers,Melbourne Renegades,0,Melbourne Renegades,Sydney Sixers,Bowl,,,Sydney Sixers
3,2014-12-20 08:10:00,Melbourne Cricket Ground,Melbourne Stars,Hobart Hurricanes,0,Hobart Hurricanes,Melbourne Stars,Bowl,,,Hobart Hurricanes
4,2015-01-25 08:10:00,W.A.C.A. Ground,Perth Scorchers,Melbourne Stars,0,Perth Scorchers,Perth Scorchers,Bat,1.775,2.17,Perth Scorchers


In [5]:
# 2. Add derby column
# derby = 1 when both team names start with the same word (the city), e.g. two
# 'Melbourne ...' teams, else 0.
# The comparison is vectorised with the .str accessor, and `assign` returns a new frame,
# so nothing is written into a slice of `r_data` (the cause of SettingWithCopyWarning).
data = data.assign(
    derby=(data['team1'].str.split().str[0] == data['team2'].str.split().str[0]).astype(int)
)
data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['derby'] = data.apply(lambda x: 1 if x['team1'].split()[0] == x['team2'].split()[0] else 0, axis=1)


Unnamed: 0,start_datetime,ground,team1,team2,neutral_venue,first_innings,toss_winner,toss_choice,dbMktPriceTeam1,dbMktPriceTeam2,victor,derby
0,2015-01-28 08:40:00,Manuka Oval,Perth Scorchers,Sydney Sixers,1,Sydney Sixers,Sydney Sixers,Bat,1.893,2.02,Perth Scorchers,0
1,2014-12-18 08:40:00,Adelaide Oval,Adelaide Strikers,Melbourne Stars,0,Melbourne Stars,Melbourne Stars,Bat,,,Adelaide Strikers,0
2,2014-12-19 08:40:00,Sydney Cricket Ground,Sydney Sixers,Melbourne Renegades,0,Melbourne Renegades,Sydney Sixers,Bowl,,,Sydney Sixers,0
3,2014-12-20 08:10:00,Melbourne Cricket Ground,Melbourne Stars,Hobart Hurricanes,0,Hobart Hurricanes,Melbourne Stars,Bowl,,,Hobart Hurricanes,0
4,2015-01-25 08:10:00,W.A.C.A. Ground,Perth Scorchers,Melbourne Stars,0,Perth Scorchers,Perth Scorchers,Bat,1.775,2.17,Perth Scorchers,0


In [6]:
# 3. Extract month of start_datetime
# start_datetime is a string such as '2015-01-28 08:40:00', so the second '-'-separated
# field is the zero-padded month. It is kept as a string ('01', '12', ...) because the
# dummy-column names created later are derived from it.
# assign/drop build a new frame instead of writing into a slice of `r_data`, which is
# what raised SettingWithCopyWarning.
data = (
    data
    .assign(start_month=data['start_datetime'].str.split('-').str[1])
    .drop(columns='start_datetime')
)
data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['start_month'] = data['start_datetime'].str.split('-').str[1]


Unnamed: 0,ground,team1,team2,neutral_venue,first_innings,toss_winner,toss_choice,dbMktPriceTeam1,dbMktPriceTeam2,victor,derby,start_month
0,Manuka Oval,Perth Scorchers,Sydney Sixers,1,Sydney Sixers,Sydney Sixers,Bat,1.893,2.02,Perth Scorchers,0,1
1,Adelaide Oval,Adelaide Strikers,Melbourne Stars,0,Melbourne Stars,Melbourne Stars,Bat,,,Adelaide Strikers,0,12
2,Sydney Cricket Ground,Sydney Sixers,Melbourne Renegades,0,Melbourne Renegades,Sydney Sixers,Bowl,,,Sydney Sixers,0,12
3,Melbourne Cricket Ground,Melbourne Stars,Hobart Hurricanes,0,Hobart Hurricanes,Melbourne Stars,Bowl,,,Hobart Hurricanes,0,12
4,W.A.C.A. Ground,Perth Scorchers,Melbourne Stars,0,Perth Scorchers,Perth Scorchers,Bat,1.775,2.17,Perth Scorchers,0,1


In [7]:
# 4. Add home_venue
# home_venue is the complement of neutral_venue (1 -> 0, 0 -> 1).
home_venue = data['neutral_venue'].map({1:0, 0:1})
# DataFrame.insert mutates `data` in place and raises ValueError if the column already
# exists, so guard the call to keep this cell safe to re-run.
if 'home_venue' not in data.columns:
    data.insert(4, 'home_venue', home_venue)
data

Unnamed: 0,ground,team1,team2,neutral_venue,home_venue,first_innings,toss_winner,toss_choice,dbMktPriceTeam1,dbMktPriceTeam2,victor,derby,start_month
0,Manuka Oval,Perth Scorchers,Sydney Sixers,1,0,Sydney Sixers,Sydney Sixers,Bat,1.893,2.020,Perth Scorchers,0,01
1,Adelaide Oval,Adelaide Strikers,Melbourne Stars,0,1,Melbourne Stars,Melbourne Stars,Bat,,,Adelaide Strikers,0,12
2,Sydney Cricket Ground,Sydney Sixers,Melbourne Renegades,0,1,Melbourne Renegades,Sydney Sixers,Bowl,,,Sydney Sixers,0,12
3,Melbourne Cricket Ground,Melbourne Stars,Hobart Hurricanes,0,1,Hobart Hurricanes,Melbourne Stars,Bowl,,,Hobart Hurricanes,0,12
4,W.A.C.A. Ground,Perth Scorchers,Melbourne Stars,0,1,Perth Scorchers,Perth Scorchers,Bat,1.775,2.170,Perth Scorchers,0,01
...,...,...,...,...,...,...,...,...,...,...,...,...,...
485,Docklands Stadium,Perth Scorchers,Sydney Sixers,1,0,Perth Scorchers,Perth Scorchers,Bat,1.654,2.360,Perth Scorchers,0,01
486,Melbourne Cricket Ground,Adelaide Strikers,Hobart Hurricanes,1,0,Adelaide Strikers,Adelaide Strikers,Bat,1.769,2.150,Adelaide Strikers,0,01
487,Melbourne Cricket Ground,Adelaide Strikers,Sydney Thunder,1,0,Adelaide Strikers,Adelaide Strikers,Bat,2.190,1.746,Adelaide Strikers,0,01
488,Sydney Cricket Ground,Sydney Sixers,Adelaide Strikers,0,1,Adelaide Strikers,Sydney Sixers,Bowl,2.230,1.719,Sydney Sixers,0,01


In [8]:
# 5. Drop rows if victor is NA
# Count of matches without a recorded victor (6 instances); not displayed because it is
# not the last expression of the cell.
data['victor'].isna().sum()
# Keep only the matches that have a victor.
data = data.dropna(axis='index', subset=['victor'])
data

Unnamed: 0,ground,team1,team2,neutral_venue,home_venue,first_innings,toss_winner,toss_choice,dbMktPriceTeam1,dbMktPriceTeam2,victor,derby,start_month
0,Manuka Oval,Perth Scorchers,Sydney Sixers,1,0,Sydney Sixers,Sydney Sixers,Bat,1.893,2.020,Perth Scorchers,0,01
1,Adelaide Oval,Adelaide Strikers,Melbourne Stars,0,1,Melbourne Stars,Melbourne Stars,Bat,,,Adelaide Strikers,0,12
2,Sydney Cricket Ground,Sydney Sixers,Melbourne Renegades,0,1,Melbourne Renegades,Sydney Sixers,Bowl,,,Sydney Sixers,0,12
3,Melbourne Cricket Ground,Melbourne Stars,Hobart Hurricanes,0,1,Hobart Hurricanes,Melbourne Stars,Bowl,,,Hobart Hurricanes,0,12
4,W.A.C.A. Ground,Perth Scorchers,Melbourne Stars,0,1,Perth Scorchers,Perth Scorchers,Bat,1.775,2.170,Perth Scorchers,0,01
...,...,...,...,...,...,...,...,...,...,...,...,...,...
485,Docklands Stadium,Perth Scorchers,Sydney Sixers,1,0,Perth Scorchers,Perth Scorchers,Bat,1.654,2.360,Perth Scorchers,0,01
486,Melbourne Cricket Ground,Adelaide Strikers,Hobart Hurricanes,1,0,Adelaide Strikers,Adelaide Strikers,Bat,1.769,2.150,Adelaide Strikers,0,01
487,Melbourne Cricket Ground,Adelaide Strikers,Sydney Thunder,1,0,Adelaide Strikers,Adelaide Strikers,Bat,2.190,1.746,Adelaide Strikers,0,01
488,Sydney Cricket Ground,Sydney Sixers,Adelaide Strikers,0,1,Adelaide Strikers,Sydney Sixers,Bowl,2.230,1.719,Sydney Sixers,0,01


In [9]:
# 6. Drop rows if victor is Tie
# Number of tied matches (4 instances); not displayed because it is not the last expression.
data['victor'].eq('Tie').sum()
# Keep every match whose victor is not the literal string 'Tie'.
data = data[data['victor'] != 'Tie']
data

Unnamed: 0,ground,team1,team2,neutral_venue,home_venue,first_innings,toss_winner,toss_choice,dbMktPriceTeam1,dbMktPriceTeam2,victor,derby,start_month
0,Manuka Oval,Perth Scorchers,Sydney Sixers,1,0,Sydney Sixers,Sydney Sixers,Bat,1.893,2.020,Perth Scorchers,0,01
1,Adelaide Oval,Adelaide Strikers,Melbourne Stars,0,1,Melbourne Stars,Melbourne Stars,Bat,,,Adelaide Strikers,0,12
2,Sydney Cricket Ground,Sydney Sixers,Melbourne Renegades,0,1,Melbourne Renegades,Sydney Sixers,Bowl,,,Sydney Sixers,0,12
3,Melbourne Cricket Ground,Melbourne Stars,Hobart Hurricanes,0,1,Hobart Hurricanes,Melbourne Stars,Bowl,,,Hobart Hurricanes,0,12
4,W.A.C.A. Ground,Perth Scorchers,Melbourne Stars,0,1,Perth Scorchers,Perth Scorchers,Bat,1.775,2.170,Perth Scorchers,0,01
...,...,...,...,...,...,...,...,...,...,...,...,...,...
485,Docklands Stadium,Perth Scorchers,Sydney Sixers,1,0,Perth Scorchers,Perth Scorchers,Bat,1.654,2.360,Perth Scorchers,0,01
486,Melbourne Cricket Ground,Adelaide Strikers,Hobart Hurricanes,1,0,Adelaide Strikers,Adelaide Strikers,Bat,1.769,2.150,Adelaide Strikers,0,01
487,Melbourne Cricket Ground,Adelaide Strikers,Sydney Thunder,1,0,Adelaide Strikers,Adelaide Strikers,Bat,2.190,1.746,Adelaide Strikers,0,01
488,Sydney Cricket Ground,Sydney Sixers,Adelaide Strikers,0,1,Adelaide Strikers,Sydney Sixers,Bowl,2.230,1.719,Sydney Sixers,0,01


In [10]:
# 7. Dummy variables for start_month
# One-hot encode the month strings; drop_first removes the first category so that
# k months yield k-1 indicator columns named 'month_<MM>'.
month = pd.get_dummies(data['start_month'], prefix='month', drop_first=True)   # k categories -> k-1 dummies
# Append the indicator columns to `data` (aligned on the row index).
# NOTE: re-running this cell alone appends the month_* columns a second time.
data = pd.concat([data, month], axis=1)
data.head()

Unnamed: 0,ground,team1,team2,neutral_venue,home_venue,first_innings,toss_winner,toss_choice,dbMktPriceTeam1,dbMktPriceTeam2,victor,derby,start_month,month_02,month_12
0,Manuka Oval,Perth Scorchers,Sydney Sixers,1,0,Sydney Sixers,Sydney Sixers,Bat,1.893,2.02,Perth Scorchers,0,1,0,0
1,Adelaide Oval,Adelaide Strikers,Melbourne Stars,0,1,Melbourne Stars,Melbourne Stars,Bat,,,Adelaide Strikers,0,12,0,1
2,Sydney Cricket Ground,Sydney Sixers,Melbourne Renegades,0,1,Melbourne Renegades,Sydney Sixers,Bowl,,,Sydney Sixers,0,12,0,1
3,Melbourne Cricket Ground,Melbourne Stars,Hobart Hurricanes,0,1,Hobart Hurricanes,Melbourne Stars,Bowl,,,Hobart Hurricanes,0,12,0,1
4,W.A.C.A. Ground,Perth Scorchers,Melbourne Stars,0,1,Perth Scorchers,Perth Scorchers,Bat,1.775,2.17,Perth Scorchers,0,1,0,0


In [11]:
# 8. Drop rows if dbMktPrice is NaN to compare with Benchmark
# The market-price benchmark needs both prices, so matches missing either one are removed
# here; the models are then trained on exactly the matches the benchmark can score.
# NOTE(review): the other counts in this notebook (rows 0-488 raw, 6 NA victors, 4 ties,
# 942 / 2 = 471 matches after augmentation) suggest 8 rows are removed at this step — confirm the "9".
data = data.dropna(subset=['dbMktPriceTeam1', 'dbMktPriceTeam2'])     # 9 instances dropped
data.head()

Unnamed: 0,ground,team1,team2,neutral_venue,home_venue,first_innings,toss_winner,toss_choice,dbMktPriceTeam1,dbMktPriceTeam2,victor,derby,start_month,month_02,month_12
0,Manuka Oval,Perth Scorchers,Sydney Sixers,1,0,Sydney Sixers,Sydney Sixers,Bat,1.893,2.02,Perth Scorchers,0,1,0,0
4,W.A.C.A. Ground,Perth Scorchers,Melbourne Stars,0,1,Perth Scorchers,Perth Scorchers,Bat,1.775,2.17,Perth Scorchers,0,1,0,0
5,Adelaide Oval,Adelaide Strikers,Hobart Hurricanes,0,1,Hobart Hurricanes,Hobart Hurricanes,Bat,2.22,1.741,Adelaide Strikers,0,12,0,1
6,Docklands Stadium,Melbourne Renegades,Sydney Thunder,0,1,Sydney Thunder,Melbourne Renegades,Bowl,1.69,2.31,Melbourne Renegades,0,12,0,1
7,Sydney Cricket Ground,Sydney Sixers,Perth Scorchers,0,1,Perth Scorchers,Perth Scorchers,Bat,1.633,2.43,Perth Scorchers,0,12,0,1


In [12]:
# Save the dataframe as an image
# Leave out 'ground' and 'start_month' and keep the first rows for the figure.
# This rebinds `df`, which earlier held the raw-data preview.
df = data.drop(columns=['ground', 'start_month']).head()
# The export itself is disabled; un-comment to regenerate 'generalprocessed.png'.
# dfi.export(df, 'generalprocessed.png', fontsize=16, max_cols=-1, max_rows=-1)

In [13]:
# Export the preprocessed file
# Written relative to the notebook's working directory. With the default arguments the
# row index is saved as an extra leading column, which shows up as 'Unnamed: 0' when the
# file is read back without index_col.
data.to_csv('./data/pjt02_data_processed.csv')

## Analysis-specific preprocessing
Each game is converted into two rows, one from the point of view of each team, so the number of rows doubles.

**For team 1**

In [14]:
# Convert first_innings into binary if team1 did the first inning
# Work on a copy so that `data` keeps the original team names.
t1 = data.copy()
# 1 where the team named in first_innings is team1, else 0 (element-wise comparison).
t1['first_innings'] = (t1['team1'] == t1['first_innings']).astype('int64')
t1.head()

Unnamed: 0,ground,team1,team2,neutral_venue,home_venue,first_innings,toss_winner,toss_choice,dbMktPriceTeam1,dbMktPriceTeam2,victor,derby,start_month,month_02,month_12
0,Manuka Oval,Perth Scorchers,Sydney Sixers,1,0,0,Sydney Sixers,Bat,1.893,2.02,Perth Scorchers,0,1,0,0
4,W.A.C.A. Ground,Perth Scorchers,Melbourne Stars,0,1,1,Perth Scorchers,Bat,1.775,2.17,Perth Scorchers,0,1,0,0
5,Adelaide Oval,Adelaide Strikers,Hobart Hurricanes,0,1,0,Hobart Hurricanes,Bat,2.22,1.741,Adelaide Strikers,0,12,0,1
6,Docklands Stadium,Melbourne Renegades,Sydney Thunder,0,1,0,Melbourne Renegades,Bowl,1.69,2.31,Melbourne Renegades,0,12,0,1
7,Sydney Cricket Ground,Sydney Sixers,Perth Scorchers,0,1,0,Perth Scorchers,Bat,1.633,2.43,Perth Scorchers,0,12,0,1


In [15]:
# Convert toss_winner into binary if team1 won the toss
# 1 where the team named in toss_winner is team1, else 0 (element-wise comparison).
t1['toss_winner'] = (t1['team1'] == t1['toss_winner']).astype('int64')
t1.head()

Unnamed: 0,ground,team1,team2,neutral_venue,home_venue,first_innings,toss_winner,toss_choice,dbMktPriceTeam1,dbMktPriceTeam2,victor,derby,start_month,month_02,month_12
0,Manuka Oval,Perth Scorchers,Sydney Sixers,1,0,0,0,Bat,1.893,2.02,Perth Scorchers,0,1,0,0
4,W.A.C.A. Ground,Perth Scorchers,Melbourne Stars,0,1,1,1,Bat,1.775,2.17,Perth Scorchers,0,1,0,0
5,Adelaide Oval,Adelaide Strikers,Hobart Hurricanes,0,1,0,0,Bat,2.22,1.741,Adelaide Strikers,0,12,0,1
6,Docklands Stadium,Melbourne Renegades,Sydney Thunder,0,1,0,1,Bowl,1.69,2.31,Melbourne Renegades,0,12,0,1
7,Sydney Cricket Ground,Sydney Sixers,Perth Scorchers,0,1,0,0,Bat,1.633,2.43,Perth Scorchers,0,12,0,1


In [16]:
# Convert victory into binary if team1 won the game
# Target for the team1 view: 1 where the victor is team1, else 0 (element-wise comparison).
t1['victor'] = (t1['team1'] == t1['victor']).astype('int64')
t1.head()

Unnamed: 0,ground,team1,team2,neutral_venue,home_venue,first_innings,toss_winner,toss_choice,dbMktPriceTeam1,dbMktPriceTeam2,victor,derby,start_month,month_02,month_12
0,Manuka Oval,Perth Scorchers,Sydney Sixers,1,0,0,0,Bat,1.893,2.02,1,0,1,0,0
4,W.A.C.A. Ground,Perth Scorchers,Melbourne Stars,0,1,1,1,Bat,1.775,2.17,1,0,1,0,0
5,Adelaide Oval,Adelaide Strikers,Hobart Hurricanes,0,1,0,0,Bat,2.22,1.741,1,0,12,0,1
6,Docklands Stadium,Melbourne Renegades,Sydney Thunder,0,1,0,1,Bowl,1.69,2.31,1,0,12,0,1
7,Sydney Cricket Ground,Sydney Sixers,Perth Scorchers,0,1,0,0,Bat,1.633,2.43,0,0,12,0,1


In [17]:
# Dummy variable for teams
# One-hot encode both team columns. team1 (the team this row describes) gets an empty
# prefix and separator, so its columns are the bare team names; team2 (the opponent)
# gets the 'op_' prefix. The original team1/team2 columns are replaced by the dummies.
t1 = pd.get_dummies(t1, columns=['team1', 'team2'], prefix=['', 'op'], prefix_sep=['', '_'])
t1.head()

Unnamed: 0,ground,neutral_venue,home_venue,first_innings,toss_winner,toss_choice,dbMktPriceTeam1,dbMktPriceTeam2,victor,derby,start_month,month_02,month_12,Adelaide Strikers,Brisbane Heat,Hobart Hurricanes,Melbourne Renegades,Melbourne Stars,Perth Scorchers,Sydney Sixers,Sydney Thunder,op_Adelaide Strikers,op_Brisbane Heat,op_Hobart Hurricanes,op_Melbourne Renegades,op_Melbourne Stars,op_Perth Scorchers,op_Sydney Sixers,op_Sydney Thunder
0,Manuka Oval,1,0,0,0,Bat,1.893,2.02,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
4,W.A.C.A. Ground,0,1,1,1,Bat,1.775,2.17,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0
5,Adelaide Oval,0,1,0,0,Bat,2.22,1.741,1,0,12,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
6,Docklands Stadium,0,1,0,1,Bowl,1.69,2.31,1,0,12,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
7,Sydney Cricket Ground,0,1,0,0,Bat,1.633,2.43,0,0,12,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0


In [18]:
# Extract only necessary columns by drop unnecessary columns
# These five columns are not used as model features: the venue name, the toss decision,
# the two market prices (reserved for the benchmark) and the raw month string
# (already encoded by the month_* dummies).
unused_columns = ['ground', 'toss_choice', 'dbMktPriceTeam1', 'dbMktPriceTeam2', 'start_month']
t1 = t1.drop(columns=unused_columns)
t1.head()

Unnamed: 0,neutral_venue,home_venue,first_innings,toss_winner,victor,derby,month_02,month_12,Adelaide Strikers,Brisbane Heat,Hobart Hurricanes,Melbourne Renegades,Melbourne Stars,Perth Scorchers,Sydney Sixers,Sydney Thunder,op_Adelaide Strikers,op_Brisbane Heat,op_Hobart Hurricanes,op_Melbourne Renegades,op_Melbourne Stars,op_Perth Scorchers,op_Sydney Sixers,op_Sydney Thunder
0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
4,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0
5,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
6,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
7,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0


In [19]:
# Do the same for team2
# Column conversion for team2: each of these columns holds a team name and becomes
# 1 where that name is team2, else 0.
t2 = data.copy()
for column in ('first_innings', 'toss_winner', 'victor'):
    t2[column] = (t2['team2'] == t2[column]).astype('int64')

# Additional process for team2: 0 for home_venue
t2['home_venue'] = 0

# Dummy variable for teams: here team2 is the described team (bare names) and
# team1 is the opponent ('op_' prefix), mirroring the layout of the team1 frame.
t2 = pd.get_dummies(
    t2,
    columns=['team2', 'team1'],
    prefix=['', 'op'],
    prefix_sep=['', '_'],
)

# Extract only necessary columns
t2 = t2.drop(columns=['ground', 'toss_choice', 'dbMktPriceTeam1', 'dbMktPriceTeam2', 'start_month'])
t2.head()

Unnamed: 0,neutral_venue,home_venue,first_innings,toss_winner,victor,derby,month_02,month_12,Adelaide Strikers,Brisbane Heat,Hobart Hurricanes,Melbourne Renegades,Melbourne Stars,Perth Scorchers,Sydney Sixers,Sydney Thunder,op_Adelaide Strikers,op_Brisbane Heat,op_Hobart Hurricanes,op_Melbourne Renegades,op_Melbourne Stars,op_Perth Scorchers,op_Sydney Sixers,op_Sydney Thunder
0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0
5,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0
6,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
7,0,0,1,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0


In [20]:
# Combine team1 and team2
# Stack the team1-view rows on top of the team2-view rows and renumber the index from 0,
# so every match contributes two rows to data2.
data2 = pd.concat([t1, t2], axis=0, ignore_index=True)
data2.head()

Unnamed: 0,neutral_venue,home_venue,first_innings,toss_winner,victor,derby,month_02,month_12,Adelaide Strikers,Brisbane Heat,Hobart Hurricanes,Melbourne Renegades,Melbourne Stars,Perth Scorchers,Sydney Sixers,Sydney Thunder,op_Adelaide Strikers,op_Brisbane Heat,op_Hobart Hurricanes,op_Melbourne Renegades,op_Melbourne Stars,op_Perth Scorchers,op_Sydney Sixers,op_Sydney Thunder
0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0
2,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
3,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
4,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0


In [21]:
# Save the dataframe as an image
# dfi.export(data2.iloc[:5, :16], './figure/02specificpreprocessing1.png', max_cols=-1, max_rows=-1)
# dfi.export(data2.iloc[:5, 16:], './figure/02specificpreprocessing2.png', max_cols=-1, max_rows=-1)

In [22]:
# Train-test split
# Features are every column except the binary target `victor` (1 = the row's team won).
X = data2.drop('victor', axis=1)
y = data2['victor']
# 80/20 random split with a fixed seed for reproducibility.
# NOTE(review): data2 stacks the team1 view and the team2 view of the same matches, so a
# purely random split can put the two mirrored rows of one match on opposite sides of the
# split (possible leakage). Consider a group-aware split keyed on the match — TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train.shape, y_test.shape

((753, 23), (189,))

# Benchmark

##  Null F1 score

In [40]:
# Victor 0 and 1 are 471 instances for each - both are equally frequent
# (inspect with y.value_counts() if needed).
# Null model: always predict class 0 and score it with F1 for label 0.
# The constant prediction is sized from `y` rather than a hard-coded 942, so the cell
# keeps working if the preprocessing changes the number of rows.
f1_score(y, pd.Series([0] * len(y)), pos_label=0)

0.6666666666666666

## Marketprice benchmark
Calculated before the data augmentation with 471 instances

In [46]:
# Benchmark prediction of winning team
# 1) Winning probability: get the reciprocal of dbMktPriceTeamX and normalize
#    so that the two probabilities of a match sum to 1 (computed column-wise).
inverse_price_1 = 1 / data['dbMktPriceTeam1']
inverse_price_2 = 1 / data['dbMktPriceTeam2']
data['probaTeam1'] = inverse_price_1 / (inverse_price_1 + inverse_price_2)
data['probaTeam2'] = inverse_price_2 / (inverse_price_1 + inverse_price_2)

# 2) Prediction of victor: the team with the strictly higher probability, team2 otherwise;
#    rows whose probability is NaN get no prediction (9 instances).
team1_favoured = data['probaTeam1'] > data['probaTeam2']
predicted = np.where(team1_favoured, data['team1'], data['team2'])
data['benchmark'] = np.where(data['probaTeam1'].isnull(), np.nan, predicted)
data.head()

Unnamed: 0,ground,team1,team2,neutral_venue,home_venue,first_innings,toss_winner,toss_choice,dbMktPriceTeam1,dbMktPriceTeam2,victor,derby,start_month,month_02,month_12,probaTeam1,probaTeam2,benchmark
0,Manuka Oval,Perth Scorchers,Sydney Sixers,1,0,Sydney Sixers,Sydney Sixers,Bat,1.893,2.02,Perth Scorchers,0,1,0,0,0.516228,0.483772,Perth Scorchers
4,W.A.C.A. Ground,Perth Scorchers,Melbourne Stars,0,1,Perth Scorchers,Perth Scorchers,Bat,1.775,2.17,Perth Scorchers,0,1,0,0,0.550063,0.449937,Perth Scorchers
5,Adelaide Oval,Adelaide Strikers,Hobart Hurricanes,0,1,Hobart Hurricanes,Hobart Hurricanes,Bat,2.22,1.741,Adelaide Strikers,0,12,0,1,0.439535,0.560465,Hobart Hurricanes
6,Docklands Stadium,Melbourne Renegades,Sydney Thunder,0,1,Sydney Thunder,Melbourne Renegades,Bowl,1.69,2.31,Melbourne Renegades,0,12,0,1,0.5775,0.4225,Melbourne Renegades
7,Sydney Cricket Ground,Sydney Sixers,Perth Scorchers,0,1,Perth Scorchers,Perth Scorchers,Bat,1.633,2.43,Perth Scorchers,0,12,0,1,0.59808,0.40192,Sydney Sixers


In [47]:
# Evaluation of benchmark
# F1-score of benchmark prediction ->> around 0.51
# Both `victor` and `benchmark` hold team names here, so the confusion matrix and the
# report are multi-class (one row/column per team), and the final value is the
# micro-averaged F1 over those classes.
# NOTE(review): the models below are scored with binary scoring='f1' on the doubled
# win/lose rows, so this multi-class figure may not be directly comparable — confirm.
print(confusion_matrix(data['victor'], data['benchmark']))
print()
print(classification_report(data['victor'], data['benchmark']))
f1_score(data['victor'], data['benchmark'], average='micro')

[[29  5  5  5  3  6  4  3]
 [ 5 16  6  3  7  5  3  5]
 [ 6  4 29  4  2  3  5  4]
 [ 5  5  5 20  5  3  1  4]
 [ 1  0  4  2 54  2  0  1]
 [ 3  3  1  7  4 51  6  0]
 [ 8  8  3  4  8  7 26  5]
 [ 5  2  4  3  4  7  3 20]]

                     precision    recall  f1-score   support

  Adelaide Strikers       0.47      0.48      0.48        60
      Brisbane Heat       0.37      0.32      0.34        50
  Hobart Hurricanes       0.51      0.51      0.51        57
Melbourne Renegades       0.42      0.42      0.42        48
    Melbourne Stars       0.62      0.84      0.72        64
    Perth Scorchers       0.61      0.68      0.64        75
      Sydney Sixers       0.54      0.38      0.44        69
     Sydney Thunder       0.48      0.42      0.44        48

           accuracy                           0.52       471
          macro avg       0.50      0.51      0.50       471
       weighted avg       0.51      0.52      0.51       471



0.5201698513800425

# Feature Selection

## Select K best by chi-squared 
Choose only from 1st ~ 7th features and include team dummy features all the time

-> P-values are too big, so would not use chi-squared measure

In [51]:
# Chi-square score for all features
# k='all' keeps every feature, so the selector is used only to compute the scores;
# the slice shows the first 7 columns (the non-team features).
SelectKBest(chi2, k='all').fit(X_train, y_train).scores_[:7]

array([6.22926201e-04, 5.34960324e-01, 3.23348210e-02, 4.82075299e-01,
       2.99832094e-03, 1.23655795e-01, 4.23544328e-04])

In [49]:
# P-values of all features
# Same fit as the previous cell, reading the p-values of the first 7 columns instead.
SelectKBest(chi2, k='all').fit(X_train, y_train).pvalues_[:7]   # Values are too big to choose features

array([0.98008807, 0.46452887, 0.85729478, 0.48748379, 0.95633212,
       0.72510282, 0.98358054])

# Classification Analysis

## Logistic Regression

### Permutation importance
With permutation importance, a feature is kept only if its importances_mean is more than twice its importances_std (i.e. mean - 2*std > 0).
- the result changes depending on random_state, so it is always set to 0 for consistency.

In [100]:
# Feature selection
# Fit a default logistic regression, then measure how much the training F1 drops when
# each feature is shuffled (10 repeats, fixed seed).
logr = LogisticRegression()
logr.fit(X_train, y_train)
result = permutation_importance(
    logr, X_train, y_train, scoring='f1', n_repeats=10, random_state=0
)

# Walk the features from most to least important and print only those whose mean
# importance stays positive after subtracting two standard deviations.
for idx in np.argsort(result.importances_mean)[::-1]:
    if not (result.importances_mean[idx] - 2*result.importances_std[idx] > 0):
        continue
    print(idx, X_train.columns[idx], result.importances_mean[idx])

1 home_venue 0.013920514563959385


In [53]:
# Cross validation
# Columns 7..22 are the team / opponent dummies, which are always kept.
X_team = X.iloc[:, 7:23]
# Feature 2 (column index 1) is home_venue, the only feature selected above.
X_2 = X.iloc[:, 1:2]
X_2_team = pd.concat([X_2, X_team], axis='columns')
# Mean F1 over 10 folds.
np.mean(cross_validate(logr, X_2_team, y, cv=10, scoring='f1')['test_score'])

0.5577610446710428

### Select from model
Select the feature set based on the coef_ or feature_importances of the estimator.

In [54]:
# Let SelectFromModel pick features from a fresh logistic regression fitted on the
# training split; get_support() is the boolean selection mask, shown for the first 7 columns.
logr = LogisticRegression()
selector = SelectFromModel(logr).fit(X_train, y_train)
selector.get_support()[:7]       # No feature to choose among feature 1~7

array([False, False, False, False, False, False, False])

### Use all features
For comparison purpose to see if feature selection is effective.

In [55]:
# Baseline: mean 10-fold F1 of logistic regression on all 23 features.
cross_validate(logr, X, y, cv=10, scoring='f1')['test_score'].mean()

0.5429637576040822

## SVM

### Permutation importance

**Kernel = 'rbf' (default)**

In [56]:
# Fit an SVM with the default kernel and compute permutation importance on the
# training split (F1, 10 repeats, fixed seed).
svc = SVC()
svc.fit(X_train, y_train)
result = permutation_importance(
    svc, X_train, y_train, scoring='f1', n_repeats=10, random_state=0
)

# Print, from most to least important, the features whose mean importance stays
# positive after subtracting two standard deviations.
for idx in np.argsort(result.importances_mean)[::-1]:
    if not (result.importances_mean[idx] - 2*result.importances_std[idx] > 0):
        continue
    print(idx, X_train.columns[idx], result.importances_mean[idx])

1 home_venue 0.03173052277534759
6 month_12 0.031070446116466054
0 neutral_venue 0.024013266282125245
2 first_innings 0.021976148737199053
12 Perth Scorchers 0.019888579091699443
22 op_Sydney Thunder 0.01783140231306244
3 toss_winner 0.017751790793698298
13 Sydney Sixers 0.017324741341006977
17 op_Hobart Hurricanes 0.01607905350419696
14 Sydney Thunder 0.0151562128395465
7 Adelaide Strikers 0.013120372950251157
9 Hobart Hurricanes 0.012053051182365725
8 Brisbane Heat 0.011058641220032717
21 op_Sydney Sixers 0.009542029224268967
19 op_Melbourne Stars 0.008980402927432551
5 month_02 0.005995006414935023


In [57]:
# Use all features except feature 5
# Feature 5 is the column at position 4 of X; it is the only one of the first seven
# features missing from the list printed above.
X_ex_5 = X.drop(columns=X.columns[4])
# Mean F1 over 10 folds.
np.mean(cross_validate(svc, X_ex_5, y, cv=10, scoring='f1')['test_score'])

0.5380151358130404

**Kernel = 'linear'**

In [58]:
# Same procedure with a linear-kernel SVM: fit on the training split, then compute
# permutation importance (F1, 10 repeats, fixed seed).
svc = SVC(kernel='linear')
svc.fit(X_train, y_train)
result = permutation_importance(
    svc, X_train, y_train, scoring='f1', n_repeats=10, random_state=0
)

# Print, from most to least important, the features whose mean importance stays
# positive after subtracting two standard deviations.
for idx in np.argsort(result.importances_mean)[::-1]:
    if not (result.importances_mean[idx] - 2*result.importances_std[idx] > 0):
        continue
    print(idx, X_train.columns[idx], result.importances_mean[idx])    # No features from 1st ~ 7th were selected

12 Perth Scorchers 0.024022455305721658
20 op_Perth Scorchers 0.01681029910399816
21 op_Sydney Sixers 0.01642503144617623
19 op_Melbourne Stars 0.015675697664465027
15 op_Adelaide Strikers 0.014838174092169764
13 Sydney Sixers 0.012683632977028347
7 Adelaide Strikers 0.0073727119605238815


### Select from model

In [59]:
# SelectFromModel with a fresh linear SVM fitted on the training split; get_support()
# is the boolean selection mask, shown for the first 7 columns.
svc = SVC(kernel='linear')      # To use coef_ attribute
selector = SelectFromModel(svc).fit(X_train, y_train)
selector.get_support()[:7]

array([False, False, False, False, False, False, False])

### Use all features

In [60]:
# Baseline: mean 10-fold F1 of the linear-kernel SVM on all 23 features.
cross_validate(svc, X, y, cv=10, scoring='f1')['test_score'].mean()

0.5014646782394104

## Decision Tree

### Permutation importance

In [61]:
# Fit a shallow entropy-based decision tree on the training split and compute
# permutation importance (F1, 10 repeats).
dt = DecisionTreeClassifier(criterion='entropy', max_depth=5, min_samples_leaf=5, random_state=0).fit(X_train, y_train)
# NOTE(review): random_state=3 here differs from the random_state=0 used by every other
# permutation_importance call in this notebook and from the stated "always set to 0"
# convention. The feature choice in the next cell is based on this seed's output, so
# re-check that choice if the seed is aligned.
result = permutation_importance(dt, X_train, y_train, scoring='f1', n_repeats=10, random_state=3)

# Print, from most to least important, the features whose mean importance stays
# positive after subtracting two standard deviations.
for i in result.importances_mean.argsort()[::-1]:
    if result.importances_mean[i] - 2*result.importances_std[i] > 0:
        print(i, X_train.columns[i], result.importances_mean[i])

12 Perth Scorchers 0.06258980891536393
1 home_venue 0.045087849810286865
13 Sydney Sixers 0.044681956935265596
7 Adelaide Strikers 0.0332641918913755
22 op_Sydney Thunder 0.030373572903728198
0 neutral_venue 0.022696267013639852
17 op_Hobart Hurricanes 0.0159941340223712
2 first_innings 0.006956659273155768


In [62]:
# Use feature 1, 2, 3
# i.e. the first three columns of X, plus the always-included team dummies.
X_123 = X.iloc[:, :3]
X_123_team = pd.concat([X_123, X_team], axis='columns')

# Fresh (unfitted) tree with the same hyper-parameters; mean F1 over 10 folds.
dt = DecisionTreeClassifier(
    criterion='entropy', max_depth=5, min_samples_leaf=5, random_state=0
)
np.mean(cross_validate(dt, X_123_team, y, cv=10, scoring='f1')['test_score'])

0.5090520428646678

### Select from model

In [63]:
# SelectFromModel with a fresh decision tree fitted on the training split;
# get_support() is the boolean selection mask, shown for the first 7 columns.
dt = DecisionTreeClassifier(criterion='entropy', max_depth=5, min_samples_leaf=5, random_state=0)
selector = SelectFromModel(dt).fit(X_train, y_train)
selector.get_support()[:7]    # Feature 1, 2, 3 -> same as above

array([ True,  True,  True, False, False, False, False])

### Use all features

In [64]:
# Baseline: mean 10-fold F1 of the decision tree on all 23 features.
cross_validate(dt, X, y, cv=10, scoring='f1')['test_score'].mean()

0.5026268781161372

## AdaBoost

### Permutation importance

In [65]:
# Fit AdaBoost (100 estimators, fixed seed) on the training split and compute
# permutation importance (F1, 10 repeats, fixed seed).
ada = AdaBoostClassifier(n_estimators=100, random_state=0)
ada.fit(X_train, y_train)
result = permutation_importance(
    ada, X_train, y_train, scoring='f1', n_repeats=10, random_state=0
)

# Print, from most to least important, the features whose mean importance stays
# positive after subtracting two standard deviations.
for idx in np.argsort(result.importances_mean)[::-1]:
    if not (result.importances_mean[idx] - 2*result.importances_std[idx] > 0):
        continue
    print(idx, X_train.columns[idx], result.importances_mean[idx])

12 Perth Scorchers 0.028256109935722095
1 home_venue 0.014094472346429809
21 op_Sydney Sixers 0.010222003560736525


In [66]:
# Use feature 2
# X_2_team (home_venue plus the team dummies) was built in the logistic-regression
# section; mean F1 over 10 folds.
cross_validate(ada, X_2_team, y, cv=10, scoring='f1')['test_score'].mean()

0.5577610446710428

### Select from model

In [67]:
# SelectFromModel with a fresh AdaBoost model fitted on the training split;
# get_support() is the boolean selection mask, shown for the first 7 columns.
ada = AdaBoostClassifier(n_estimators=100, random_state=0)
selector = SelectFromModel(ada).fit(X_train, y_train)
selector.get_support()[:7]

array([ True,  True, False, False,  True, False, False])

In [68]:
# Use feature 1, 2, 5
# i.e. the columns at positions 0, 1 and 4 of X (the True entries of the mask above),
# plus the always-included team dummies.
X_125 = X[X.columns[[0, 1, 4]]]
X_125_team = pd.concat([X_125, X_team], axis='columns')

# Mean F1 over 10 folds.
np.mean(cross_validate(ada, X_125_team, y, cv=10, scoring='f1')['test_score'])

0.5582824152158896

### Use all features

In [69]:
# Baseline: mean 10-fold F1 of AdaBoost on all 23 features.
cross_validate(ada, X, y, cv=10, scoring='f1')['test_score'].mean()

0.5419025331142864

## Random Forest

### Permutation importance

In [70]:
# Fit a random forest of shallow entropy-based trees (each tree drawn from 200 samples)
# on the training split and compute permutation importance (F1, 10 repeats, fixed seed).
rf = RandomForestClassifier(
    criterion='entropy',
    max_depth=5,
    min_samples_leaf=5,
    random_state=0,
    max_samples=200,
)
rf.fit(X_train, y_train)
result = permutation_importance(
    rf, X_train, y_train, scoring='f1', n_repeats=10, random_state=0
)

# Print, from most to least important, the features whose mean importance stays
# positive after subtracting two standard deviations.
for idx in np.argsort(result.importances_mean)[::-1]:
    if not (result.importances_mean[idx] - 2*result.importances_std[idx] > 0):
        continue
    print(idx, X_train.columns[idx], result.importances_mean[idx])

12 Perth Scorchers 0.02585934029651794
1 home_venue 0.015993783178969177
6 month_12 0.014680543932718781
16 op_Brisbane Heat 0.013434836686423845
14 Sydney Thunder 0.011988040994439387
22 op_Sydney Thunder 0.008081572745959198
0 neutral_venue 0.0063456551770244675


In [104]:
# Use feature 1, 2
# i.e. the first two columns of X, plus the always-included team dummies.
X_12 = X.iloc[:, :2]
X_12_team = pd.concat([X_12, X_team], axis='columns')

# Fresh (unfitted) forest with the same hyper-parameters; mean F1 over 10 folds.
rf = RandomForestClassifier(
    criterion='entropy',
    max_depth=5,
    min_samples_leaf=5,
    random_state=0,
    max_samples=200,
)
np.mean(cross_validate(rf, X_12_team, y, cv=10, scoring='f1')['test_score'])

0.5645933873797064

In [103]:
# Use feature 1, 2, 6, 7
# i.e. the columns at positions 0, 1, 5 and 6 of X, plus the always-included team dummies.
X_1267 = X[X.columns[[0, 1, 5, 6]]]
X_1267_team = pd.concat([X_1267, X_team], axis='columns')

# Fresh (unfitted) forest with the same hyper-parameters; mean F1 over 10 folds.
rf = RandomForestClassifier(
    criterion='entropy',
    max_depth=5,
    min_samples_leaf=5,
    random_state=0,
    max_samples=200,
)
np.mean(cross_validate(rf, X_1267_team, y, cv=10, scoring='f1')['test_score'])

0.5525961822057388

### Select from model

In [72]:
# Model-based selection driven by the random forest's feature importances.
rf = RandomForestClassifier(
    criterion='entropy',
    max_depth=5,
    min_samples_leaf=5,
    max_samples=200,
    random_state=0,
)
selector = SelectFromModel(estimator=rf)
selector.fit(X_train, y_train)
# Show the selection mask for the first seven columns only
selector.get_support()[:7]

array([False,  True,  True,  True, False, False,  True])

In [105]:
# Features 2, 3 and 4 (column positions 1, 2, 3) combined with X_team;
# mean F1 across 10 CV folds with the forest configured in the cell above.
X_234 = X.iloc[:, [1, 2, 3]]
X_234_team = pd.concat((X_234, X_team), axis='columns')
np.mean(cross_validate(rf, X_234_team, y, scoring='f1', cv=10)['test_score'])

0.5552300306498891

In [106]:
# Features 2, 3, 4, 6 and 7 (column positions 1, 2, 3, 5, 6) combined with X_team;
# mean F1 across 10 CV folds.
X_23467 = X.iloc[:, [1, 2, 3, 5, 6]]
X_23467_team = pd.concat((X_23467, X_team), axis='columns')
np.mean(cross_validate(rf, X_23467_team, y, scoring='f1', cv=10)['test_score'])

0.5404200859139129

### Use all features

In [74]:
# Random-forest baseline: every column of X, mean F1 across 10 CV folds
np.mean(cross_validate(rf, X, y, scoring='f1', cv=10)['test_score'])

0.5501298432583838

## Bernoulli NB
For multivariate Bernoulli models with binary features.

### Permutation importance

In [109]:
# binarize=None: no thresholding is applied to the inputs before fitting.
bnb = BernoulliNB(binarize=None)
bnb.fit(X_train, y_train)

# NOTE(review): importance is scored on the training split -- confirm intent.
result = permutation_importance(bnb, X_train, y_train, scoring='f1', n_repeats=10, random_state=0)

# Print features (most important first) whose mean importance stays positive
# after subtracting two standard deviations.
for i in np.argsort(result.importances_mean)[::-1]:
    if result.importances_mean[i] - 2*result.importances_std[i] > 0:
        print(i, X_train.columns[i], result.importances_mean[i])    # No results returned

### Select from model
The BernoulliNB class exposes neither a `coef_` nor a `feature_importances_` attribute, so the `SelectFromModel` class cannot be used with it.

### Use all features

In [76]:
# Bernoulli NB on every column of X: mean F1 across 10 CV folds
np.mean(cross_validate(BernoulliNB(binarize=None), X, y, scoring='f1', cv=10)['test_score'])

0.5520115662230448

## Categorical NB
The results are the same as for BernoulliNB when all features are binary.

### Permutation importance

In [112]:
ca_nb = CategoricalNB()
ca_nb.fit(X_train, y_train)

# NOTE(review): importance is scored on the training split -- confirm intent.
result = permutation_importance(ca_nb, X_train, y_train, scoring='f1', n_repeats=10, random_state=0)

# Print features (most important first) whose mean importance stays positive
# after subtracting two standard deviations.
for i in np.argsort(result.importances_mean)[::-1]:
    if result.importances_mean[i] - 2*result.importances_std[i] > 0:
        print(i, X_train.columns[i], result.importances_mean[i])   # No feature returned

### Select from model
`SelectFromModel` cannot be used here, for the same reason as with the BernoulliNB class.

### Use all features

In [113]:
# Categorical NB on every column of X: mean F1 across 10 CV folds
np.mean(cross_validate(CategoricalNB(), X, y, scoring='f1', cv=10)['test_score'])

0.5520115662230448

## Complement NB

### Permutation importance

In [79]:
co_nb = ComplementNB()
co_nb.fit(X_train, y_train)

# NOTE(review): importance is scored on the training split -- confirm intent.
result = permutation_importance(co_nb, X_train, y_train, scoring='f1', n_repeats=10, random_state=0)

# Print features (most important first) whose mean importance stays positive
# after subtracting two standard deviations.
for i in np.argsort(result.importances_mean)[::-1]:
    if result.importances_mean[i] - 2*result.importances_std[i] > 0:
        print(i, X_train.columns[i], result.importances_mean[i])

22 op_Sydney Thunder 0.01336416766913261
1 home_venue 0.008643197862946206
15 op_Adelaide Strikers 0.006093200370973395


In [80]:
# Complement NB restricted to feature 2 (home_venue in the importance listing);
# X_2_team is built in an earlier cell. Result: mean F1 across 10 CV folds.
np.mean(cross_validate(ComplementNB(), X_2_team, y, scoring='f1', cv=10)['test_score'])

0.5610334240680519

### Select from model
`SelectFromModel` cannot be used here, for the same reason as with the BernoulliNB class.

### Use all features

In [81]:
# Complement NB on every column of X: mean F1 across 10 CV folds
np.mean(cross_validate(ComplementNB(), X, y, scoring='f1', cv=10)['test_score'])

0.5458471847764346

## KNN

### Permutation importance

In [115]:
# Distance-weighted k-nearest neighbours (default number of neighbours).
knn = KNeighborsClassifier(weights='distance')
knn.fit(X_train, y_train)

# NOTE(review): importance is scored on the same rows the model was fitted on.
# With distance weighting each training row is its own zero-distance neighbour,
# so these scores probably reflect memorisation rather than generalisation --
# consider re-running on a held-out split and confirm before relying on them.
result = permutation_importance(knn, X_train, y_train, scoring='f1', n_repeats=10, random_state=0)

# Print features (most important first) whose mean importance stays positive
# after subtracting two standard deviations.
for i in np.argsort(result.importances_mean)[::-1]:
    if result.importances_mean[i] - 2*result.importances_std[i] > 0:
        print(i, X_train.columns[i], result.importances_mean[i])

2 first_innings 0.1490465565295132
3 toss_winner 0.13766084694748185
6 month_12 0.13292483892392631
1 home_venue 0.13092053813670662
0 neutral_venue 0.07140776058402565
7 Adelaide Strikers 0.05781053707303456
18 op_Melbourne Renegades 0.052129965461028584
13 Sydney Sixers 0.05150220598826525
21 op_Sydney Sixers 0.049575263574420325
11 Melbourne Stars 0.04808301353604348
20 op_Perth Scorchers 0.048055792398180495
17 op_Hobart Hurricanes 0.046694007142804014
16 op_Brisbane Heat 0.04635219625998577
14 Sydney Thunder 0.04615097353696895
15 op_Adelaide Strikers 0.04462696750284857
8 Brisbane Heat 0.043148516439675086
12 Perth Scorchers 0.040368296772680244
9 Hobart Hurricanes 0.04030884815489545
5 month_02 0.03874284807926
19 op_Melbourne Stars 0.03867966750770764
22 op_Sydney Thunder 0.037971280750654555
10 Melbourne Renegades 0.03790038837439043
4 derby 0.031063653650060585


In [83]:
# KNN on every column of X (all features were flagged above):
# mean F1 across 10 CV folds with a fresh, unfitted estimator.
knn = KNeighborsClassifier(weights='distance')
np.mean(cross_validate(knn, X, y, scoring='f1', cv=10)['test_score'])

0.5134777407854035

### Select from model
`SelectFromModel` cannot be used here, for the same reason as with the BernoulliNB class.

## Neural Network

In [145]:
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Activation, Dropout

# Set random seed for reproducibility
tf.random.set_seed(81)

# Size the input layer from the data instead of hard-coding the column count
n_features = X.shape[1]

# F1 score of each cross-validation fold
f1 = []

# 10-fold stratified cross validation.
# Fold-local names are used on purpose: the notebook-level X_train / y_train
# are read by the permutation-importance and SelectFromModel cells above, and
# rebinding them here would make those cells use the last fold when re-run.
for train_idx, test_idx in StratifiedKFold(n_splits=10).split(X, y):
    X_fold_train, X_fold_test = X.iloc[train_idx], X.iloc[test_idx]
    y_fold_train, y_fold_test = y.iloc[train_idx], y.iloc[test_idx]

    # One-hot encode the target so the network can use two softmax outputs;
    # per the original note, a single output node predicted class 1 for
    # every instance.
    y_train_nn = to_categorical(y_fold_train, 2)

    # Define network: 32 -> 16 -> 8 -> 4 swish units, dropout after the two
    # widest layers, then a 2-way softmax.
    net = Sequential()
    net.add(Dense(32, input_shape=(n_features,)))
    net.add(Activation('swish'))
    net.add(Dropout(rate=0.2))

    net.add(Dense(16))
    net.add(Activation('swish'))
    net.add(Dropout(rate=0.2))

    net.add(Dense(8))
    net.add(Activation('swish'))

    net.add(Dense(4))
    net.add(Activation('swish'))

    net.add(Dense(2, activation='softmax'))

    # Compile the network.
    # With a 2-unit softmax and one-hot targets, binary cross-entropy averaged
    # over the two outputs equals categorical cross-entropy (up to numerical
    # clipping), so the loss is left as is to keep the reported score stable.
    net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['Precision', 'Recall'])

    # Fit the network with data.
    # NOTE(review): validation_split holds out the last 25% of each training
    # fold, but the fit history is discarded and no callback uses it -- confirm
    # whether that hold-out is intended.
    net.fit(X_fold_train, y_train_nn, epochs=20, batch_size=32, shuffle=False, validation_split=0.25, verbose=0)

    # Predicted class = index of the larger softmax output
    outputs = net.predict(X_fold_test, verbose=0)
    y_pred = np.argmax(outputs, axis=1)
    score = f1_score(y_fold_test, y_pred)
    f1.append(score)

# Mean F1 across the folds
print(np.mean(f1))

0.5876983326667393
