# #                                              Modelling

In [1]:
#importing libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
import matplotlib.ticker as plticker
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

In [2]:
#load data 

world_cup = pd.read_csv('Datasets/T20TeamStats.csv')
results = pd.read_csv('Datasets/T20Records.csv')
ranks = pd.read_csv('Datasets/T20Ranking.csv')

In [3]:
world_cup.head()

Unnamed: 0,Team,Group,Previous Appearances,Previous Titles,Previous Finals,Previous Semifinals,Current Ranking
0,Afghanistan,A,5,0,0,0,8
1,Australia,A,7,1,2,2,6
2,Bangladesh,A,7,0,0,0,10
3,England,A,7,1,2,1,2
4,India,A,7,1,2,1,1


In [4]:
# Previous records
results

Unnamed: 0,Date,Team1,Team2,Winner,Venue
0,2-18-2022,Sri Lanka,Australia,Australia,Melbourne Cricket Ground
1,2-18-2022,India,West Indies,India,Eden Gardens
2,2-18-2022,Nepal,Oman,Nepal,Al Amerat Cricket Ground Oman Cricket (Ministr...
3,2-18-2022,Canada,Philippines,Canada,Al Amerat Cricket Ground Oman Cricket (Ministr...
4,2-18-2022,UAE,Ireland,United Arab Emirates,Al Amerat Cricket Ground Oman Cricket (Ministr...
...,...,...,...,...,...
1287,2-16-2006,New Zealand,West Indies,tie,Eden Park
1288,01-09-2006,Australia,South Africa,Australia,Brisbane Cricket Ground
1289,10-21-2005,South Africa,New Zealand,New Zealand,New Wanderers Stadium
1290,6-13-2005,England,Australia,England,The Rose Bowl


In [5]:
#Removing the record with ties
data=results[results.Winner!='tie']

In [6]:
# Dropping Venue column ,Since Only Australia will host the World Cup so home team advantage not possible for other team
data.drop(columns=['Venue'],inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [7]:
#Dropping the date column
data.drop(columns=['Date'],inplace=True)
data.head()

Unnamed: 0,Team1,Team2,Winner
0,Sri Lanka,Australia,Australia
1,India,West Indies,India
2,Nepal,Oman,Nepal
3,Canada,Philippines,Canada
4,UAE,Ireland,United Arab Emirates


In [8]:
#Filtering the records for only teams that will play the world cup
worldcup_teams = [' England ', ' South Africa ', ' Scotland ', ' West Indies ', 
            ' Pakistan ', ' New Zealand ', ' Sri Lanka ', ' Afghanistan ', 
            ' Australia ', ' Bangladesh ', ' India ',' Namibia ']
df_teams_1 = data[data['Team1'].isin(worldcup_teams)]
df_teams_2 = data[data['Team2'].isin(worldcup_teams)]
df_teams = pd.concat((df_teams_1, df_teams_2))
df_teams.drop_duplicates()
df_teams.count()

Team1     779
Team2     779
Winner    779
dtype: int64

In [9]:
#Adding column 'Count': It will record the team which won the most matches from its previous 6 matches (i.e. if India have won 4 matches from its previous 6 matches and Pakistan have won 3 matches than count which select India)
# and 'Rank': The team which is having higher rank will be named in this column
df_teams['Count']=0
df_teams['Rank']=0
df_team= df_teams.reset_index(drop=True) 
df_team

Unnamed: 0,Team1,Team2,Winner,Count,Rank
0,Sri Lanka,Australia,Australia,0,0
1,India,West Indies,India,0,0
2,West Indies,India,India,0,0
3,Sri Lanka,Australia,Australia,0,0
4,Australia,Sri Lanka,Australia,0,0
...,...,...,...,...,...
774,South Africa,Australia,South Africa,0,0
775,Australia,South Africa,Australia,0,0
776,South Africa,New Zealand,New Zealand,0,0
777,England,Australia,England,0,0


In [10]:
#filling the count and rank column
for i in range(700):
    dt1=df_team['Team1'].iloc[i]
    dt2=df_team['Team2'].iloc[i]
    c1=0
    c2=0
    w1=0
    w2=0
    for j in range(i+1,i+50):
        if ((df_team['Team1'].iloc[j]==dt1) or (df_team['Team2'].iloc[j]==dt1.rstrip())) and c1<6:
            if df_team['Winner'].iloc[j]==dt1.strip():
                w1=w1+1
                c1=c1+1
            else:
                c1=c1+1
    
    for j in range(i+1,i+50):
        if ((df_team['Team1'].iloc[j]==(dt2+ ' ')) or (df_team['Team2'].iloc[j]==dt2)) and c2<6:
            if df_team['Winner'].iloc[j]==dt2.strip():
                w2=w2+1
                c2=c2+1
            else:
                c2=c2+1
    if(w2>w1):
        df_team['Count'].iloc[i]=dt2
    else:
        df_team['Count'].iloc[i]=dt1
    r1=0
    r2=0
    for k in range(12):
        if (dt1.strip()==ranks['Team'].iloc[k]):
            r1=ranks['Rank'].iloc[k]
    for k in range(12):
        if (dt2.strip()==ranks['Team'].iloc[k]):
            r2=ranks['Rank'].iloc[k]
    if(r2>r1):
        df_team['Rank'].iloc[i]=dt1
    else:
        df_team['Rank'].iloc[i]=dt2
            
    
        

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [11]:
data=df_team.iloc[0:700]
data

Unnamed: 0,Team1,Team2,Winner,Count,Rank
0,Sri Lanka,Australia,Australia,Australia,Australia
1,India,West Indies,India,India,India
2,West Indies,India,India,India,India
3,Sri Lanka,Australia,Australia,Australia,Australia
4,Australia,Sri Lanka,Australia,Australia,Australia
...,...,...,...,...,...
695,Sri Lanka,West Indies,Sri Lanka,Sri Lanka,West Indies
696,Pakistan,South Africa,Pakistan,South Africa,Pakistan
697,New Zealand,Sri Lanka,Sri Lanka,Sri Lanka,New Zealand
698,India,South Africa,South Africa,South Africa,India


In [12]:
#Assigning the binary number to data , team1: 0 and team2: 1
for i in range(700):
    dt1=data['Team1'].iloc[i]
    dt2=data['Team2'].iloc[i]
    data['Team1'].iloc[i]=0
    data['Team2'].iloc[i]=1
    if data['Winner'].iloc[i]==dt1.strip():
        data['Winner'].iloc[i]=data['Team1'].iloc[i]
    else:
        data['Winner'].iloc[i]=data['Team2'].iloc[i]
    
    if data['Count'].iloc[i]==dt1:
        data['Count'].iloc[i]=data['Team1'].iloc[i]
    else:
        data['Count'].iloc[i]=data['Team2'].iloc[i]
    
    if data['Rank'].iloc[i]==dt1:
        data['Rank'].iloc[i]=data['Team1'].iloc[i]
    else:
        data['Rank'].iloc[i]=data['Team2'].iloc[i]
        
    
        

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [13]:
data

Unnamed: 0,Team1,Team2,Winner,Count,Rank
0,0,1,1,1,1
1,0,1,0,0,0
2,0,1,1,1,1
3,0,1,1,1,1
4,0,1,0,0,0
...,...,...,...,...,...
695,0,1,0,0,1
696,0,1,0,1,0
697,0,1,1,1,0
698,0,1,1,1,0


In [14]:
#importing the fixture file of upcoming world cup
fixtures = pd.read_csv('Datasets/T20Fixture.csv')
fixtures

Unnamed: 0,Date,Column1,Team_1,Team_2,Venue,Result
0,22nd October 2022,1,Australia,New Zealand,Sydney Cricket Ground,
1,22nd October 2022,1,England,Afghanistan,Perth Stadium,
2,23rd October 2022,1,Group A Winner,Group B Runner Up,Bellerive Oval,
3,23rd October 2022,1,India,Pakistan,Melbourne Cricket Ground,
4,24th October 2022,1,Bangladesh,Group A Runner up,Bellerive Oval,
5,24th October 2022,1,South Africa,Group B Winner,Bellerive Oval,
6,25th October 2022,1,Australia,Group A Winner,Perth Stadium,
7,26th October 2022,1,England,Group B Runner UP,Melbourne Cricket Ground,
8,26th October 2022,1,New Zealand,Afghanistan,Melbourne Cricket Ground,
9,27th October 2022,1,South Africa,Bangladesh,Sydney Cricket Ground,


In [15]:
#selecting the record till League matches
fixtures=fixtures.iloc[0:30]
fixtures

Unnamed: 0,Date,Column1,Team_1,Team_2,Venue,Result
0,22nd October 2022,1,Australia,New Zealand,Sydney Cricket Ground,
1,22nd October 2022,1,England,Afghanistan,Perth Stadium,
2,23rd October 2022,1,Group A Winner,Group B Runner Up,Bellerive Oval,
3,23rd October 2022,1,India,Pakistan,Melbourne Cricket Ground,
4,24th October 2022,1,Bangladesh,Group A Runner up,Bellerive Oval,
5,24th October 2022,1,South Africa,Group B Winner,Bellerive Oval,
6,25th October 2022,1,Australia,Group A Winner,Perth Stadium,
7,26th October 2022,1,England,Group B Runner UP,Melbourne Cricket Ground,
8,26th October 2022,1,New Zealand,Afghanistan,Melbourne Cricket Ground,
9,27th October 2022,1,South Africa,Bangladesh,Sydney Cricket Ground,


In [16]:
#Dropping the Date,Column1 and Venue columns
fixtures.drop(columns=[' Date','Column1','Venue'],inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [17]:
#Seperating the output i.e. Winner column for testing model
y=data["Winner"]
y = y.astype(float, errors = 'raise')

In [18]:
#dropping the target column and creating features file
X=data.drop('Winner',axis=1)
X['Team1'] = X.Team1.astype(float)
X['Team2'] = X.Team2.astype(float)
X['Count'] = X.Count.astype(float)
X['Rank'] = X.Rank.astype(float)

In [19]:
#importing libraries for SVM(Support Vector Machine) model
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.2)

In [20]:
model = SVC(kernel = 'linear', C = 1)

In [21]:
model.fit(X_train, y_train)

SVC(C=1, kernel='linear')

In [22]:
svm_pred = model.predict(X_test)

In [23]:
#Accuracy score
accuracy = model.score(X_test, y_test)
accuracy

0.6214285714285714

In [24]:
U=fixtures.drop('Result',axis=1)
U['Count']=0
U['Rank']=0

In [25]:
U

Unnamed: 0,Team_1,Team_2,Count,Rank
0,Australia,New Zealand,0,0
1,England,Afghanistan,0,0
2,Group A Winner,Group B Runner Up,0,0
3,India,Pakistan,0,0
4,Bangladesh,Group A Runner up,0,0
5,South Africa,Group B Winner,0,0
6,Australia,Group A Winner,0,0
7,England,Group B Runner UP,0,0
8,New Zealand,Afghanistan,0,0
9,South Africa,Bangladesh,0,0


In [26]:
#We have assumped from recent performance of teams in qualifying matches that Group A winner : Sri Lanka
#Group A Runner Up : Namibia , Group B winner : West Indies and Group B runner Up : Scotland
for i in range(30):
    if (U['Team_1'].iloc[i].strip()=="Group A Winner"):
        U['Team_1'].iloc[i]="Sri Lanka "
    elif (U['Team_1'].iloc[i].strip()=="Group B Winner"):
        U['Team_1'].iloc[i]="West Indies "
    elif (U['Team_1'].iloc[i].strip()=="Group A Runner Up"):
        U['Team_1'].iloc[i]="Namibia "
    elif (U['Team_1'].iloc[i].strip()=="Group B Runner Up"):
        U['Team_1'].iloc[i]="Scotland "
    else:
        continue
for i in range(30):
    if (U['Team_2'].iloc[i].strip()=="Group A Winner"):
        U['Team_2'].iloc[i]="Sri Lanka "
    elif (U['Team_2'].iloc[i].strip()=="Group B Winner"):
        U['Team_2'].iloc[i]="West Indies "
    elif (U['Team_2'].iloc[i].strip()=="Group A Runner Up"):
        U['Team_2'].iloc[i]="Namibia "
    elif (U['Team_2'].iloc[i].strip()=="Group B Runner Up"):
        U['Team_2'].iloc[i]="Scotland "
    else:
        continue   
for i in range(30):
    dt1=U['Team_1'].iloc[i]
    dt2=U['Team_2'].iloc[i]
    r1=0
    r2=0
    for k in range(12):
        if (dt1.strip()==ranks['Team'].iloc[k]):
            r1=ranks['Rank'].iloc[k]
        
    for k in range(12):
        if (dt2.strip()==ranks['Team'].iloc[k]):
            r2=ranks['Rank'].iloc[k]
        
    if(r2>r1):
        U['Rank'].iloc[i]=dt1
        U['Count'].iloc[i]=dt1
        
    else:
        U['Rank'].iloc[i]=dt2
        U['Count'].iloc[i]=dt2
        
    

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [27]:
for i in range(30):
    dt1=U['Team_1'].iloc[i]
    dt2=U['Team_2'].iloc[i]
    U['Team_1'].iloc[i]=0
    U['Team_2'].iloc[i]=1
    if U['Count'].iloc[i]==dt1:
        U['Count'].iloc[i]=U['Team_1'].iloc[i]
    else:
        U['Count'].iloc[i]=U['Team_2'].iloc[i]
    
    if U['Rank'].iloc[i]==dt1:
        U['Rank'].iloc[i]=U['Team_1'].iloc[i]
    else:
        U['Rank'].iloc[i]=U['Team_2'].iloc[i]

In [28]:
U

Unnamed: 0,Team_1,Team_2,Count,Rank
0,0,1,1,1
1,0,1,0,0
2,0,1,0,0
3,0,1,0,0
4,0,1,1,1
5,0,1,0,0
6,0,1,0,0
7,0,1,1,1
8,0,1,0,0
9,0,1,0,0


In [29]:
U['Team_1'] = U.Team_1.astype(float)
U['Team_2'] = U.Team_2.astype(float)
U['Count'] = U.Count.astype(float)
U['Rank'] = U.Rank.astype(float)

In [30]:
#Appling SVM model on World Cup schedule
svm_pred = model.predict(U)

Feature names unseen at fit time:
- Team_1
- Team_2
Feature names seen at fit time, yet now missing:
- Team1
- Team2



In [31]:
#Predicted vales 0: Team1 and 1:Team2
svm_pred

array([1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [32]:
#Linking the predicted values with records
print("Results till League matches\n")
for i in range(30):
    if (svm_pred[i]==1.0):
        print(str(i+1)+"."+fixtures['Team_1'].iloc[i] + " Vs " + fixtures['Team_2'].iloc[i] + " : " + fixtures['Team_2'].iloc[i] )
        print()
    else:
        print(str(i+1)+"."+fixtures['Team_1'].iloc[i] + " Vs " + fixtures['Team_2'].iloc[i] + " : " + fixtures['Team_1'].iloc[i] )
        print()

Results till League matches

1.Australia  Vs  New Zealand :  New Zealand

2.England  Vs  Afghanistan : England 

3.Group A Winner  Vs  Group B Runner Up : Group A Winner 

4.India  Vs  Pakistan : India 

5.Bangladesh  Vs  Group A Runner up :  Group A Runner up

6.South Africa  Vs  Group B Winner : South Africa 

7.Australia  Vs  Group A Winner : Australia 

8.England  Vs  Group B Runner UP :  Group B Runner UP

9.New Zealand  Vs  Afghanistan : New Zealand 

10.South Africa  Vs  Bangladesh : South Africa 

11.India  Vs  Group A Runner Up : India 

12.Pakistan  Vs  Group B Runner Up : Pakistan 

13.Afghanistan  Vs  Group B Runner UP :  Group B Runner UP

14.England  Vs  Australia : England 

15.New Zealand  Vs  Group A Winner : New Zealand 

16.Bangladesh  Vs  Group B Winner :  Group B Winner

17.Pakistan  Vs  Group A Runner Up : Pakistan 

18.India  Vs  South Africa : India 

19.Australia  Vs  Group B Runner Up : Australia 

20.Afghanistan  Vs  Group A winner :  Group A winner

21.Engla

In [33]:
#Point table after league matches
print("From the Modelling the Point table as follows:\n")
print("Group 1\n")
print("Team\t\t\tWin")
print("Afghanistan\t\t0")
print("Australia\t\t3")
print("England\t\t\t4")
print("New Zealand\t\t4")
print("Group A Winner\t\t2")
print("Group B Runner Up\t2")
print("\nGroup 2\n")
print("Team\t\t\tWin")
print("Bangladesh\t\t0")
print("India\t\t\t5")
print("Pakistan\t\t4")
print("South Africa\t\t3")
print("Group B Winner\t\t2")
print("Group A Runner Up\t1")



From the Modelling the Point table as follows:

Group 1

Team			Win
Afghanistan		0
Australia		3
England			4
New Zealand		4
Group A Winner		2
Group B Runner Up	2

Group 2

Team			Win
Bangladesh		0
India			5
Pakistan		4
South Africa		3
Group B Winner		2
Group A Runner Up	1


In [34]:
V=U.iloc[18:20]
V['Count'].iloc[1]=0.0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [35]:
#Predicting the semi-final winners
svm_pred = model.predict(V)

Feature names unseen at fit time:
- Team_1
- Team_2
Feature names seen at fit time, yet now missing:
- Team1
- Team2



In [36]:
svm_pred


array([0., 0.])

In [37]:
print("Semi Finals\n")
print("India(0) Vs England(1) : India")
print("New Zealand(0) Vs Pakistan(1) : New Zealand")

Semi Finals

India(0) Vs England(1) : India
New Zealand(0) Vs Pakistan(1) : New Zealand


In [38]:
#Predicting the Final Winner
V=U.iloc[18:19]
svm_pred = model.predict(V)
svm_pred

Feature names unseen at fit time:
- Team_1
- Team_2
Feature names seen at fit time, yet now missing:
- Team1
- Team2



array([0.])

In [39]:
print("Final\n")
print("India(0) Vs New Zealand(0) : India(0)\n")
print("Probable Winner of World Cup : India")

Final

India(0) Vs New Zealand(0) : India(0)

Probable Winner of World Cup : India
