# Logistic Regression Based Prediction of Match Result

### Importing the modules.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

### Metadata of the dataset.

In [2]:
Metadata = pd.read_excel(r"D:\PYTHON\Logistic Regression\Sports Data.xlsx",sheet_name = 'Meta data')

In [3]:
Metadata.columns=['Variables', 'Description']
Metadata.drop(0,inplace=True)

In [4]:
Metadata

Unnamed: 0,Variables,Description
1,Game_number,Unique ID for each match
2,Result,Final result of the match
3,Avg_team_Age,Average age of the playing 11 players for that...
4,Match_light_type,"type of match: Day, night or day & night"
5,Match_format,"Format of the match: T20, ODI or test"
6,Bowlers_in_team,how many full time bowlers has been player in ...
7,All_rounder_in_team,how many full time all rounder has been player...
8,First_selection,First inning of team: batting or bowling
9,Opponent,Opponent team in the match
10,Season,"What is the season of the city, where match ha..."


### Extarcting the data from an excel file.

In [5]:
data = pd.read_excel(r"D:\PYTHON\Logistic Regression\Sports Data.xlsx",sheet_name ='Sports data')

In [6]:
df = data.copy() # Tacking the backup of a data

In [7]:
df.shape # Dataframe has 2930 rows and 22 columns

(2930, 22)

In [8]:
df.head()

Unnamed: 0,Game_number,Result,Avg_team_Age,Match_light_type,Match_format,Bowlers_in_team,All_rounder_in_team,First_selection,Opponent,Season,...,Max_run_scored_1over,Max_wicket_taken_1over,Extra_bowls_bowled,Min_run_given_1over,Min_run_scored_1over,Max_run_given_1over,extra_bowls_opponent,player_highest_run,Players_scored_zero,player_highest_wicket
0,Game_1,Loss,18.0,Day,ODI,3.0,3.0,Bowling,Srilanka,Summer,...,13.0,3,0.0,2,3.0,6.0,0,54.0,3,1
1,Game_2,Win,24.0,Day,T20,3.0,4.0,Batting,Zimbabwe,Summer,...,12.0,1,0.0,0,3.0,6.0,0,69.0,2,1
2,Game_3,Loss,24.0,Day and Night,T20,3.0,2.0,Bowling,Zimbabwe,,...,14.0,4,0.0,0,3.0,6.0,0,69.0,3,1
3,Game_4,Win,24.0,,ODI,2.0,2.0,Bowling,Kenya,Summer,...,15.0,4,0.0,2,3.0,6.0,0,73.0,3,1
4,Game_5,Loss,24.0,Night,ODI,1.0,3.0,Bowling,Srilanka,Summer,...,12.0,4,0.0,0,3.0,6.0,0,80.0,3,1


#### Checking the data types and count of non null rows of each attributes.

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2930 entries, 0 to 2929
Data columns (total 22 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Game_number             2930 non-null   object 
 1   Result                  2930 non-null   object 
 2   Avg_team_Age            2833 non-null   float64
 3   Match_light_type        2878 non-null   object 
 4   Match_format            2860 non-null   object 
 5   Bowlers_in_team         2848 non-null   float64
 6   All_rounder_in_team     2890 non-null   float64
 7   First_selection         2871 non-null   object 
 8   Opponent                2894 non-null   object 
 9   Season                  2868 non-null   object 
 10  Audience_number         2849 non-null   float64
 11  Offshore                2866 non-null   object 
 12  Max_run_scored_1over    2902 non-null   float64
 13  Max_wicket_taken_1over  2930 non-null   int64  
 14  Extra_bowls_bowled      2901 non-null   

#### From the above output, inconsistency in data type of 2 columns is observed which is  Players_scored_zero and 	player_highest_wicket which is supposed to be a numeric data type but is considered as Object.

In [10]:
l = ['player_highest_wicket','Players_scored_zero']
# Identifying the enties causing the data inconsistency
for x in l:
    print(f'{df[x].unique()}')

[1 2 3 4 'Three' 5]
[3 2 1 4 'Three']


#### Replacing the three by 3 to make the data type consistent.

In [11]:
r = {"Three":3}
#replacing the two columns
for x in l:
    df[x].replace(r, inplace = True)

### Handiling the missing values.

In [12]:
(df.isna().mean() * 100).round(2).map(lambda x : str(x)+'%')

Game_number                0.0%
Result                     0.0%
Avg_team_Age              3.31%
Match_light_type          1.77%
Match_format              2.39%
Bowlers_in_team            2.8%
All_rounder_in_team       1.37%
First_selection           2.01%
Opponent                  1.23%
Season                    2.12%
Audience_number           2.76%
Offshore                  2.18%
Max_run_scored_1over      0.96%
Max_wicket_taken_1over     0.0%
Extra_bowls_bowled        0.99%
Min_run_given_1over        0.0%
Min_run_scored_1over      0.92%
Max_run_given_1over       1.16%
extra_bowls_opponent       0.0%
player_highest_run        0.96%
Players_scored_zero        0.0%
player_highest_wicket      0.0%
dtype: object

In [13]:
df.isna().sum()[df.isna().sum()>0] # Columns with null rows

Avg_team_Age            97
Match_light_type        52
Match_format            70
Bowlers_in_team         82
All_rounder_in_team     40
First_selection         59
Opponent                36
Season                  62
Audience_number         81
Offshore                64
Max_run_scored_1over    28
Extra_bowls_bowled      29
Min_run_scored_1over    27
Max_run_given_1over     34
player_highest_run      28
dtype: int64

#### Imputing the Avg_team_Age with mean values.

In [14]:
df['Avg_team_Age'].fillna(df['Avg_team_Age'].mean(), inplace=True)

#### Imputing the Match_format field with mode after genralizing the 20-20 and T20  format to T20 as both of match format are same.

In [15]:
df.Match_format.unique()

array(['ODI', 'T20', 'Test', '20-20', nan], dtype=object)

In [16]:
d={'20-20':'T20'}
df.Match_format.replace(d, inplace=True)
df.Match_format.fillna(df.Match_format.mode()[0], inplace =True)

#### Correcting the inconsistency in First_slection field by replacing the Bat with Batting.

In [17]:
df.First_selection.unique()

array(['Bowling', 'Batting', 'Bat', nan], dtype=object)

In [18]:
fs ={'Bat':'Batting'}
df.First_selection.replace(fs, inplace= True)
df.First_selection.fillna(df.First_selection.mode()[0], inplace= True)

#### Imputing the Match_light_type, Bowlers_in_team, All_rounders_in_team, Season with their  Mode.

In [19]:
c= ['Match_light_type','Bowlers_in_team','All_rounder_in_team','Season', 'Offshore']
for x in c:
    print(f'{x} : {df[x].unique()}')   
# Imputing
for x in c:
    df[x].fillna(df[x].mode()[0], inplace=True)

Match_light_type : ['Day' 'Day and Night' nan 'Night']
Bowlers_in_team : [ 3.  2.  1. nan  4.  5.]
All_rounder_in_team : [ 3.  4.  2.  1. nan]
Season : ['Summer' nan 'Winter' 'Rainy']
Offshore : ['No' 'Yes' nan]


#### Creating the new category called 'Unknown' for null entries in Opponent Column as imputing with mode will not be a appropriate  option.

In [20]:
df.Opponent.unique()

array(['Srilanka', 'Zimbabwe', 'Kenya', 'Australia', 'England',
       'South Africa', 'Pakistan', 'West Indies', 'Bangladesh', nan],
      dtype=object)

In [21]:
df.Opponent.fillna('Unknown',inplace =True)

#### Imputing Audience number and all the score related numeric field with their mean.

In [22]:
num  = df.isna().sum()[df.isna().sum()>0].index
for x in num:
    print(f'{x} : {df[x].unique()}')
# Imputing with the mean
for x in num:
    df[x].fillna(df[x].mean().round(0), inplace =True)

Audience_number : [ 9940.  8400. 13146. ... 20937. 28756. 14007.]
Max_run_scored_1over : [13. 12. 14. 15. 16. 19. 21. 22. 25. 18. 11. 17. 24. 20. nan 23.]
Extra_bowls_bowled : [ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 17. 31. 13. 26. 24.
 22. 19. 23. 14. 15. 29. 28. 21. 25. 20. 16. 37. 38. 30. 40. 18. 36. 34.
 32. 33. nan 35. 27.]
Min_run_scored_1over : [ 3.  4.  1.  2. nan]
Max_run_given_1over : [ 6.  7.  8.  9. 10. 11. 25. 12. 14. 22. 15. 27. 21. 17. 13. 37. 16. 20.
 40. 24. 33. 19. 36. 18. 29. 31. 32. 34. 26. 30. nan 23.]
player_highest_run : [ 54.  69.  73.  80.  97.  70.  33.  50.  79. 100.  47.  52.  37.  96.
  57.  66.  83.  32.  49.  84.  98.  87.  43.  31.  99.  39.  90.  65.
  36.  45.  58.  59.  60.  34.  94.  48.  85.  75.  63.  62.  93.  51.
  78.  61.  77.  38.  89.  55.  46.  41.  42.  91.  64.  40.  67.  81.
  68.  82.  71.  88.  53.  76.  95.  30.  74.  56.  86.  44.  92.  72.
  35.  nan]


In [23]:
df.isna().sum()[df.isna().sum()>0]

Series([], dtype: int64)

### Encoding the categorical data to feed it into Logistic Regression Model.

#### Label Encoding the Classification column which is result.

In [24]:
le = LabelEncoder()

In [25]:
df['Result_ec'] = le.fit_transform(df['Result'])

In [26]:
df[['Result','Result_ec']].drop_duplicates()

Unnamed: 0,Result,Result_ec
0,Loss,0
1,Win,1


#### One Hot Encoding all other categorical field as none of them follow hierarchical order.

In [27]:
cate = df.select_dtypes('object').drop(['Game_number','Result'], axis=1).columns
print(cate)

for x in cate:
    df = pd.get_dummies(df, prefix =  x , columns = [x] )

Index(['Match_light_type', 'Match_format', 'First_selection', 'Opponent',
       'Season', 'Offshore'],
      dtype='object')


#### Checking the encoded columns.

In [28]:
df.columns

Index(['Game_number', 'Result', 'Avg_team_Age', 'Bowlers_in_team',
       'All_rounder_in_team', 'Audience_number', 'Max_run_scored_1over',
       'Max_wicket_taken_1over', 'Extra_bowls_bowled', 'Min_run_given_1over',
       'Min_run_scored_1over', 'Max_run_given_1over', 'extra_bowls_opponent',
       'player_highest_run', 'Players_scored_zero', 'player_highest_wicket',
       'Result_ec', 'Match_light_type_Day', 'Match_light_type_Day and Night',
       'Match_light_type_Night', 'Match_format_ODI', 'Match_format_T20',
       'Match_format_Test', 'First_selection_Batting',
       'First_selection_Bowling', 'Opponent_Australia', 'Opponent_Bangladesh',
       'Opponent_England', 'Opponent_Kenya', 'Opponent_Pakistan',
       'Opponent_South Africa', 'Opponent_Srilanka', 'Opponent_Unknown',
       'Opponent_West Indies', 'Opponent_Zimbabwe', 'Season_Rainy',
       'Season_Summer', 'Season_Winter', 'Offshore_No', 'Offshore_Yes'],
      dtype='object')

In [29]:
df.head(5)

Unnamed: 0,Game_number,Result,Avg_team_Age,Bowlers_in_team,All_rounder_in_team,Audience_number,Max_run_scored_1over,Max_wicket_taken_1over,Extra_bowls_bowled,Min_run_given_1over,...,Opponent_South Africa,Opponent_Srilanka,Opponent_Unknown,Opponent_West Indies,Opponent_Zimbabwe,Season_Rainy,Season_Summer,Season_Winter,Offshore_No,Offshore_Yes
0,Game_1,Loss,18.0,3.0,3.0,9940.0,13.0,3,0.0,2,...,0,1,0,0,0,0,1,0,1,0
1,Game_2,Win,24.0,3.0,4.0,8400.0,12.0,1,0.0,0,...,0,0,0,0,1,0,1,0,1,0
2,Game_3,Loss,24.0,3.0,2.0,13146.0,14.0,4,0.0,0,...,0,0,0,0,1,1,0,0,0,1
3,Game_4,Win,24.0,2.0,2.0,7357.0,15.0,4,0.0,2,...,0,0,0,0,0,0,1,0,1,0
4,Game_5,Loss,24.0,1.0,3.0,13328.0,12.0,4,0.0,0,...,0,1,0,0,0,0,1,0,1,0


In [30]:
 a= df.select_dtypes(['int','float','uint8'])

In [31]:
correlation = a.corr()
c= correlation['Result_ec'].to_frame()
c

Unnamed: 0,Result_ec
Avg_team_Age,0.15333
Bowlers_in_team,0.035185
All_rounder_in_team,0.096162
Audience_number,0.116601
Max_run_scored_1over,0.016203
Max_wicket_taken_1over,0.04612
Extra_bowls_bowled,0.172957
Min_run_given_1over,0.099813
Min_run_scored_1over,0.061739
Max_run_given_1over,0.081194


#### After observing the correlation matrix there seems to be no strong positive or negative relation between variables and Result.

###  Splitting the data into test and train dataset.

In [32]:
y = df.Result_ec

In [33]:
x= df.drop(['Result','Game_number','Result_ec'],axis=1)

In [34]:
x.columns

Index(['Avg_team_Age', 'Bowlers_in_team', 'All_rounder_in_team',
       'Audience_number', 'Max_run_scored_1over', 'Max_wicket_taken_1over',
       'Extra_bowls_bowled', 'Min_run_given_1over', 'Min_run_scored_1over',
       'Max_run_given_1over', 'extra_bowls_opponent', 'player_highest_run',
       'Players_scored_zero', 'player_highest_wicket', 'Match_light_type_Day',
       'Match_light_type_Day and Night', 'Match_light_type_Night',
       'Match_format_ODI', 'Match_format_T20', 'Match_format_Test',
       'First_selection_Batting', 'First_selection_Bowling',
       'Opponent_Australia', 'Opponent_Bangladesh', 'Opponent_England',
       'Opponent_Kenya', 'Opponent_Pakistan', 'Opponent_South Africa',
       'Opponent_Srilanka', 'Opponent_Unknown', 'Opponent_West Indies',
       'Opponent_Zimbabwe', 'Season_Rainy', 'Season_Summer', 'Season_Winter',
       'Offshore_No', 'Offshore_Yes'],
      dtype='object')

In [35]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size= 0.3, random_state =1)

In [36]:
x_train.head()

Unnamed: 0,Avg_team_Age,Bowlers_in_team,All_rounder_in_team,Audience_number,Max_run_scored_1over,Max_wicket_taken_1over,Extra_bowls_bowled,Min_run_given_1over,Min_run_scored_1over,Max_run_given_1over,...,Opponent_South Africa,Opponent_Srilanka,Opponent_Unknown,Opponent_West Indies,Opponent_Zimbabwe,Season_Rainy,Season_Summer,Season_Winter,Offshore_No,Offshore_Yes
608,29.0,2.0,3.0,46268.0,24.0,3,10.0,3,3.0,10.0,...,0,0,0,0,0,1,0,0,1,0
348,27.0,4.0,3.0,19677.0,14.0,2,4.0,2,3.0,6.0,...,0,1,0,0,0,1,0,0,1,0
2844,30.0,4.0,4.0,16149.0,23.0,2,5.0,2,3.0,6.0,...,0,0,0,0,0,1,0,0,0,1
1624,30.0,3.0,1.0,59948.0,20.0,3,11.0,0,2.0,6.0,...,1,0,0,0,0,0,1,0,0,1
612,29.0,3.0,4.0,16828.0,20.0,3,3.0,5,3.0,6.0,...,0,0,0,0,0,0,1,0,0,1


In [37]:
y_train.head()

608     1
348     1
2844    0
1624    0
612     0
Name: Result_ec, dtype: int32

###  Building the Model.

In [38]:
lr = LogisticRegression()

In [39]:
lr.fit(x_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [40]:
coef = lr.coef_

In [41]:
coef

array([[ 2.06417287e-02, -9.66525534e-03,  1.06257800e-01,
         1.74768735e-05, -2.17141673e-02,  4.59822319e-02,
         5.59093367e-02,  1.80280193e-01,  3.49661152e-02,
        -1.71936509e-01,  2.00320150e-01, -3.41060085e-03,
         9.30070710e-02,  6.50012271e-03,  2.37427765e-02,
        -5.18283743e-02,  2.44321343e-02,  4.93692679e-02,
        -4.57690895e-02, -7.25364186e-03,  1.41203166e-02,
        -1.77737800e-02, -8.12781334e-03,  4.01843092e-03,
         2.16340121e-02,  3.35089115e-02,  1.08930861e-02,
        -1.73057405e-02, -2.09963531e-02, -4.51851251e-03,
         6.39224858e-03, -2.91517332e-02,  5.09529135e-02,
        -8.54265744e-02,  3.08201974e-02,  1.16760311e-01,
        -1.20413774e-01]])

In [42]:
intercept = lr.intercept_[0]

In [43]:
intercept

-0.0036531405032466354

#### Prediction

In [44]:
y_predicted = lr.predict(x_test)

#### R^2 Value

In [45]:
lr.score(x_train,y_train)

0.8454412481716236

####  Model fits very well as its R square value is greater than 0.5  indicating the predictors or the independent variables explain 84% of the variation in the probability of the occurance of outcome.

#### Accuracy

In [46]:
lr.score(x_test,y_test)

0.8418657565415245

####  Classification Report

In [47]:
m=metrics.confusion_matrix(y_test,y_predicted)
m
# All the 735 win cases are correctly idnetified as win and only 5 cases out of 139 loss scenario predicted as loss

array([[  5, 139],
       [  0, 735]], dtype=int64)

In [48]:
print(metrics.classification_report(y_test, y_predicted))


              precision    recall  f1-score   support

           0       1.00      0.03      0.07       144
           1       0.84      1.00      0.91       735

    accuracy                           0.84       879
   macro avg       0.92      0.52      0.49       879
weighted avg       0.87      0.84      0.77       879



#### The model correctly identifies 100% actual win cases as win, indicating it is excellent at detecting the wins. However, it correctly identifies only 3% of total losses, meaning it poorly predict the losses and misclassify the losses as win.                                       

#### Checking for all those cases in the test dataset where model has wrongly predicted the Result.

In [49]:
x_test['Y_actual'] = y_test
x_test['Y_predicted'] = y_predicted

In [50]:
x_test[['Y_actual','Y_predicted']][(x_test['Y_actual'] != x_test['Y_predicted']) 
    | (x_test['Y_actual']==0)].groupby(['Y_actual','Y_predicted'])['Y_predicted'].count().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Y_predicted
Y_actual,Y_predicted,Unnamed: 2_level_1
0,0,5
0,1,139


#### Above Ouput justify that out of 144  actual loss entries model has detected only 3% [5] correctly as loss and remaining 97% [139] of loss cases are falsely detected as Win.

In [51]:
data.groupby('Result')['Game_number'].count()

Result
Loss     473
Win     2457
Name: Game_number, dtype: int64

#### Resaon for this inaccuracy in the loss prediction is probably due to less number of observation of loss scenario.

### Manually Predicting the result outcome based on the coefficients of all the Independent varaible and the intercept.

In [52]:
coef

array([[ 2.06417287e-02, -9.66525534e-03,  1.06257800e-01,
         1.74768735e-05, -2.17141673e-02,  4.59822319e-02,
         5.59093367e-02,  1.80280193e-01,  3.49661152e-02,
        -1.71936509e-01,  2.00320150e-01, -3.41060085e-03,
         9.30070710e-02,  6.50012271e-03,  2.37427765e-02,
        -5.18283743e-02,  2.44321343e-02,  4.93692679e-02,
        -4.57690895e-02, -7.25364186e-03,  1.41203166e-02,
        -1.77737800e-02, -8.12781334e-03,  4.01843092e-03,
         2.16340121e-02,  3.35089115e-02,  1.08930861e-02,
        -1.73057405e-02, -2.09963531e-02, -4.51851251e-03,
         6.39224858e-03, -2.91517332e-02,  5.09529135e-02,
        -8.54265744e-02,  3.08201974e-02,  1.16760311e-01,
        -1.20413774e-01]])

In [53]:
col = x_train.columns

In [54]:
col

Index(['Avg_team_Age', 'Bowlers_in_team', 'All_rounder_in_team',
       'Audience_number', 'Max_run_scored_1over', 'Max_wicket_taken_1over',
       'Extra_bowls_bowled', 'Min_run_given_1over', 'Min_run_scored_1over',
       'Max_run_given_1over', 'extra_bowls_opponent', 'player_highest_run',
       'Players_scored_zero', 'player_highest_wicket', 'Match_light_type_Day',
       'Match_light_type_Day and Night', 'Match_light_type_Night',
       'Match_format_ODI', 'Match_format_T20', 'Match_format_Test',
       'First_selection_Batting', 'First_selection_Bowling',
       'Opponent_Australia', 'Opponent_Bangladesh', 'Opponent_England',
       'Opponent_Kenya', 'Opponent_Pakistan', 'Opponent_South Africa',
       'Opponent_Srilanka', 'Opponent_Unknown', 'Opponent_West Indies',
       'Opponent_Zimbabwe', 'Season_Rainy', 'Season_Summer', 'Season_Winter',
       'Offshore_No', 'Offshore_Yes'],
      dtype='object')

#### Creating dictionary with all the Independent variable with their corresponding coefficient values

In [55]:
coef_values = {}
for x, y in zip(col,coef[0]):
    coef_values[x] = y

In [56]:
coef_values

{'Avg_team_Age': 0.02064172865304875,
 'Bowlers_in_team': -0.009665255342388728,
 'All_rounder_in_team': 0.10625779959941008,
 'Audience_number': 1.74768735356828e-05,
 'Max_run_scored_1over': -0.021714167327425253,
 'Max_wicket_taken_1over': 0.04598223192580883,
 'Extra_bowls_bowled': 0.05590933668250085,
 'Min_run_given_1over': 0.1802801933477336,
 'Min_run_scored_1over': 0.03496611523314216,
 'Max_run_given_1over': -0.17193650881233785,
 'extra_bowls_opponent': 0.20032014996708933,
 'player_highest_run': -0.0034106008540651226,
 'Players_scored_zero': 0.09300707096343623,
 'player_highest_wicket': 0.006500122712667342,
 'Match_light_type_Day': 0.023742776499342956,
 'Match_light_type_Day and Night': -0.05182837425565748,
 'Match_light_type_Night': 0.02443213427473265,
 'Match_format_ODI': 0.04936926792223692,
 'Match_format_T20': -0.04576908950135699,
 'Match_format_Test': -0.007253641860358047,
 'First_selection_Batting': 0.01412031657180617,
 'First_selection_Bowling': -0.01777378

#### Picking up one scenario from the test dataset and applying it on the prediction formula to Predict the Result.

In [57]:
x_test.iloc[0,:] # Case form test dataset

Avg_team_Age                         30.0
Bowlers_in_team                       3.0
All_rounder_in_team                   3.0
Audience_number                   14679.0
Max_run_scored_1over                 15.0
Max_wicket_taken_1over                3.0
Extra_bowls_bowled                    9.0
Min_run_given_1over                   3.0
Min_run_scored_1over                  1.0
Max_run_given_1over                   6.0
extra_bowls_opponent                  3.0
player_highest_run                   53.0
Players_scored_zero                   3.0
player_highest_wicket                 1.0
Match_light_type_Day                  0.0
Match_light_type_Day and Night        0.0
Match_light_type_Night                1.0
Match_format_ODI                      1.0
Match_format_T20                      0.0
Match_format_Test                     0.0
First_selection_Batting               0.0
First_selection_Bowling               1.0
Opponent_Australia                    0.0
Opponent_Bangladesh               

In [59]:
Linear = float(intercept)

for feature, coef in coef_values.items():
    value = float(input(f"Enter value for {feature}: "))
    Linear = Linear + value * coef

print("Linear combination (z) =", Linear)

Prediction = 1 / (1 + np.exp(-(Linear)))

print(" Prediction : ", Prediction.round(0))

Enter value for Avg_team_Age: 30
Enter value for Bowlers_in_team: 3
Enter value for All_rounder_in_team: 3
Enter value for Audience_number: 14679
Enter value for Max_run_scored_1over: 15
Enter value for Max_wicket_taken_1over: 3
Enter value for Extra_bowls_bowled: 9
Enter value for Min_run_given_1over: 3
Enter value for Min_run_scored_1over: 1
Enter value for Max_run_given_1over: 6
Enter value for extra_bowls_opponent: 3
Enter value for player_highest_run: 53
Enter value for Players_scored_zero: 3
Enter value for player_highest_wicket: 1
Enter value for Match_light_type_Day: 0
Enter value for Match_light_type_Day and Night: 0
Enter value for Match_light_type_Night: 1
Enter value for Match_format_ODI: 1
Enter value for Match_format_T20: 0
Enter value for Match_format_Test: 0
Enter value for First_selection_Batting: 0
Enter value for First_selection_Bowling: 1
Enter value for Opponent_Australia: 0
Enter value for Opponent_Bangladesh: 0
Enter value for Opponent_England: 0
Enter value for 

#### After inputting the feature values, formula has correctly predicted the outcome as WIN. This formula can be used to predict the future match result for any scenario.

# Thank you