In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the two raw IPL tables from the Kaggle dataset.
# `match` is joined on its `id` column further down; `delivery` is grouped by
# `match_id`/`inning` and supplies the `total_runs`, `over`, `ball` and
# `player_dismissed` columns used by the feature cells below.
match = pd.read_csv('/kaggle/input/ipl-data-set/matches.csv')
delivery = pd.read_csv('/kaggle/input/ipl-data-set/deliveries.csv')

In [None]:
match.head()

In [None]:
delivery.head()

In [None]:
# Total runs scored in each innings of every match.
# `total_runs` is selected BEFORE aggregating so that only that column is summed;
# summing the whole frame also tries to aggregate the text columns, which is
# slow and can fail depending on the pandas version.
total_score_df = (
    delivery
    .groupby(['match_id', 'inning'])['total_runs']
    .sum()
    .reset_index()
)

In [None]:
total_score_df

In [None]:
# Keep only the first-innings total of each match; it is merged onto the match
# table below and used as the score the second innings is measured against.
# `== 1` (rather than `!= 2`) also excludes any innings numbered above 2
# (presumably super overs — TODO confirm against the data), which would
# otherwise add extra total rows per match to the merge.
total_score_df = total_score_df[total_score_df['inning'] == 1]

In [None]:
total_score_df

In [None]:
# Attach each match's innings total to its row in the matches table
# (matches are keyed by `id`, the totals by `match_id`).
match_df = pd.merge(
    match,
    total_score_df.loc[:, ['match_id', 'total_runs']],
    left_on='id',
    right_on='match_id',
)

In [None]:
match_df.head()

In [None]:
match_df['team1'].unique()

In [None]:
# These are the current teams we need; matches whose `team1` or `team2` is not
# in this list are filtered out further down.
teams = [
    'Sunrisers Hyderabad',
    'Mumbai Indians',
    'Royal Challengers Bangalore',
    'Kolkata Knight Riders',
    'Kings XI Punjab',
    'Chennai Super Kings', 
    'Rajasthan Royals',
    'Delhi Capitals'
]

In [None]:
# Map old franchise names onto the names used in `teams`, so that seasons played
# under either name count as the same side.
# Each column must be rewritten from ITSELF: assigning the cleaned `team1`
# values to `team2` would overwrite every opponent with a copy of `team1`.
renamed_teams = {
    'Delhi Daredevils': 'Delhi Capitals',
    'Deccan Chargers': 'Sunrisers Hyderabad',
}
for team_col in ('team1', 'team2'):
    for old_name, new_name in renamed_teams.items():
        # regex=False: the names are literal text, not patterns.
        match_df[team_col] = match_df[team_col].str.replace(old_name, new_name, regex=False)

In [None]:
# Keep a match only when both of its sides appear in the `teams` list.
both_sides_known = match_df['team1'].isin(teams) & match_df['team2'].isin(teams)
match_df = match_df[both_sides_known]

In [None]:
match_df.shape

In [None]:
# Keep only the matches whose `dl_applied` value is 0.
# NOTE(review): presumably this flag marks Duckworth-Lewis adjusted matches — confirm.
match_df = match_df.loc[match_df['dl_applied'].eq(0)]

In [None]:
# Narrow the match table to the join key plus the three fields used downstream.
match_df = match_df.loc[:, ['match_id', 'city', 'winner', 'total_runs']]

In [None]:
# Join every delivery onto its match. Both tables carry a `total_runs` column, so
# pandas suffixes them: `total_runs_x` comes from match_df (the innings total) and
# `total_runs_y` from `delivery` (the runs recorded on that row).
delivery_df = pd.merge(match_df, delivery, on="match_id")

In [None]:
# Keep only the deliveries of the second innings.
# `.copy()` makes this an independent frame: the cells below add columns to it,
# and assigning into a filtered slice triggers pandas' SettingWithCopyWarning.
delivery_df = delivery_df[delivery_df['inning'] == 2].copy()

In [None]:
delivery_df.shape

In [None]:
# Running total of runs within each match, delivery by delivery.
# The runs column is selected BEFORE `cumsum()` so that only that column is
# accumulated; a frame-wide cumsum also visits the text columns, which is
# wasteful and can raise on recent pandas versions.
delivery_df['current_score'] = delivery_df.groupby('match_id')['total_runs_y'].cumsum()

In [None]:
# Runs still outstanding: the innings total carried over from match_df minus the
# runs accumulated so far.
# NOTE(review): this measures against the first-innings total itself, not
# total + 1 — confirm that is the intended target definition.
delivery_df['runs_left'] = delivery_df['total_runs_x'].sub(delivery_df['current_score'])

In [None]:
# Balls remaining out of 120: subtract the deliveries already counted, i.e. six
# per earlier over plus the ball number within the current over.
# assumes `over` is 1-based and `ball` numbers deliveries within the over — TODO confirm
delivery_df['balls_left'] = 120 - ((delivery_df['over'] - 1) * 6 + delivery_df['ball'])

In [None]:
# Turn `player_dismissed` into a 0/1 flag (1 = the field was filled in on that
# delivery) and derive how many wickets remain.
dismissal_flag = delivery_df['player_dismissed']
if not pd.api.types.is_integer_dtype(dismissal_flag):
    # Raw column: missing when nobody was out, otherwise some value.
    # Guarded so that re-running this cell on the already-converted 0/1 column
    # does not turn every row into a dismissal.
    dismissal_flag = dismissal_flag.notna().astype('int')
delivery_df['player_dismissed'] = dismissal_flag

# Accumulate only the flag column per match (a frame-wide cumsum would also walk
# the text columns), then subtract the running count from 10.
wickets_fallen = delivery_df.groupby('match_id')['player_dismissed'].cumsum()
delivery_df['wickets'] = 10 - wickets_fallen
delivery_df.head()

In [None]:
delivery_df

In [None]:
# Current run rate = runs per over so far: runs * 6 / balls already bowled,
# where balls already bowled = 120 - balls_left.
delivery_df['crr'] = delivery_df['current_score'].mul(6).div(120 - delivery_df['balls_left'])

In [None]:
# Required run rate = runs still needed per over across the balls that remain.
# Rows with balls_left == 0 divide by zero here; those rows are removed from the
# modelling frame before training.
delivery_df['rrr'] = delivery_df['runs_left'].mul(6).div(delivery_df['balls_left'])

In [None]:
def result(row):
    """Return 1 when the row's `batting_team` equals its `winner`, otherwise 0."""
    batting_side_won = row['batting_team'] == row['winner']
    return int(batting_side_won)

In [None]:
# Label every delivery with the match outcome: 1 if the batting side is the
# recorded winner, else 0.
# A vectorised column comparison yields the same 0/1 labels as applying a row
# function, without a Python-level call for every delivery.
delivery_df['result'] = (delivery_df['batting_team'] == delivery_df['winner']).astype('int64')

In [None]:
# Modelling table: the two sides and the city, the match-state features, and the
# 0/1 label as the last column.
df = delivery_df.loc[:, [
    'batting_team', 'bowling_team', 'city',
    'runs_left', 'balls_left', 'wickets',
    'total_runs_x', 'crr', 'rrr',
    'result',
]]

In [None]:
print(df.shape)
df.head()

In [None]:
# Shuffle all rows: sampling len(df) rows without replacement is a permutation.
# The fixed seed keeps the row order — and therefore the seeded train/test split
# and the reported accuracy — the same on every run.
df = df.sample(df.shape[0], random_state=1)

In [None]:
df.sample()

In [None]:
# Drop rows that have any missing value.
# Rebinding the result (rather than `inplace=True`) avoids mutating a derived
# frame in place and matches how the neighbouring filter cells reassign `df`.
df = df.dropna()

In [None]:
# Discard the rows that have no balls remaining.
df = df.loc[df['balls_left'].ne(0)]

In [None]:
# Target = the 0/1 `result` column; features = every other column.
y = df['result']
x = df.drop('result', axis=1)

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=1
)

In [None]:
x_train

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the three categorical columns (dropping the first level of
# each) and pass every other column through unchanged.
# scikit-learn 1.2 renamed the encoder's `sparse` argument to `sparse_output`
# and later removed the old name, so try the new keyword first and fall back to
# the old one on installs that do not know it yet.
try:
    encoder = OneHotEncoder(sparse_output=False, drop='first')
except TypeError:
    encoder = OneHotEncoder(sparse=False, drop='first')

trf = ColumnTransformer(
    [('trf', encoder, ['batting_team', 'bowling_team', 'city'])],
    remainder='passthrough',
)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

In [None]:
# Two-stage model: the column transformer first, then a logistic regression
# fitted with the liblinear solver.
classifier = LogisticRegression(solver='liblinear')
pipe = Pipeline(steps=[('step1', trf), ('step2', classifier)])

In [None]:
# Fit the transformer and the classifier on the training split.
pipe.fit(X=x_train, y=y_train)

In [None]:
# Predicted 0/1 labels for the held-out rows.
y_pred = pipe.predict(X=x_test)

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of held-out rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
pipe.predict_proba(x_test)[2]

In [None]:
def match_progression(x_df,match_id,pipe):
    """Summarise one match over by over with the model's win/lose percentages.

    Parameters
    ----------
    x_df : pandas.DataFrame
        Delivery-level frame containing `match_id`, `ball` and the feature
        columns `batting_team`, `bowling_team`, `city`, `runs_left`,
        `balls_left`, `wickets`, `total_runs_x`, `crr` and `rrr`.
    match_id : scalar
        Value of `match_id` selecting the match to summarise.
    pipe : object
        Fitted model exposing `predict_proba(frame)`; column 0 of its output is
        reported as "lose" and column 1 as "win".

    Returns
    -------
    tuple
        `(summary, target)` where `summary` has the columns `end_of_over`,
        `runs_after_over` (runs scored during that over), `wickets_in_over`,
        `lose` and `win` (percentages rounded to one decimal), and `target` is
        the first `total_runs_x` value of the selected rows.

    Raises
    ------
    ValueError
        If the match has no ball-6 rows with complete features.
    """
    feature_cols = ['batting_team','bowling_team','city','runs_left','balls_left','wickets','total_runs_x','crr','rrr']

    # One row per over: the delivery recorded as ball 6 of the requested match.
    over_ends = x_df[(x_df['match_id'] == match_id) & (x_df['ball'] == 6)]
    temp_df = over_ends[feature_cols].dropna()
    # .copy() so the columns added below land on an independent frame instead of
    # a slice of x_df (avoids pandas' SettingWithCopyWarning).
    temp_df = temp_df[temp_df['balls_left'] != 0].copy()
    if temp_df.empty:
        raise ValueError(f"no end-of-over deliveries with complete features for match_id={match_id!r}")

    probabilities = pipe.predict_proba(temp_df)
    temp_df['lose'] = np.round(probabilities.T[0]*100,1)
    temp_df['win'] = np.round(probabilities.T[1]*100,1)
    # Overs are numbered by position among the kept rows.
    temp_df['end_of_over'] = range(1,temp_df.shape[0]+1)

    target = temp_df['total_runs_x'].values[0]

    # Runs in each over = runs_left at the previous over end minus runs_left now;
    # the first over is measured against the target.
    runs_left = temp_df['runs_left'].values
    runs_before = np.concatenate(([target], runs_left[:-1]))
    temp_df['runs_after_over'] = runs_before - runs_left

    # Same differencing for the wickets column, starting from 10.
    wickets_left = temp_df['wickets'].values
    wickets_before = np.concatenate(([10], wickets_left[:-1]))
    temp_df['wickets_in_over'] = wickets_before - wickets_left

    print("Target-",target)
    temp_df = temp_df[['end_of_over','runs_after_over','wickets_in_over','lose','win']]
    return temp_df,target
    
    

In [None]:
# Over-by-over progression for match 74, scored with the fitted pipeline.
temp_df, target = match_progression(x_df=delivery_df, match_id=74, pipe=pipe)
temp_df

In [None]:
import pickle
# Serialise the fitted pipeline to pipe.pkl. The context manager closes the file
# handle as soon as the dump finishes, so the pickle is completely written
# before anything else reads it.
with open('pipe.pkl','wb') as model_file:
    pickle.dump(pipe,model_file)

In [None]:
teams

In [None]:
delivery_df['city'].unique()