In [2]:
# Team names offered for selection / used alongside the trained model.
# Every entry must end with a comma: adjacent string literals are silently
# concatenated by Python, which would merge two teams into one bogus name.
# NOTE(review): this list uses 'Kings XI Punjab' and 'Royal Challengers Bengaluru',
# while the replacement map later in the notebook uses 'Punjab Kings' and
# 'Royal Challengers Bangalore' - confirm which spellings the data contains.
teams = [
    'Kings XI Punjab',
    'Mumbai Indians',
    'Kolkata Knight Riders',
    'Rajasthan Royals',
    'Chennai Super Kings',
    'Sunrisers Hyderabad',
    'Delhi Capitals',
    'Lucknow Super Giants',
    'Gujarat Titans',
    'Royal Challengers Bengaluru',
]
# Host cities, kept in their original order.
# NOTE(review): both 'Bangalore' and 'Bengaluru' are listed - confirm the data
# really contains both spellings.
city = [
    'Bangalore', 'Chandigarh', 'Delhi', 'Mumbai',
    'Kolkata', 'Jaipur', 'Hyderabad', 'Chennai',
    'Cape Town', 'Port Elizabeth', 'Durban', 'Centurion',
    'East London', 'Johannesburg', 'Kimberley', 'Bloemfontein',
    'Ahmedabad', 'Cuttack', 'Nagpur', 'Dharamsala',
    'Kochi', 'Indore', 'Visakhapatnam', 'Pune',
    'Raipur', 'Ranchi', 'Abu Dhabi', 'Rajkot',
    'Kanpur', 'Bengaluru', 'Dubai', 'Sharjah',
    'Navi Mumbai', 'Lucknow', 'Guwahati', 'Mohali',
]

In [5]:
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline

In [7]:
# Load the two raw tables: `match` is later joined on its `id` column and
# `delivery` on `match_id` (see the merges further down).
match, delivery = (
    pd.read_csv(csv_name) for csv_name in ('DATA1.csv', 'DATA2.csv')
)

In [9]:
# Total runs scored in the first innings of every match.
# Select `total_runs` BEFORE aggregating: summing the whole frame also tries to
# "sum" the text columns (pandas >= 2.0 defaults to numeric_only=False), which is
# slow and may raise on mixed string/NaN columns.
first_team_total = (
    delivery.groupby(['match_id', 'inning'])['total_runs']
    .sum()
    .reset_index()
    .loc[lambda totals: totals['inning'] == 1]
)

In [10]:
# Attach the first-innings total to each match (inner join: matches without a
# first-innings total are dropped).
match1 = match.merge(
    first_team_total[['match_id', 'total_runs']],
    left_on='id',
    right_on='match_id',
)

# Old franchise name -> name it is folded into.
team_replacements = {
    'Delhi Daredevils': 'Delhi Capitals',
    'Deccan Chargers': 'Sunrisers Hyderabad',
    'Rising Pune Supergiants': 'Mumbai Indians',
    'Rising Pune Supergiant': 'Mumbai Indians',
    'Gujarat Lions': 'Gujarat Titans',
    'Kings XI Punjab': 'Punjab Kings',
    'Pune Warriors': 'Mumbai Indians',
    'Kochi Tuskers Kerala': 'Royal Challengers Bangalore'
}

# Exact-value replacement. The previous substring-based `.str.replace` loop only
# worked because 'Rising Pune Supergiants' happened to be processed before its
# prefix 'Rising Pune Supergiant'; whole-value matching has no such ordering trap.
match1 = match1.assign(
    team1=match1['team1'].replace(team_replacements),
    team2=match1['team2'].replace(team_replacements),
)

# Drop fixtures whose two sides collapse to the same name after the mapping,
# then keep only the columns needed downstream (the filter was previously
# applied twice; once is enough).
# NOTE(review): the normalised team1/team2 columns are discarded here, and
# `winner` plus the delivery-level team columns are never normalised, so the
# mapping above only influences which matches are filtered out - confirm intent.
match1 = match1.loc[
    match1['team1'] != match1['team2'],
    ['match_id', 'city', 'winner', 'total_runs'],
]

# Ball-by-ball rows of the second innings only. Both frames carry `total_runs`,
# so the merge yields `total_runs_x` (first-innings total) and `total_runs_y`
# (runs off the delivery).
delivery1 = match1.merge(delivery, on='match_id').loc[lambda rows: rows['inning'] == 2]

In [11]:
# Per-delivery chase features for the second innings.
#
# Fixes relative to the previous version of this cell:
#   * wicket_left is now 10 minus the CUMULATIVE dismissals in the match. Before,
#     it was 10 minus the per-ball value, and `to_numeric` turned any non-numeric
#     `player_dismissed` entry into 0, so it was effectively always 10.
#   * current_score is accumulated from the numeric-coerced runs column.
#   * balls_left and crr now derive from the same "balls bowled" count. Before,
#     balls_left used 126 - (over*6 + ball) while crr divided by over*6 + ball,
#     which disagree by one over whichever way overs are numbered.
# NOTE(review): the over numbering base is inferred from the smallest `over`
# value present (0 or 1) - confirm against the data set.
delivery1 = (
    delivery1.assign(
        total_runs_y=pd.to_numeric(delivery1['total_runs_y'], errors='coerce'),
        # 1 if the delivery produced a dismissal, else 0. Handles both a
        # name-or-NaN column and an already numeric 0/1 column.
        player_dismissed=(
            delivery1['player_dismissed'].notna()
            & ~delivery1['player_dismissed'].astype(str).str.strip().isin(['', '0', '0.0'])
        ).astype(int),
    )
    .assign(
        # Running chase total within each match (rows are assumed to be in
        # delivery order, as before).
        current_score=lambda x: x.groupby('match_id')['total_runs_y'].cumsum(),
        runs_left=lambda x: x['total_runs_x'] - x['current_score'],
        # Balls bowled so far = completed overs * 6 + ball number in this over.
        balls_left=lambda x: 120 - ((x['over'] - x['over'].min()) * 6 + x['ball']),
    )
    .assign(
        wicket_left=lambda x: 10 - x.groupby('match_id')['player_dismissed'].cumsum(),
        # 120 - balls_left is the number of balls bowled; the epsilon guards
        # against division by zero.
        crr=lambda x: (x['current_score'] * 6) / (120 - x['balls_left'] + 1e-9),
        rrr=lambda x: (x['runs_left'] * 6) / (x['balls_left'] + 1e-9),
    )
    .query("balls_left != 0 and runs_left >= 0")
)

In [13]:
def result(row):
    """Return 1 if the row's batting team equals its winner, otherwise 0."""
    batting_side_won = row['batting_team'] == row['winner']
    if batting_side_won:
        return 1
    return 0
# Label: 1 when the chasing (batting) side won the match, else 0.
# Vectorised comparison instead of a row-wise `apply(axis=1)`: same values
# (a missing winner compares unequal, giving 0) without a Python call per row.
delivery1['result'] = (delivery1['batting_team'] == delivery1['winner']).astype(int)

In [14]:
# Model table: the nine feature columns followed by the `result` label.
# The label must stay last because the next cell splits features and target
# by position.
final = delivery1.loc[
    :,
    [
        'batting_team', 'bowling_team', 'city',
        'runs_left', 'balls_left', 'wicket_left',
        'total_runs_x', 'crr', 'rrr',
        'result',
    ],
]

In [15]:
# Features are every column except the last; the last column is the target.
X, y = final.iloc[:, :-1], final.iloc[:, -1]

# 80/20 split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
    test_size=0.2,
)

In [17]:
# One-hot encode the three categorical columns (dropping the first level of
# each, dense output); all remaining columns pass through unchanged.
trf = ColumnTransformer(
    transformers=[
        (
            'trf',
            OneHotEncoder(drop='first', sparse_output=False),
            ['batting_team', 'bowling_team', 'city'],
        ),
    ],
    remainder='passthrough',
)

In [18]:
# Encoder + liblinear logistic regression, fitted on the training split.
# `Pipeline.fit` returns the fitted pipeline itself, so it can be chained.
pipe = Pipeline(
    steps=[
        ('step1', trf),
        ('step2', LogisticRegression(solver='liblinear')),
    ]
).fit(X_train, y_train)

# Class predictions for the held-out rows.
y_pred = pipe.predict(X_test)

In [19]:
def match_progression(x_df,match_id,pipe):
    """Summarise one chase over by over with the model's win/lose percentages.

    Parameters
    ----------
    x_df : pandas.DataFrame
        Per-delivery frame containing `match_id`, `ball`, the model feature
        columns, and a wickets-remaining column named `wicket_left` (the name
        used when building the features in this notebook) or the legacy
        `wickets`.
    match_id : scalar
        Value of `match_id` selecting the match to summarise.
    pipe : object
        Fitted estimator exposing `predict_proba`; column 0 of its output is
        reported as `lose` and column 1 as `win`.

    Returns
    -------
    (pandas.DataFrame, scalar)
        A frame with columns `end_of_over`, `runs_after_over`,
        `wickets_in_over`, `lose`, `win`, and the `total_runs_x` value of the
        first selected row (printed and returned as the target).

    Raises
    ------
    ValueError
        If no usable row (ball == 6, non-null features, balls_left != 0)
        exists for `match_id`.
    """
    # The notebook's feature frame calls this column `wicket_left`; the old code
    # hard-coded `wickets`, which that frame does not have. Prefer `wickets` when
    # it exists so previously working inputs behave exactly as before.
    wicket_col = 'wickets' if 'wickets' in x_df.columns else 'wicket_left'
    feature_cols = ['batting_team','bowling_team','city','runs_left','balls_left',
                    wicket_col,'total_runs_x','crr','rrr']

    # Keep only rows where ball == 6 for the requested match.
    over_ends = x_df[(x_df['match_id'] == match_id) & (x_df['ball'] == 6)]
    temp_df = over_ends[feature_cols].dropna()
    # .copy() so the column assignments below never write into a view of x_df.
    temp_df = temp_df[temp_df['balls_left'] != 0].copy()
    if temp_df.empty:
        raise ValueError(f"no end-of-over rows available for match_id={match_id!r}")

    proba = pipe.predict_proba(temp_df)
    temp_df['lose'] = np.round(proba[:, 0]*100,1)
    temp_df['win'] = np.round(proba[:, 1]*100,1)
    temp_df['end_of_over'] = range(1,temp_df.shape[0]+1)

    target = temp_df['total_runs_x'].values[0]

    # Runs scored between consecutive selected rows: previous runs_left minus the
    # current one, with the target standing in before the first row.
    temp_df['runs_after_over'] = -np.diff(temp_df['runs_left'].values, prepend=target)
    # Wickets lost between consecutive selected rows, starting from 10 in hand.
    temp_df['wickets_in_over'] = -np.diff(temp_df[wicket_col].values, prepend=10)

    print("Target-",target)
    temp_df = temp_df[['end_of_over','runs_after_over','wickets_in_over','lose','win']]
    return temp_df,target

In [21]:
# Persist the fitted pipeline. The context manager guarantees the file is
# flushed and closed even if pickling fails (the bare open() never closed it).
# Only load this file from trusted locations: unpickling executes arbitrary code.
with open('pipes.pkl', 'wb') as model_file:
    pickle.dump(pipe, model_file)