## Importing Dataset and required libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the IPL highlights commentary CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer='/content/IPL_Match_Highlights_Commentary.csv')
# A bare last expression renders the (truncated) frame as the cell output.
data

Unnamed: 0,Match_id,Team,Over_num,Commentary
0,4281444,RCB 1st Inns,0.5,"Nehra to Mandeep, FOUR, first boundary for Man..."
1,4281444,RCB 1st Inns,1.0,"Nehra to Mandeep, FOUR, back-to-back boundarie..."
2,4281444,RCB 1st Inns,10.0,"Henriques to Kedar Jadhav, FOUR, hit straight ..."
3,4281444,RCB 1st Inns,10.4,"Nehra to Kedar Jadhav, FOUR, another full toss..."
4,4281444,RCB 1st Inns,10.5,"Nehra to Kedar Jadhav, FOUR, four more. Jadhav..."
...,...,...,...,...
11569,5937426,RR 1st Inns,4.0,"Rabada to Smith, FOUR, who'd've thought the wo..."
11570,5937426,RR 1st Inns,5.2,"Harshal Patel to Jaiswal, SIX, short ball, and..."
11571,5937426,RR 1st Inns,6.2,"Ashwin to Smith, FOUR, a rare rank half-tracke..."
11572,5937426,RR 1st Inns,7.0,"Ashwin to Jaiswal, 1 run, pushes this one hast..."


## Data Preprocessing

In [None]:
# Convert every commentary line to lowercase so the lowercase keyword checks
# used by the extraction rules ('wide', 'no ball', 'out', 'byes', 'kph')
# match regardless of the original casing.
# The vectorised .str accessor propagates missing values as NaN, whereas
# calling x.lower() through apply() raises AttributeError on a NaN row.
data['Commentary'] = data['Commentary'].str.lower()

## Creating Template

We are going to create a template that contains the following fields:
1. batsman
2. bowler
3. speed of the ball
4. ball type
5. runs/wicket

In [None]:
# Empty scorecard: one slot per field we extract, each starting as ''.
scorecard_template = dict.fromkeys(
  ['Batsman', 'Bowler', 'Speed', 'Ball', 'Runs/Wicket'],
  '',
)
scorecard_template

{'Batsman': '', 'Bowler': '', 'Speed': '', 'Ball': '', 'Runs/Wicket': ''}

## Information Extraction using Rule-Based Methods like Regex

In [None]:
import re

def extract_information(commentary):
  """Extract the scorecard fields from one commentary line.

  The line is parsed as comma-separated fields of the form
  ``"<bowler> to <batsman>, <outcome>, <detail>, ..."``. For a wide or a
  no ball the second field names the extra and the third field is
  inspected for the result; otherwise the second field is the result.

  Parameters
  ----------
  commentary : str
      A single commentary line. All keyword checks ('wide', 'no ball',
      'out', 'byes', 'run out', 'stumped', 'kph') are case-sensitive and
      lowercase, so the text should be lowercased beforehand.

  Returns
  -------
  tuple of str
      ``(batsman, bowler, ball_type, rw, speed)``:
      ``ball_type`` is 'wide', 'no ball' or 'normal'; ``rw`` is the
      runs/wicket text; ``speed`` looks like '137.5 kmph'.
      A field that cannot be found in the line is returned as 'NA'
      (short or malformed lines no longer raise ``IndexError``).
  """
  # Split once and pad to three fields so a line that stops early
  # (e.g. "x to y, wide") can still be indexed safely.
  fields = commentary.split(',')
  fields += [''] * (3 - len(fields))
  header, outcome, detail = fields[0], fields[1], fields[2]

  # Bowler and batsman come from "<bowler> to <batsman>" in the first field.
  names = header.split(' to ')
  if len(names) > 1:
    bowler, batsman = names[0], names[1]
  else:
    bowler = batsman = 'NA'

  # Speed: a number immediately followed by "kph" anywhere in the line.
  speed_match = re.search(r"(\d+(\.\d+)?)kph", commentary)
  speed = speed_match.group(1) + ' kmph' if speed_match else 'NA'

  # Ball type : wide / no ball / normal (wide is checked first).
  if 'wide' in outcome:
    ball_type = 'wide'
  elif 'no ball' in outcome:
    ball_type = 'no ball'
  else:
    ball_type = 'normal'

  if ball_type == 'wide':
    # On a wide only a run out or a stumping is reported; else no run.
    if 'run out' in detail:
      rw = 'run out'
    elif 'stumped' in detail:
      rw = 'stump out'
    else:
      rw = 'no run'
  else:
    # For a no ball the result sits one field later than for a normal ball.
    result = detail if ball_type == 'no ball' else outcome
    if 'out' in result:
      # Keep only the dismissal text before the "!!" marker.
      rw = result.split('!!')[0][1:]
    elif 'byes' in result:
      # The third field is used as the result for byes.
      rw = detail[1:]
    else:
      rw = result[1:]
    # [1:] drops the space that follows the comma; an empty result means
    # the field was missing or blank.
    if not rw:
      rw = 'NA'

  return batsman, bowler, ball_type, rw, speed


### Results

In [None]:
# Ask which row of the dataset to parse, then run the extractor on that line.
commentary_number = int(input("Enter Commentary Number :"))
comment = data.Commentary[commentary_number]
batsman, bowler, ball, rws, speed = extract_information(comment)

# Echo the raw line between two starred rules, each padded by blank lines.
print('', '*'*50, '', sep='\n')
print('Given Commentary line : ',comment)
print('', '*'*50, '', sep='\n')

# Extracted fields, title-cased for display (the speed is shown unchanged).
print(
  f'Batsman    : {batsman.title()}',
  f'Bowler     : {bowler.title()}',
  f'Ball Speed : {speed}',
  f'Ball       : {ball.title()}',
  f'Runs/Out   : {rws.title()}',
  sep='\n',
)

Enter Commentary Number :10107

**************************************************

Given Commentary line :  hooda to dhawan, wide, out stumped!! that's an excellent piece of keeping from saha, nearly matched msd (in terms of alertness) there. hooda may have second guessed dhawan's intentions and he hurled the ball miles down the leg-side and kept it short too; dhawan was never backing away, went charging down and missed the ball completely. looked to force himself back into the crease, but saha did a wonderful job stretching across to take the bails off in the nick of time. dhawan st saha b hooda 17(16) [4s-3]

**************************************************

Batsman    : Dhawan
Bowler     : Hooda
Ball Speed : NA
Ball       : Wide
Runs/Out   : Stump Out


In [None]:
# Display the raw commentary line selected in the results cell.
comment

"hooda to dhawan, wide, out stumped!! that's an excellent piece of keeping from saha, nearly matched msd (in terms of alertness) there. hooda may have second guessed dhawan's intentions and he hurled the ball miles down the leg-side and kept it short too; dhawan was never backing away, went charging down and missed the ball completely. looked to force himself back into the crease, but saha did a wonderful job stretching across to take the bails off in the nick of time. dhawan st saha b hooda 17(16) [4s-3]"

## Template Matching
Matching the extracted information with our template

In [None]:
# Fill every slot of the template in place with the values extracted from
# the selected commentary line (keys already exist, so their order is kept).
scorecard_template.update({
  'Batsman': batsman,
  'Bowler': bowler,
  'Speed': speed,
  'Ball': ball,
  'Runs/Wicket': rws,
})

In [None]:
# Display the filled-in template as the cell output.
scorecard_template

{'Batsman': 'hardik pandya',
 'Bowler': 'tom curran',
 'Speed': '137.5 kmph',
 'Ball': 'normal',
 'Runs/Wicket': '2 runs'}