# IPL match prediction
Here we extract the following details for each match:
1. IPL Season
2. Match ID (Match no)
3. Team 1 (the team that batted first)
4. Team 2
5. Winning team
6. Innings 1 runs
7. Innings 1 wickets
8. Innings 1 overs
9. Innings 2 runs 
10. Innings 2 wickets
11. Innings 2 overs
12. Venue
13. Date

We then use this data to estimate which team is more likely to win, based on head-to-head results.

In [1]:
import requests
import re
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Scrape the Cricbuzz scorecard page of every match id in
# [FIRST_MATCH_ID, LAST_MATCH_ID) and collect one tuple per parsed match.
IPL_SEASON = 3          # constant written as the first field of every record
FIRST_MATCH_ID = 10618
LAST_MATCH_ID = 10678   # exclusive upper bound
SCORECARD_URL = "https://www.cricbuzz.com/api/html/cricket-scorecard/"
REQUEST_TIMEOUT = 10    # seconds; keeps one stalled request from hanging the cell

# Patterns are compiled once instead of on every loop iteration.
exp_for_winning_team = re.compile(r'(.+)won',re.IGNORECASE)
exp_teams = re.compile(r'(.+)innings',re.IGNORECASE)
exp_runs_wickets = re.compile(r'[\d.]+')
exp_venue = re.compile(r'venue([\w\s,\.]+)',re.IGNORECASE)
exp_date_month = re.compile(r'date[\w\s]*,([\w\s]+),[\D\s]*?(\d+)\s?',re.IGNORECASE)


def _parse_innings(header_text):
    """Return (team, runs, wickets, overs) parsed from one innings header string.

    The team is the text before the word "innings"; runs, wickets and overs
    are the first three numeric tokens in the header, kept as strings.
    Raises AttributeError/ValueError when the header does not match.
    """
    team = exp_teams.search(header_text).group(1).strip()
    runs, wickets, overs = exp_runs_wickets.findall(header_text)[:3]
    return team, runs, wickets, overs


def _parse_scorecard(soup):
    """Extract the match fields from a parsed scorecard page.

    Returns a tuple (team1, team2, winning_team, team1_runs, team1_wickets,
    team1_overs, team2_runs, team2_wickets, team2_overs, venue, final_date).
    Raises an exception (IndexError, AttributeError or ValueError) when any
    of the expected elements is missing from the page.
    """
    team_won = soup.select("div.cb-col.cb-scrcrd-status.cb-col-100.cb-text-complete")
    statement = team_won[0].text.strip()
    winning_team = exp_for_winning_team.search(statement).group(1).strip()

    teams_playing = soup.select(".cb-col.cb-col-100.cb-scrd-hdr-rw")
    teams_playing_strip = [x.text.strip() for x in teams_playing]
    team1, team1_runs, team1_wickets, team1_overs = _parse_innings(teams_playing_strip[0])
    team2, team2_runs, team2_wickets, team2_overs = _parse_innings(teams_playing_strip[1])

    match_info_strip = [x.text.strip() for x in soup.select('.cb-mtch-info-itm')]

    # Start from "not found" for every match so a page without venue/date can
    # never inherit the values scraped for the previous match.
    venue = None
    final_date = None
    for item in match_info_strip:
        venue_obj = exp_venue.search(item)
        if venue_obj:
            venue = venue_obj.group(1).strip()
        month_obj = exp_date_month.search(item)
        if month_obj:
            # group(1) holds "<month> <day>"; group(2) holds the year.
            month, date = re.split(r'\s',month_obj.group(1).strip(),maxsplit=1)
            year = month_obj.group(2).strip()
            final_date = month + '-' + date + '-' + year
    if venue is None or final_date is None:
        raise ValueError('venue or date not found in match info')

    return (team1,team2,winning_team,team1_runs,team1_wickets,team1_overs,
            team2_runs,team2_wickets,team2_overs,venue,final_date)


records = []

for match_id in range(FIRST_MATCH_ID, LAST_MATCH_ID):
    url = SCORECARD_URL+str(match_id)
    try:
        # The request is inside the try block so a single network failure
        # skips this match instead of aborting the whole scrape.
        res = requests.get(url, timeout=REQUEST_TIMEOUT)
        soup = BeautifulSoup(res.content,'html.parser')
        records.append((IPL_SEASON,match_id) + _parse_scorecard(soup))
    except Exception as err:
        # Best-effort scraping: report the match and the reason, then move on.
        # Catching Exception (not a bare except) lets Ctrl-C interrupt the loop.
        print('Exception at match id',match_id,'-',repr(err))


In [3]:
# Column labels, listed in the same order as the fields of each tuple in `records`.
record_columns = [
    'IPL season',
    'Match Id',
    'Team 1',
    'Team 2',
    'Winning Team',
    'Innings1 runs',
    'Innings1 wickets',
    'Innings1 overs',
    'Innings2 runs',
    'Innings2 wickets',
    'Innings2 overs',
    'Venue',
    'Date',
]

df = pd.DataFrame(records, columns=record_columns)

# Convert the Date column to datetimes before writing the table to disk.
df['Date'] = pd.to_datetime(df['Date'])
df.to_csv('ipltrail2.csv', index=False, encoding='utf-8')

In [4]:
def predict(team1,team2,stadium=None,csv_path='ipltrail2.csv'):
    """Print head-to-head win counts and win percentages for two teams.

    Parameters
    ----------
    team1, team2 : str
        Team names, compared for exact equality against the "Team 1",
        "Team 2" and "Winning Team" columns of the CSV.
    stadium : optional
        Accepted for backward compatibility; currently not used.
    csv_path : str
        Path of the match CSV to read. Defaults to 'ipltrail2.csv'.

    Returns
    -------
    None. The summary is printed. When the two teams have no recorded
    matches against each other, a notice is printed instead of the
    percentages (the percentages would require dividing by zero).
    """
    df = pd.read_csv(csv_path)

    # Select the matches between the two teams, in either batting order.
    team1_batted_first = (df["Team 1"]==team1) & (df["Team 2"]==team2)
    team2_batted_first = (df["Team 2"]==team1) & (df["Team 1"]==team2)
    head_to_head = df[team1_batted_first | team2_batted_first]

    # Every head-to-head match not won by team1 is counted for team2.
    # int() keeps the arithmetic (and the printed floats) in plain Python numbers.
    team1_wins = int((head_to_head["Winning Team"]==team1).sum())
    team2_wins = len(head_to_head) - team1_wins

    print(team1+" has won "+str(team1_wins)+" matches")
    print(team2+" has won "+str(team2_wins)+" matches")

    total_matches = team1_wins+team2_wins
    if total_matches == 0:
        print("no matches found between "+team1+" and "+team2+", chances cannot be computed")
        return
    print("chances of team1 winning is "+str((team1_wins/total_matches)*100)+"%")
    print("chances of team2 winning is "+str((team2_wins/total_matches)*100)+"%")

In [5]:
# Head-to-head summary for these two teams, read from the saved match CSV.
predict(team1="Mumbai Indians", team2="Royal Challengers Bangalore")

Mumbai Indians has won 2 matches
Royal Challengers Bangalore has won 1 matches
chances of team1 winning is 66.66666666666666%
chances of team2 winning is 33.33333333333333%
