## Import Library

In [1]:
import pandas as pd
from heapq import nlargest

## Import Data

In [2]:
data = pd.read_csv('2019_ODI.csv')
data.head()

Unnamed: 0,Team 1,Team 2,Winner,Margin,Ground,Country,Match Date,Scorecard
0,New Zealand,Pakistan,New Zealand,61 runs,Wellington,New Zealand,06-Jan-18,ODI # 3946
1,New Zealand,Pakistan,New Zealand,8 wickets,Nelson,Canada,09-Jan-18,ODI # 3947
2,U.A.E.,Ireland,Ireland,4 wickets,ICCA Dubai,Dubai,11-Jan-18,ODI # 3948
3,New Zealand,Pakistan,New Zealand,183 runs,Dunedin,New Zealand,13-Jan-18,ODI # 3949
4,U.A.E.,Ireland,Ireland,67 runs,ICCA Dubai,Dubai,13-Jan-18,ODI # 3950


### Updating datalist

In [3]:
data['Win by runs'] = ""
data['Win by wickets'] = ""
data['Date'] = ""
data['Month'] = ""
data['Year'] = ""

for i in range(len(data)):
    if data['Margin'][i] == 'None':
        pass
    else:
        m = data['Margin'][i]
        mrg = m.split(' ')
        if mrg[1] == 'runs':
            data['Win by runs'][i] = int(mrg[0])
        elif mrg[1] == 'wickets':
            data['Win by wickets'][i] = int(mrg[0])
    
    date = data['Match Date'][i]
    dd = date.split('-')
    data['Date'][i] = dd[0]
    data['Month'][i] = dd[1]  
    data['Year'][i] = dd[2]

            
data.head()

Unnamed: 0,Team 1,Team 2,Winner,Margin,Ground,Country,Match Date,Scorecard,Win by runs,Win by wickets,Date,Month,Year
0,New Zealand,Pakistan,New Zealand,61 runs,Wellington,New Zealand,06-Jan-18,ODI # 3946,61.0,,6,Jan,18
1,New Zealand,Pakistan,New Zealand,8 wickets,Nelson,Canada,09-Jan-18,ODI # 3947,,8.0,9,Jan,18
2,U.A.E.,Ireland,Ireland,4 wickets,ICCA Dubai,Dubai,11-Jan-18,ODI # 3948,,4.0,11,Jan,18
3,New Zealand,Pakistan,New Zealand,183 runs,Dunedin,New Zealand,13-Jan-18,ODI # 3949,183.0,,13,Jan,18
4,U.A.E.,Ireland,Ireland,67 runs,ICCA Dubai,Dubai,13-Jan-18,ODI # 3950,67.0,,13,Jan,18


### Getting team list

In [4]:
team_list = []
for i in range(len(data)):
    if data['Team 1'][i] not in team_list:
        team_list.append(data['Team 1'][i])
    if data['Team 2'][i] not in team_list:
        team_list.append(data['Team 2'][i])

print(f"There are total of {len(team_list)} teams. They are ")
for i in team_list:
    print(i)

There are total of 18 teams. They are 
New Zealand
Pakistan
U.A.E.
Ireland
Australia
England
Bangladesh
Zimbabwe
Scotland
Sri Lanka
South Africa
India
Afghanistan
P.N.G.
Hong Kong
West Indies
Netherlands
Nepal


### Top 3 played team and winners

In [5]:
team_play = {}
team_win = {}
for i in range(len(data)):
    if data['Team 1'][i] in team_play:
        team_play[data['Team 1'][i]] += 1
    else:
        team_play[data['Team 1'][i]] = 1
        
    if data['Team 2'][i] in team_play:
        team_play[data['Team 2'][i]] += 1
    else:
        team_play[data['Team 2'][i]] = 1
        
    if data['Winner'][i] in team_win:
        team_win[data['Winner'][i]] += 1
    else:
        team_win[data['Winner'][i]] = 1
    

three_largest = nlargest(3, team_play, key=team_play.get)
three_wins = nlargest(3, team_win, key=team_win.get)

print(f"The Country played most in 2018 are {three_largest[0]} by {team_play[three_largest[0]]} games, {three_largest[1]} by {team_play[three_largest[1]]} games and {three_largest[2]} by {team_play[three_largest[2]]} games")
print(f"Top three countries who win most in 2018 are {three_wins[0]} by {team_win[three_wins[0]]} wins, {three_wins[1]} by {team_win[three_wins[1]]} wins and {three_wins[2]} by {team_win[three_wins[2]]} wins.")

The Country played most in 2018 are Zimbabwe by 26 games, England by 24 games and Bangladesh by 20 games
Top three countries who win most in 2018 are England by 17 wins, India by 14 wins and Bangladesh by 13 wins.


### Most matches in home country

In [6]:
home_play = {}
for i in range(len(data)):
    if data['Team 1'][i] == data['Country'][i]:
        if data['Team 1'][i] in home_play:
            home_play[data['Team 1'][i]] += 1
        else:
            home_play[data['Team 1'][i]] = 1
            
    if data['Team 2'][i] == data['Country'][i]:
        if data['Team 2'][i] in home_play:
            home_play[data['Team 2'][i]] += 1
        else:
            home_play[data['Team 2'][i]] = 1
home_largest = nlargest(3, home_play, key=home_play.get)

print(f"The top 3 countries who played most in their home ground are {home_largest[0]} with {home_play[home_largest[0]]} games, {home_largest[1]} with {home_play[home_largest[1]]} games and {home_largest[2]} with {home_play[home_largest[2]]} games")

The top 3 countries who played most in their home ground are Zimbabwe with 11 games, Bangladesh with 10 games and Sri Lanka with 10 games


### Most toured team

In [7]:
tour_team = {}
for i in range(len(data)):
    if data['Team 1'][i] != data['Country'][i]:
        if data['Team 1'][i] in tour_team:
            tour_team[data['Team 1'][i]] += 1
        else:
            tour_team[data['Team 1'][i]] = 1
    if data['Team 2'][i] != data['Country'][i]:
        if data['Team 2'][i] in tour_team:
            tour_team[data['Team 2'][i]] += 1
        else:
            tour_team[data['Team 2'][i]] = 1

tour_sort = sorted(tour_team, key=tour_team.get, reverse=True)
tour_largest = nlargest(3, tour_team, key=tour_team.get)
print(f"The top 3 teams toured are {tour_largest[0]} with {tour_team[tour_largest[0]]} games, {tour_largest[1]} with {tour_team[tour_largest[1]]} games and {tour_largest[2]} with {tour_team[tour_largest[2]]} games.")

The top 3 teams toured are Afghanistan with 20 games, Pakistan with 18 games and West Indies with 18 games.


### Performance of a single country. Here its Sri Lanka

In [8]:
team_history = {'team':'Sri Lanka','totalGame':0,'chase':0,'defent':0,
                'win':0,'chaseWin':0,'defentWin':0,'homeWin':0,'awayWin':0}
team = team_history['team']
team_history['totalGame'] = team_play[team]
for i in range(len(data)):
    if data['Winner'][i] == team:
        team_history['win'] += 1
        
    if (data['Team 1'][i] == team):
        team_history['defent'] += 1
        
    if (data['Team 2'][i] == team):
        team_history['chase'] += 1
        
    if (data['Team 2'][i] == team) and (data['Winner'][i] == team):
        team_history['chaseWin'] += 1
        
    if (data['Team 1'][i] == team) and (data['Winner'][i] == team):
        team_history['defentWin'] += 1
        
    if (data['Country'][i] == team) and (data['Winner'][i] == team):
        team_history['homeWin'] += 1
        
    if (data['Country'][i] != team) and (data['Winner'][i] == team):
        team_history['awayWin'] += 1
        
print(f"Performance of {team_history['team']}. They played as total of {team_history['totalGame']} games in which they won {team_history['win']} games. In {team_history['totalGame']} games, {team_history['chase']} are chase and rest {team_history['defent']} is defent. In {team_history['win']} wins {team_history['chaseWin']} of them are by chase and rest {team_history['defentWin']} are by defent. Also {team_history['homeWin']} home wins and {team_history['awayWin']} away wins.")

Performance of Sri Lanka. They played as total of 17 games in which they won 6 games. In 17 games, 5 are chase and rest 12 is defent. In 6 wins 2 of them are by chase and rest 4 are by defent. Also 3 home wins and 3 away wins.


### Top 3 wins by run

In [9]:
by_run={}
for i in range(len(data)):
    if data['Win by runs'][i] == '':
        pass
    else:
        winner = data['Winner'][i]
        if data['Team 1'][i] == winner:
            looser = data['Team 2'][i]
        else:
            looser = data['Team 1'][i]
        by_run[data['Win by runs'][i]] = {'win':winner,'loss':looser}
k = []
for i in by_run:
    k.append(i)
l = sorted(k,reverse=True)[:3]

print(f"The top 3 wins are: \n{by_run[l[0]]['win']} wins {by_run[l[0]]['loss']} by {l[0]} runs \n{by_run[l[1]]['win']} wins {by_run[l[1]]['loss']} by {l[1]} runs \n{by_run[l[2]]['win']} wins {by_run[l[2]]['loss']} by {l[2]} runs")

The top 3 wins are: 
Pakistan wins Zimbabwe by 244 runs 
England wins Australia by 242 runs 
Ireland wins U.A.E. by 226 runs


### Month with most ODI

In [10]:
match_per_month = {}
for i in range(len(data)):
    if data['Month'][i] in match_per_month:
        match_per_month[data['Month'][i]] += 1
    else:
        match_per_month[data['Month'][i]] = 1
        
month_sort = sorted(match_per_month, key=match_per_month.get, reverse=True)
print(f"Month with most ODI played is {month_sort[0]} with {match_per_month[month_sort[0]]} games")

Month with most ODI played is Mar with 26 games


### Most played ground

In [11]:
on_ground = {}
for i in range(len(data)):
    if data['Ground'][i] in on_ground:
        on_ground[data['Ground'][i]] += 1
    else:
        on_ground[data['Ground'][i]] = 1

ground_sort = sorted(on_ground, key=on_ground.get, reverse=True)
print('The most played ground is '+ground_sort[0]+' with '+str(on_ground[ground_sort[0]])+' games')

The most played ground is Harare with 16 games


### India win by chase or defent

In [12]:
team_history = {'team':'India','totalGame':0,'chase':0,'defent':0,
                'win':0,'chaseWin':0,'defentWin':0,'homeWin':0,'awayWin':0}
team = team_history['team']
team_history['totalGame'] = team_play[team]
for i in range(len(data)):
    if data['Winner'][i] == team:
        team_history['win'] += 1
        
    if (data['Team 1'][i] == team):
        team_history['defent'] += 1
        
    if (data['Team 2'][i] == team):
        team_history['chase'] += 1
        
    if (data['Team 2'][i] == team) and (data['Winner'][i] == team):
        team_history['chaseWin'] += 1
        
    if (data['Team 1'][i] == team) and (data['Winner'][i] == team):
        team_history['defentWin'] += 1
        
    if (data['Country'][i] == team) and (data['Winner'][i] == team):
        team_history['homeWin'] += 1
        
    if (data['Country'][i] != team) and (data['Winner'][i] == team):
        team_history['awayWin'] += 1

if team_history['chaseWin'] < team_history['defentWin']:
    print(f"{team} mostly win by playing first")
else:
    print(f"{team} mostly win by chase")

India mostly win by chase


### 3 Teams with most wins and valuated by months 

In [13]:
country_month = {}
month_per = {}
for i in range(len(data)):
    if data['Winner'][i] in country_month:
        country_month[data['Winner'][i]]['Win'] += 1
        country_month[data['Winner'][i]][data['Month'][i]] += 1
    else:
        country_month[data['Winner'][i]] = {'Win':1,'Jan': 0, 'Feb': 0, 'Mar': 0, 'Jun': 0, 'Jul': 0, 'Aug': 0, 'Sep': 0,
                                            'Oct': 0, 'Nov': 0, 'Dec': 0}
        country_month[data['Winner'][i]][data['Month'][i]] = 1

for i in three_wins:
    month_per[i] = {}
    for j in country_month[i]:
        month_per[i][j] = round(((country_month[i][j]/country_month[i]['Win'])*100),2)
        
print(f"Top three countries who win most in 2018 are ")
for i in month_per:
    print(f"{i} with {country_month[i]['Win']} wins which comes from follows months ")
    for j in month_per[i]:
        if j != 'Win':
            print(f"{j} : {country_month[i][j]} ({month_per[i][j]}%)")
    print(" ")

Top three countries who win most in 2018 are 
England with 17 wins which comes from follows months 
Jan : 4 (23.53%)
Feb : 1 (5.88%)
Mar : 2 (11.76%)
Jun : 5 (29.41%)
Jul : 2 (11.76%)
Aug : 0 (0.0%)
Sep : 0 (0.0%)
Oct : 3 (17.65%)
Nov : 0 (0.0%)
Dec : 0 (0.0%)
 
India with 14 wins which comes from follows months 
Jan : 0 (0.0%)
Feb : 5 (35.71%)
Mar : 0 (0.0%)
Jun : 0 (0.0%)
Jul : 1 (7.14%)
Aug : 0 (0.0%)
Sep : 5 (35.71%)
Oct : 2 (14.29%)
Nov : 1 (7.14%)
Dec : 0 (0.0%)
 
Bangladesh with 13 wins which comes from follows months 
Jan : 3 (23.08%)
Feb : 0 (0.0%)
Mar : 0 (0.0%)
Jun : 0 (0.0%)
Jul : 2 (15.38%)
Aug : 0 (0.0%)
Sep : 3 (23.08%)
Oct : 3 (23.08%)
Nov : 0 (0.0%)
Dec : 2 (15.38%)
 


### 3 countries with highest failure

In [14]:
lost_country = {}
lost_array = {}

for i in team_list:
    lost_country[i] = {'lost':0,'chase':0,'defent':0}
    lost_array[i] = 0
    
for i in range(len(data)):
    if data['Winner'][i] == data['Team 1'][i]:
        lost_country[data['Team 2'][i]]['lost'] += 1
        lost_array[data['Team 2'][i]] += 1
        if data['Win by runs'][i]:
            lost_country[data['Team 2'][i]]['defent'] += 1
        else: 
            lost_country[data['Team 2'][i]]['chase'] += 1
    else:
        lost_country[data['Team 1'][i]]['lost'] += 1
        lost_array[data['Team 1'][i]] += 1
        if data['Win by runs'][i]:
            lost_country[data['Team 1'][i]]['defent'] += 1
        else: 
            lost_country[data['Team 1'][i]]['chase'] += 1

last_3 = nlargest(3, lost_array, key=lost_array.get)

print(f"The most lost teams are {last_3[0]} with {lost_country[last_3[0]]['lost']} losses,{last_3[1]} with {lost_country[last_3[0]]['lost']} losses and {last_3[2]} with {lost_country[last_3[0]]['lost']} losses. In which")
print(f"{last_3[0]} lost {lost_country[last_3[0]]['chase']} by chase and {lost_country[last_3[0]]['defent']} by defenting")
print(f"{last_3[1]} lost {lost_country[last_3[1]]['chase']} by chase and {lost_country[last_3[1]]['defent']} by defenting")
print(f"{last_3[2]} lost {lost_country[last_3[2]]['chase']} by chase and {lost_country[last_3[2]]['defent']} by defenting")


The most lost teams are Zimbabwe with 21 losses,Australia with 21 losses and Sri Lanka with 21 losses. In which
Zimbabwe lost 12 by chase and 9 by defenting
Australia lost 6 by chase and 5 by defenting
Sri Lanka lost 4 by chase and 7 by defenting
