In [23]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns

data = pd.read_csv("Virat_Kohli.csv")
print(data.head())


   Runs  BF  4s  6s     SR  Pos Dismissal  Inns   Opposition         Ground  \
0    12  22   1   0  54.54    2       lbw     1  v Sri Lanka       Dambulla   
1    37  67   6   0  55.22    2    caught     2  v Sri Lanka       Dambulla   
2    25  38   4   0  65.78    1   run out     1  v Sri Lanka  Colombo (RPS)   
3    54  66   7   0  81.81    1    bowled     1  v Sri Lanka  Colombo (RPS)   
4    31  46   3   1  67.39    1       lbw     2  v Sri Lanka  Colombo (RPS)   

  Start Date  
0  18-Aug-08  
1  20-Aug-08  
2  24-Aug-08  
3  27-Aug-08  
4  29-Aug-08  


### Let’s have a look at whether this dataset contains any null values or not before moving forward:

In [24]:
print(data.isnull().sum())

Runs          0
BF            0
4s            0
6s            0
SR            0
Pos           0
Dismissal     0
Inns          0
Opposition    0
Ground        0
Start Date    0
dtype: int64


### The dataset contains matches played by Virat Kohli between 18 August 2008 and 22 January 2017. So let’s have a look at the total runs scored by Virat Kohli:

In [25]:
# Total Runs Between 18-Aug-08 - 22-Jan-17
data["Runs"].sum()

6184

In [None]:
Now let’s have a look at the average of Virat Kohli during the same period:

In [26]:
# Average Runs Between 18-Aug-08 - 22-Jan-17
data["Runs"].mean()

46.84848484848485

### In ODIs, the batting average of 35-37 is considered a good average. So Virat Kohl’s batting average is good. Now let’s have a look at the trend of runs scored by Virat Kohli in his career from 18 August 2008 to 22 January 2017:

In [27]:
matches = data.index
figure = px.line(data, x=matches, y="Runs", 
                 title='Runs Scored by Virat Kohli Between 18-Aug-08 - 22-Jan-17')
figure.show()

### In so many innings played by Virat Kohli, he scored over 100 or close to it. That is a good sign of consistency. Now let’s see all the batting positions played by Virat Kohli:

In [28]:
# Batting Positions
data["Pos"] = data["Pos"].map({3.0: "Batting At 3", 4.0: "Batting At 4", 2.0: "Batting At 2", 
                               1.0: "Batting At 1", 7.0:"Batting At 7", 5.0:"Batting At 5", 
                               6.0: "batting At 6"})

Pos = data["Pos"].value_counts()
label = Pos.index
counts = Pos.values
colors = ['gold','lightgreen', "pink", "blue", "skyblue", "cyan", "orange"]

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Number of Matches At Different Batting Positions')
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=30,
                  marker=dict(colors=colors, line=dict(color='black', width=3)))
fig.show()

### In more than 68% of all the innings played by Virat Kohli, he batted in the third position. Now let’s have a look at the total runs scored by Virat Kohli in different positions:

In [29]:
label = data["Pos"]
counts = data["Runs"]
colors = ['gold','lightgreen', "pink", "blue", "skyblue", "cyan", "orange"]

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Runs By Virat Kohli At Different Batting Positions')
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=30,
                  marker=dict(colors=colors, line=dict(color='black', width=3)))
fig.show()

### More than 72% of the total runs scored by Virat Kohli are while batting at 3rd position. So we can say batting at 3rd position is perfect for Virat Kohli.

### Now let’s have a look at the number of centuries scored by Virat Kohli while batting in the first innings and second innings: 

In [30]:
centuries = data.query("Runs >= 100")
figure = px.bar(centuries, x=centuries["Inns"], y = centuries["Runs"], 
                color = centuries["Runs"],
                title="Centuries By Virat Kohli in First Innings Vs. Second Innings")
figure.show()

### So most of the centuries are scored while batting in the second innings. By this, we can say that Virat Kohli likes chasing scores. Now let’s have a look at the kind of dismissals Virat Kohli faced most of the time:

In [31]:
# Dismissals of Virat Kohli
dismissal = data["Dismissal"].value_counts()
label = dismissal.index
counts = dismissal.values
colors = ['gold','lightgreen', "pink", "blue", "skyblue", "cyan", "orange"]

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Dismissals of Virat Kohli')
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=30,
                  marker=dict(colors=colors, line=dict(color='black', width=3)))
fig.show()

### So most of the time, Virat Kohli gets out by getting caught by the fielder or the keeper. Now let’s have a look at against which team Virat Kohli scored most of his runs:

In [32]:
figure = px.bar(data, x=data["Opposition"], y = data["Runs"], color = data["Runs"],
            title="Most Runs Against Teams")
figure.show()

### According to the above figure, Virat Kohli likes batting against Sri Lanka, Australia, New Zealand, West Indies, and England. But he scored most of his runs while batting against Sri Lanka. Now let’s have a look at against which team Virat Kohli scored most of his centuries: 

In [33]:
figure = px.bar(centuries, x=centuries["Opposition"], y = centuries["Runs"], 
                color = centuries["Runs"],
                title="Most Centuries Against Teams")
figure.show()

### So, most of the centuries scored by Virat Kohli were against Australia. Now let’s analyze Virat Kohli’s strike rate. To analyze Virat Kohli’s strike rate, I will create a new dataset of all the matches played by Virat Kohli where his strike rate was more than 120: 

In [34]:
strike_rate = data.query("SR >= 120")
print(strike_rate)

     Runs  BF  4s  6s      SR           Pos Dismissal  Inns     Opposition  \
8      27  19   4   0  142.10  Batting At 7    bowled     1    v Sri Lanka   
32    100  83   8   2  120.48  Batting At 4   not out     1   v Bangladesh   
56     23  11   3   0  209.09  batting At 6   not out     1  v West Indies   
76     43  34   4   1  126.47  Batting At 3    caught     1      v England   
78    102  83  13   2  122.89  Batting At 3    caught     1  v West Indies   
83    100  52   8   7  192.30  Batting At 3   not out     2    v Australia   
85    115  66  18   1  174.24  Batting At 3   not out     2    v Australia   
93     78  65   7   2  120.00  Batting At 3    caught     2  v New Zealand   
130     8   5   2   0  160.00  Batting At 3    caught     1      v England   

            Ground Start Date  
8           Rajkot  15-Dec-09  
32           Dhaka  19-Feb-11  
56          Indore  08-Dec-11  
76      Birmingham  23-Jun-13  
78   Port of Spain  05-Jul-13  
83          Jaipur  16-Oct-

### Now let’s see whether Virat Kohli plays with high strike rates in the first innings or second innings: 

In [35]:
figure = px.bar(strike_rate, x = strike_rate["Inns"], 
                y = strike_rate["SR"], 
                color = strike_rate["SR"],
            title="Virat Kohli's High Strike Rates in First Innings Vs. Second Innings")
figure.show()

# 71 Centuries of Virat Kohli 

In [38]:
df = pd.read_csv('71 Centuries of Virat Kohli.csv')
df.shape

(71, 15)

In [39]:
df.columns

Index(['Score', 'Out/Not Out', 'Against', 'Batting Order', 'Inn.',
       'Strike Rate', 'Venue', 'Column1', 'H/A', 'Date', 'Result', 'Format',
       'Man of the Match', 'Captain', 'Unnamed: 14'],
      dtype='object')

In [40]:
df.drop('Unnamed: 14',axis=1,inplace=True)

In [41]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71 entries, 0 to 70
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Score             71 non-null     int64  
 1   Out/Not Out       71 non-null     object 
 2   Against           71 non-null     object 
 3   Batting Order     71 non-null     int64  
 4   Inn.              71 non-null     int64  
 5   Strike Rate       44 non-null     float64
 6   Venue             71 non-null     object 
 7   Column1           71 non-null     object 
 8   H/A               71 non-null     object 
 9   Date              71 non-null     object 
 10  Result            71 non-null     object 
 11  Format            71 non-null     object 
 12  Man of the Match  71 non-null     object 
 13  Captain           71 non-null     object 
dtypes: float64(1), int64(3), object(10)
memory usage: 7.9+ KB


In [42]:
df['Date'] =  pd.to_datetime(df['Date'], format='%d-%m-%Y')

In [43]:
df.isnull().sum()

Score                0
Out/Not Out          0
Against              0
Batting Order        0
Inn.                 0
Strike Rate         27
Venue                0
Column1              0
H/A                  0
Date                 0
Result               0
Format               0
Man of the Match     0
Captain              0
dtype: int64

### These all are Test Centuries here Strike Rate is not that important.

In [44]:
df[df['Strike Rate'].isnull()]

Unnamed: 0,Score,Out/Not Out,Against,Batting Order,Inn.,Strike Rate,Venue,Column1,H/A,Date,Result,Format,Man of the Match,Captain
0,116,Out,Australia,6,2,,Adelaide Oval,Adelaide,Away,2012-01-24,Lost,Test,No,No
1,103,Out,New Zealand,5,2,,M. Chinnaswamy Stadium,Bangalore,Home,2012-08-31,Won,Test,Yes,No
2,103,Out,England,5,2,,Vidarbha Cricket Association Stadium,Nagpur,Home,2012-12-13,Drawn,Test,No,No
3,107,Out,Australia,5,2,,M. A. Chidambaram Stadium,Chennai,Home,2013-02-22,Won,Test,No,No
4,119,Out,South Africa,4,1,,Wanderers Stadium,Johannesburg,Away,2013-12-18,Drawn,Test,No,No
5,105,Not Out,New Zealand,4,4,,Basin Reserve,Wellington,Away,2014-02-14,Drawn,Test,No,No
6,115,Out,Australia,4,2,,Adelaide Oval,Adelaide,Away,2014-12-09,Lost,Test,No,Yes
7,141,Out,Australia,4,4,,Adelaide Oval,Adelaide,Away,2014-12-09,Lost,Test,No,Yes
8,169,Out,Australia,4,2,,Melbourne Cricket Ground,Melbourne,Away,2014-12-26,Drawn,Test,No,No
9,147,Out,Australia,4,2,,Sydney Cricket Ground,Sydney,Away,2015-01-06,Drawn,Test,No,Yes


In [45]:
df['Strike Rate'] = df['Strike Rate'].fillna(0)

#### Statastical Description for ODIs and T20Is 

In [46]:
df.isnull().sum()

Score               0
Out/Not Out         0
Against             0
Batting Order       0
Inn.                0
Strike Rate         0
Venue               0
Column1             0
H/A                 0
Date                0
Result              0
Format              0
Man of the Match    0
Captain             0
dtype: int64

In [47]:
df.duplicated().sum()

0

### Statastical Distribution for Test Matches

In [48]:
df[df['Format']!='Test'][['Score','Strike Rate']].describe()

Unnamed: 0,Score,Strike Rate
count,44.0,44.0
mean,120.840909,114.019545
std,17.743578,25.257567
min,100.0,84.9
25%,107.0,96.6325
50%,116.5,108.935
75%,128.25,120.7875
max,183.0,200.0


### Observations:

####    > Avg. Score of Virat Kohli in ODIs/T20Is is 121 in those matches where he have scored Century.
####    > His maximum Score in white ball cricket is 183 Runs.
####    > He have Scored Centuries with the avg.strike rate of 144 in limited over format.

In [49]:
df[df['Format']=='Test']['Score'].describe()

count     27.000000
mean     150.555556
std       48.817031
min      103.000000
25%      106.000000
50%      139.000000
75%      184.500000
max      254.000000
Name: Score, dtype: float64

### Observations:

####    > Virat's Maximum Score in Test cricket is 254
####    > Avg.score of Virat Kohli in those matches where he reached century is 150 

### Greatest in Every Format: Virat Kohli had Scored Tons in each format 43 in ODIs 27 in TEST and 1 in T20I Cricket 

In [50]:
px.pie(values = df.Format.value_counts().values,names=df.Format.value_counts().index,hole=0.5,title="Virat Kohli's Tons")

### Here we hae Used Rank function because our aim is to find the first and latest century Scored by Virat in each format. 

In [51]:
df['Format_century_no'] = df.groupby(['Format'])['Date'].rank(method='max').astype(int)

### Virat Kohli's First Century in each Format :

####      > Test : 116, Ind vs Aus (24th Jan 2012) at Adelaide Oval,Australia 
####          (India Lost this Match while chasing Kohli was batting at   number 6)

####     > ODI : 107(Out, str.rate: 93.85), Ind vs SL (24th Dec 2009) at Eden Garden,Kolkata,India 
####          (India won the Match while chasing Kohli was batting at number 4)

####      > T20I : 122(Not Out, str.rate: 200), Ind vs Afg (08th Sep 2022) at Dubai International Stadium,Dubai
####          (India Won this match while defending,Kohli was opener and Virat was awarded with MOM)

In [52]:
df[df.Format_century_no ==1].iloc[:,0:-1]

Unnamed: 0,Score,Out/Not Out,Against,Batting Order,Inn.,Strike Rate,Venue,Column1,H/A,Date,Result,Format,Man of the Match,Captain
0,116,Out,Australia,6,2,0.0,Adelaide Oval,Adelaide,Away,2012-01-24,Lost,Test,No,No
27,107,Out,Sri Lanka,4,2,93.85,Eden Gardens,Kolkata,Home,2009-12-24,Won,ODI,No,No
70,122,Not Out,Afganistan,1,1,200.0,Dubai International Cricket Stadium,Dubai,Away,2022-09-08,Won,T20I,Yes,No


### Virat Kohli's latest Century in each Format :

####       > Test : 136 (Out), Ind vs Ban (22th Nov 2019) at Eden Garden,Kolkata,India (India won the Match while chasing,Virat batted at number 4 and Kohli was leading India)

####       > ODI : 114 (Not Out, str.rate: 115.15), Ind vs WI (14th Aug 2019) at Queen's Park Oval,Port of Spain (India won the Match while chasing,Virat batted at number 3,Virat was leading the team and was also awarded with Man of the Match)

####       > T20I : 122 (Not Out, str.rate: 200), Ind vs Afg (08th Sep 2022) at Dubai International Stadium,Dubai (India Won this match while defending and Virat was awarded with MOM)

In [53]:
df[(df.Format_century_no ==43)|(df.Format_century_no ==27)|(df.Format_century_no ==1)].iloc[:,:-1].sort_values(by='Date').tail(3)

Unnamed: 0,Score,Out/Not Out,Against,Batting Order,Inn.,Strike Rate,Venue,Column1,H/A,Date,Result,Format,Man of the Match,Captain
69,114,Not Out,West Indies,3,2,115.15,Queen's Park Oval,Port of Spain,Away,2019-08-14,Won,ODI,Yes,Yes
26,136,Out,Bangladesh,4,2,0.0,Eden Gardens,Kolkata,Home,2019-11-22,Won,Test,No,Yes
70,122,Not Out,Afganistan,1,1,200.0,Dubai International Cricket Stadium,Dubai,Away,2022-09-08,Won,T20I,Yes,No


### Observation :

####        When Virat scored 100 last time India won matches in all 3 Formats

### Virat's Centuries against other Teams

In [54]:
Tonvs = df.groupby(['Against','Format','Result'])['Score'].count().reset_index().rename(columns ={'Score':'Number_of_Centuries'})
px.bar(data_frame=Tonvs,x='Against',y='Number_of_Centuries',color ='Format',text_auto=True,pattern_shape='Result')

### Observations :

####    > Virat Scored maximum 15 Centuries against the Giant Aussies out of which 8 were in ODIs and 7 were in 
####      TESTs which is followed by Sri Lanka(total = 13, ODIs = 8, TESTs= 5).

####    > Pakistan,Zimbabwe and Afganistan are the teams against whome Virat haven't scored any Test Hundred.

####    > Virat had Scored his only T20I Century against Afganistan

### Virat's Centuries Over the years 

In [55]:
df = df.sort_values(by=['Date'])

In [56]:
l = []
for i in range(1,72):
    l.append(i)

In [57]:
df['Int_Century_count'] = l

In [58]:
fig = px.line(data_frame=df,x='Date',y='Int_Century_count',markers=True,title="Virat's International Hundreds")
fig.update_layout(title_x=0.5)
fig.show()

In [59]:
centuries_by_year = df.groupby([df['Date'].apply(lambda x: x.year),'Format'])['Score'].count().reset_index().rename(columns={'Score':'Number_of_Centuries'})

In [60]:
px.line(data_frame=centuries_by_year,x='Date',y='Number_of_Centuries',color='Format',markers=True,symbol='Format')

In [61]:
df['Date'].value_counts()

2014-12-09    2
2017-08-31    1
2017-12-02    1
2017-11-24    1
2017-11-16    1
             ..
2015-02-15    1
2015-08-12    1
2015-10-22    1
2016-01-17    1
2022-09-08    1
Name: Date, Length: 70, dtype: int64

### At Adelaide Oval in 2014 against Australia Virat had complited 2 Centuries on Same date i.e. 09th Dec 2014 where he was leading the team but despite of his performance India have to lost the match.

####         - In Second Inning Virat was batting at number 4 and got out on 115
####         - In Fourth Inning he was batting at number 4 and got out on 141 

In [62]:
df[df['Date']=='2014-12-09']

Unnamed: 0,Score,Out/Not Out,Against,Batting Order,Inn.,Strike Rate,Venue,Column1,H/A,Date,Result,Format,Man of the Match,Captain,Format_century_no,Int_Century_count
7,141,Out,Australia,4,4,0.0,Adelaide Oval,Adelaide,Away,2014-12-09,Lost,Test,No,Yes,8,28
6,115,Out,Australia,4,2,0.0,Adelaide Oval,Adelaide,Away,2014-12-09,Lost,Test,No,Yes,8,29


### MOM while Scoring Centuries 

In [63]:
mom_cen = df.groupby(['Format','Man of the Match'])['Score'].count().reset_index()

In [64]:
fig = px.sunburst(mom_cen,path=['Format','Man of the Match','Score'],values='Score')
fig.update_layout(title='Number of Centuries',title_x=0.5)

### Observtions:

####       ODIs : Out of his 43 Centuries in ODI he won MOM 29 times.
####       TESTs : Out of his 27 Centuries in TEST he won MOM 8 times.
####       T20Is : He was awarded MOM for his Maiden T20I Ton.

### Win Percent and Virat's Centuries 

In [65]:
ton_win = df.groupby(['Result','Format'])['Score'].count().reset_index()

In [66]:
fig = px.sunburst(ton_win,path=['Format','Result'],values='Score')
fig.update_traces(textinfo="label+percent parent")
fig.update_layout(title="Winning and Losing pecentage of India in all three Formats w.r.t Virat's Century",title_x=0.5)
fig.show()

### Observations:

####       India had Won 81% of matches when Virat Scored Century in ODIs
####       India had Won 48% and Drawn 26% of matches when Virat Scored Century in TESTs

In [67]:
df_results_against = df.groupby(['Format','Result','Against'])['Score'].count().reset_index()

In [68]:
fig = px.sunburst(df_results_against,path=['Format','Result','Against'],values='Score')
fig.update_traces(textinfo="label+percent parent")

### From the above plot one thing we can clearly observe and that is when ever india lost beside Kohli's Century maximum times opponent was Australia. So though Virat had scored maximum Centuries against Ausssies the Percentage of Winning is not maximum against them 

### Kohli's Centuries Aginst Aussies 

In [69]:
fig = px.sunburst(df_results_against[df_results_against['Against']=='Australia'],path=['Format','Result'],values='Score')
fig.update_traces(textinfo="label+percent parent")

### Yes, we were Right beside Kohli's Century India have lost 57% of it's Test matches, 38% of ODIs against Australia. 

### Kohli's Centuries as Captain 

In [70]:
ton_cap = df[df['Captain']=='Yes'].groupby(['Captain','Format','Against','Result'])['Score'].count().reset_index()
fig = px.sunburst(ton_cap,path=['Captain','Format','Against','Result'],values='Score')
fig.update_traces(textinfo="label+percent parent")
fig.update_layout(title="Virat's Centuries as a captain and winning Percent of India",title_x=0.5)

### Observation:

### When Virat was leading the team:

####      > He scored maximum 33% of his Centuries against West Indies in ODIs out of which India won 71% of matches.

####      > He scored maximum 25% of his Centuries against Sri Lanka in TESTs out of which India had won 40% matches 
####       where as 40% were ended in Draw. 

### Kohli's Centuries at Home vs Away 

In [72]:
H_a = df.groupby(['Against','H/A'])['Score'].count().reset_index().rename(columns={'Score':'Centuries'})
fig = px.bar(data_frame=H_a,x='Against',y='Centuries',color='H/A',text_auto=True)
fig.update_layout(title = "Virat's Tons at Home and Away from Home", title_x=0.5)
fig.show()

### Kohli's Centuries as a captain at Home vs Away

In [73]:
H_a = df.groupby(['Captain','Format','H/A'])['Score'].count().reset_index().rename(columns={'Score':'Centuries'})
fig = px.bar(data_frame=H_a,x='Captain',y='Centuries',color='H/A',pattern_shape='Format',text_auto=True)
fig.update_layout(title = "Virat's Tons at Home and Away from Home as a Captain", title_x=0.5)
fig.show()

# To Be Continued !!!!