In [90]:
import pandas as pd  
import numpy as np

### Importing Raw Data

In [91]:
df = pd.read_csv("IPL_data.csv")
df

Unnamed: 0,HomeTeam,HomeScore,HomeOvers,AwayTeam,AwayScore,AwayOvers
0,Gujarat Titans,133/3,18.1,Rajasthan Royals,130/9,20.0
1,Rajasthan Royals,161/3,18.1,Royal Challengers Bangalore,157/8,20.0
2,Lucknow Super Giants,193/6,20.0,Royal Challengers Bangalore,207/4,20.0
3,Gujarat Titans,191/3,19.3,Rajasthan Royals,188/6,20.0
4,Sunrisers Hyderabad,157/8,20.0,Punjab Kings,160/5,15.1
...,...,...,...,...,...,...
69,Sunrisers Hyderabad,149/7,20.0,Rajasthan Royals,210/6,20.0
70,Gujarat Titans,161/5,19.4,Lucknow Super Giants,158/6,20.0
71,Royal Challengers Bangalore,205/2,20.0,Punjab Kings,208/5,19.0
72,Delhi Capitals,179/6,18.2,Mumbai Indians,177/5,20.0


### Splitting Score Columns

In [92]:
temp_df = df["HomeScore"].str.split("/", n=1, expand=True)
df["HomeRuns"] = temp_df[0]
df["HomeWickets"] = temp_df[1]
temp_df = df["AwayScore"].str.split("/", n=1, expand=True)
df["AwayRuns"] = temp_df[0]
df["AwayWickets"] = temp_df[1]
df

Unnamed: 0,HomeTeam,HomeScore,HomeOvers,AwayTeam,AwayScore,AwayOvers,HomeRuns,HomeWickets,AwayRuns,AwayWickets
0,Gujarat Titans,133/3,18.1,Rajasthan Royals,130/9,20.0,133,3,130,9
1,Rajasthan Royals,161/3,18.1,Royal Challengers Bangalore,157/8,20.0,161,3,157,8
2,Lucknow Super Giants,193/6,20.0,Royal Challengers Bangalore,207/4,20.0,193,6,207,4
3,Gujarat Titans,191/3,19.3,Rajasthan Royals,188/6,20.0,191,3,188,6
4,Sunrisers Hyderabad,157/8,20.0,Punjab Kings,160/5,15.1,157,8,160,5
...,...,...,...,...,...,...,...,...,...,...
69,Sunrisers Hyderabad,149/7,20.0,Rajasthan Royals,210/6,20.0,149,7,210,6
70,Gujarat Titans,161/5,19.4,Lucknow Super Giants,158/6,20.0,161,5,158,6
71,Royal Challengers Bangalore,205/2,20.0,Punjab Kings,208/5,19.0,205,2,208,5
72,Delhi Capitals,179/6,18.2,Mumbai Indians,177/5,20.0,179,6,177,5


### Missing Values

In [100]:
df["HomeWickets"] = df["HomeWickets"].fillna(0)
df["AwayWickets"] = df["AwayWickets"].fillna(0)

In [101]:
df.describe()

Unnamed: 0,HomeOvers,AwayOvers,HomeRuns,HomeWickets,AwayRuns
count,74.0,74.0,74.0,74.0,74.0
mean,19.177027,19.317568,162.743243,5.189189,166.918919
std,1.686899,1.828343,29.650965,2.64148,29.907509
min,10.3,8.0,68.0,0.0,72.0
25%,19.2,20.0,150.25,4.0,151.25
50%,20.0,20.0,162.5,5.5,163.5
75%,20.0,20.0,180.5,7.0,191.5
max,20.0,20.0,217.0,9.0,222.0


### Changing dytpes

In [103]:
df["HomeRuns"] = df["HomeRuns"].astype(int)
df["HomeWickets"] = df["HomeWickets"].astype(int)
df["AwayRuns"] = df["AwayRuns"].astype(int)
df["AwayWickets"] = df["AwayWickets"].astype(int)

In [104]:
df.dtypes

HomeTeam        object
HomeScore       object
HomeOvers      float64
AwayTeam        object
AwayScore       object
AwayOvers      float64
HomeRuns         int32
HomeWickets      int32
AwayRuns         int32
AwayWickets      int32
dtype: object

In [106]:
df.columns.values

array(['HomeTeam', 'HomeScore', 'HomeOvers', 'AwayTeam', 'AwayScore',
       'AwayOvers', 'HomeRuns', 'HomeWickets', 'AwayRuns', 'AwayWickets'],
      dtype=object)

In [108]:
columns_rearranged = ['HomeTeam', 'HomeRuns', 'HomeWickets', 'HomeOvers', 'AwayTeam', 'AwayRuns', 'AwayWickets',
       'AwayOvers']
df = df[columns_rearranged]
df

Unnamed: 0,HomeTeam,HomeRuns,HomeWickets,HomeOvers,AwayTeam,AwayRuns,AwayWickets,AwayOvers
0,Gujarat Titans,133,9,18.1,Rajasthan Royals,130,9,20.0
1,Rajasthan Royals,161,8,18.1,Royal Challengers Bangalore,157,8,20.0
2,Lucknow Super Giants,193,4,20.0,Royal Challengers Bangalore,207,4,20.0
3,Gujarat Titans,191,6,19.3,Rajasthan Royals,188,6,20.0
4,Sunrisers Hyderabad,157,5,20.0,Punjab Kings,160,5,15.1
...,...,...,...,...,...,...,...,...
69,Sunrisers Hyderabad,149,6,20.0,Rajasthan Royals,210,6,20.0
70,Gujarat Titans,161,6,19.4,Lucknow Super Giants,158,6,20.0
71,Royal Challengers Bangalore,205,5,20.0,Punjab Kings,208,5,19.0
72,Delhi Capitals,179,5,18.2,Mumbai Indians,177,5,20.0


### Adding Winner Column

In [109]:
df.loc[df['HomeRuns'] > df['AwayRuns'], 'Winner'] = df['HomeTeam'] 
df.loc[df['HomeRuns'] < df['AwayRuns'], 'Winner'] = df['AwayTeam']
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


Unnamed: 0,HomeTeam,HomeRuns,HomeWickets,HomeOvers,AwayTeam,AwayRuns,AwayWickets,AwayOvers,Winner
0,Gujarat Titans,133,9,18.1,Rajasthan Royals,130,9,20.0,Gujarat Titans
1,Rajasthan Royals,161,8,18.1,Royal Challengers Bangalore,157,8,20.0,Rajasthan Royals
2,Lucknow Super Giants,193,4,20.0,Royal Challengers Bangalore,207,4,20.0,Royal Challengers Bangalore
3,Gujarat Titans,191,6,19.3,Rajasthan Royals,188,6,20.0,Gujarat Titans
4,Sunrisers Hyderabad,157,5,20.0,Punjab Kings,160,5,15.1,Punjab Kings
...,...,...,...,...,...,...,...,...,...
69,Sunrisers Hyderabad,149,6,20.0,Rajasthan Royals,210,6,20.0,Rajasthan Royals
70,Gujarat Titans,161,6,19.4,Lucknow Super Giants,158,6,20.0,Gujarat Titans
71,Royal Challengers Bangalore,205,5,20.0,Punjab Kings,208,5,19.0,Punjab Kings
72,Delhi Capitals,179,5,18.2,Mumbai Indians,177,5,20.0,Delhi Capitals


In [111]:
df.to_csv('IPL_DATA2020.csv')