### Python libraries we'll be using in this course:
- Pandas
- Numpy
- Math
- Matplotlib

In [1]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt

# Silence all warnings so the notebook output stays readable.
# NOTE: this also hides deprecation warnings -- remove the filter when debugging.
import warnings
warnings.filterwarnings('ignore')

# Display settings: show every column, keep wide frames on a single line,
# and never truncate the text inside a cell.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
# None means "no limit". The old -1 sentinel was deprecated in pandas 1.0
# and is rejected with a ValueError by pandas 2.x.
pd.set_option('display.max_colwidth', None)

### Datasets used in this course:
- Open source Kaggle datasets of IPL 2008 - 2017
- deliveries.csv : Contains ball by ball information
- matches.csv : Contains match level information like teams, toss, venue, result & so on
- source: Kaggle & Cricsheet

### Sneak peek into the data:

In [2]:
# Ball-by-ball data; the path is relative to the notebook's working directory.
deliveries = pd.read_csv('deliveries.csv')

In [3]:
# Match-level data (teams, toss, venue, result, ...); path is relative to the working directory.
matches = pd.read_csv('matches.csv')

In [4]:
# Preview the first three rows of the ball-by-ball data.
deliveries.head(3)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,wide_runs,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,
1,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,
2,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,4,0,4,,,


### Columns in deliveries data:

In [5]:
# .columns gives the Index of column labels in the deliveries frame.
deliveries.columns

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batsman', 'non_striker', 'bowler', 'is_super_over', 'wide_runs',
       'bye_runs', 'legbye_runs', 'noball_runs', 'penalty_runs',
       'batsman_runs', 'extra_runs', 'total_runs', 'player_dismissed',
       'dismissal_kind', 'fielder'],
      dtype='object')

### Columns in matches data:

In [6]:
# .columns gives the Index of column labels in the matches frame.
matches.columns

Index(['id', 'season', 'city', 'date', 'team1', 'team2', 'toss_winner',
       'toss_decision', 'result', 'dl_applied', 'winner', 'win_by_runs',
       'win_by_wickets', 'player_of_match', 'venue', 'umpire1', 'umpire2',
       'umpire3'],
      dtype='object')

### Python concepts we'll be using in this course:
- Functions
- For loops
- If / elif / else
- Lambda functions
- List comprehensions
- Aggregate functions like: sum, max, min, count, unique

### A few examples of the above-mentioned concepts:

In [7]:
# functions
def strike_rate(runs, balls):
    """Return the strike rate: runs scored per 100 balls.

    Parameters
    ----------
    runs : number of runs scored.
    balls : number of balls faced. With plain Python numbers, passing 0
        raises ZeroDivisionError.

    Returns
    -------
    (runs / balls) * 100
    """
    runs_per_ball = runs / balls
    return runs_per_ball * 100

In [8]:
# 100 runs off 50 balls
strike_rate(runs=100, balls=50)

200.0

In [9]:
# for loops: visit 0, 1 and 2 in turn, printing each value on its own line
for i in range(0, 3):
    print(i)

0
1
2


### Pandas concepts we'll be using in this course:
- Reading/Writing csv
- DataFrame wrangling
- Group by
- Filtering
- Sorting
- Renaming, Reindexing
- Merge functions (joins)

### A few examples of the above-mentioned concepts:

In [10]:
# Reading a CSV: loads the same deliveries file into `df` for the pandas examples that follow.
df = pd.read_csv('deliveries.csv')

In [11]:
# Show the first two rows of df.
df.head(2)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,wide_runs,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,
1,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,


In [12]:
# Sorting: order rows by total_runs, highest first, and keep the top two.
(
    df
    .sort_values(by=['total_runs'], ascending=False)
    .head(2)
)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,wide_runs,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
130445,551,1,Sunrisers Hyderabad,Chennai Super Kings,3,3,DA Warner,S Dhawan,MM Sharma,0,0,0,0,1,0,6,1,7,,,
82128,348,1,Deccan Chargers,Pune Warriors,19,6,KC Sangakkara,CL White,A Nehra,0,0,0,0,1,0,6,1,7,,,
