### Importing Libraries

In [62]:
import sqlite3 as sl
import pandas as pd

### Creating connection to DB

In [63]:
IPL = sl.connect('database.sqlite')

In [64]:
# List of tables in the database

cursorObj = IPL.cursor()

cursorObj.execute('SELECT name from sqlite_master where type= "table"')

print(cursorObj.fetchall())


[('Extra_Runs',), ('Batsman_Scored',), ('Batting_Style',), ('Country',), ('Season',), ('City',), ('Outcome',), ('Win_By',), ('Wicket_Taken',), ('Venue',), ('Extra_Type',), ('Out_Type',), ('Toss_Decision',), ('Umpire',), ('Team',), ('Ball_by_Ball',), ('sysdiagrams',), ('sqlite_sequence',), ('Match',), ('Rolee',), ('Player_Match',), ('Player',), ('Bowling_Style',)]


### Let us have a look at some of the tables in this database

In [65]:
Query = """
        select *
        from Player;
        """
df = pd.read_sql_query(Query,IPL)
df.head(10)

Unnamed: 0,Player_Id,Player_Name,DOB,Batting_Id,Bowling_Id,Country_Id
0,1,SC Ganguly,1972-07-08 00:00:00,1,1.0,1
1,2,BB McCullum,1981-09-27 00:00:00,2,1.0,4
2,3,RT Ponting,1974-12-19 00:00:00,2,1.0,5
3,4,DJ Hussey,1977-07-15 00:00:00,2,2.0,5
4,5,Mohammad Hafeez,1980-10-17 00:00:00,2,2.0,6
5,6,R Dravid,1973-01-11 00:00:00,2,2.0,1
6,7,W Jaffer,1978-02-16 00:00:00,2,2.0,1
7,8,V Kohli,1988-11-05 00:00:00,2,1.0,1
8,9,JH Kallis,1975-10-16 00:00:00,2,3.0,2
9,10,CL White,1983-08-18 00:00:00,2,4.0,5


In [66]:
Query = """
        select *
        from Season;
        """
df = pd.read_sql_query(Query,IPL)
df

Unnamed: 0,Season_Id,Man_of_the_Series,Orange_Cap,Purple_Cap,Season_Year
0,1,32,100,102,2008
1,2,53,18,61,2009
2,3,133,133,131,2010
3,4,162,162,194,2011
4,5,315,162,190,2012
5,6,32,19,71,2013
6,7,305,46,364,2014
7,8,334,187,71,2015
8,9,8,8,299,2016


### Now what if we want to see the names of the players who were man of the series in each season

## JOINS

<b>JOINS are used to combine rows from multiple tables based on the columns which have similar information

### Left Join

In [67]:
Query = """
          select a.Player_Id,a.Player_Name,b.Season_Id,b.Man_of_the_Series
from Player as a
left join
Season as b
on a.Player_Id= b.Man_of_the_Series;
        """
df = pd.read_sql_query(Query,IPL)
df.head(40)

Unnamed: 0,Player_Id,Player_Name,Season_Id,Man_of_the_Series
0,1,SC Ganguly,,
1,2,BB McCullum,,
2,3,RT Ponting,,
3,4,DJ Hussey,,
4,5,Mohammad Hafeez,,
5,6,R Dravid,,
6,7,W Jaffer,,
7,8,V Kohli,9.0,8.0
8,9,JH Kallis,,
9,10,CL White,,


In left join the observations of the left table are atleast shown once independent of their presence in right table

#### Class Task: Perform left join Player and Season tables to find the orange cap winners

### Right Join

<b>"Right Join" is the exact opposite of "Left Join"

In [68]:
Query = """
          select a.Season_Id,a.Orange_Cap,b.PLayer_Id,b.Player_Name
from Season as a
right join
Player as b
on a.Orange_Cap= b.Player_Id;
        """
df = pd.read_sql_query(Query,IPL)
df.head(40)

DatabaseError: Execution failed on sql '
          select a.Season_Id,a.Orange_Cap,b.PLayer_Id,b.Player_Name
from Season as a
right join
Player as b
on a.Orange_Cap= b.Player_Id;
        ': RIGHT and FULL OUTER JOINs are not currently supported

### Inner Join

<b>"Inner Join" is used to output only those rows which have the matching value in both tables

In [69]:
Query = """
          select a.Season_Id,a.Man_of_the_Series,b.Player_Name
from Season as a
inner join
Player as b
on a.Man_of_the_Series = b.Player_Id;
        """
df = pd.read_sql_query(Query,IPL)
df

Unnamed: 0,Season_Id,Man_of_the_Series,Player_Name
0,1,32,SR Watson
1,2,53,AC Gilchrist
2,3,133,SR Tendulkar
3,4,162,CH Gayle
4,5,315,SP Narine
5,6,32,SR Watson
6,7,305,GJ Maxwell
7,8,334,AD Russell
8,9,8,V Kohli


<b>Note: We can always join a table with itself also called as self join

<b> You can join multiple tables at a time

In [70]:
Query = """
          select a.Season_Id,b.Player_Name as Man_of_the_Series,c.Country_Name
from Season as a
inner join
Player as b
on a.Man_of_the_Series = b.Player_Id
inner join
Country as c
on b.Country_Id = c.Country_Id;
        """
df = pd.read_sql_query(Query,IPL)
df

Unnamed: 0,Season_Id,Man_of_the_Series,Country_Name
0,1,SR Watson,Australia
1,2,AC Gilchrist,Australia
2,3,SR Tendulkar,India
3,4,CH Gayle,West Indies
4,5,SP Narine,West Indies
5,6,SR Watson,Australia
6,7,GJ Maxwell,Australia
7,8,AD Russell,West Indies
8,9,V Kohli,India


#### Class Task: Using inner join, multiple join, self join get a dataframe with the following columns<br>

Seaon_Id, Man_of_the_Series,<br> Man_of_the_Series_Country_Name,<br> Orange_Cap_Winner,<br> Orange_Cap_Winner_Country_Name,<br> Purple_Cap_Winner,<br> Purple_Cap_Winner_Country_Name

# Like & Wild Cards

<b> Like and wild cards are used to identify patterns in strings

In [71]:
# Let's consider the table "Bowling Style"
Query = """
          select *
          from Bowling_Style;
        """
df = pd.read_sql_query(Query,IPL)
df


Unnamed: 0,Bowling_Id,Bowling_Style
0,1,Right-arm medium
1,2,Right-arm offbreak
2,3,Right-arm fast-medium
3,4,Legbreak googly
4,5,Right-arm medium-fast
5,6,Left-arm fast-medium
6,7,Slow left-arm orthodox
7,8,Slow left-arm chinaman
8,9,Left-arm medium-fast
9,10,Legbreak


In [72]:
Query = """
          select *
          from Bowling_Style
          where Bowling_Style like 'left%';
        """
df = pd.read_sql_query(Query,IPL)
df

Unnamed: 0,Bowling_Id,Bowling_Style
0,6,Left-arm fast-medium
1,9,Left-arm medium-fast
2,13,Left-arm medium
3,14,Left-arm fast


In [73]:
Query = """
          select *
          from Bowling_Style
          where Bowling_Style like 'legbreak%';
        """
df = pd.read_sql_query(Query,IPL)
df

Unnamed: 0,Bowling_Id,Bowling_Style
0,4,Legbreak googly
1,10,Legbreak


#### Class Task: Output all the left hand bowling styles

# NULL Values

In [74]:
# Consider the below table
Query = """
          select a.Player_Id,a.Player_Name,b.Season_Id
from Player as a
left join
Season as b
on a.Player_Id= b.Man_of_the_Series;
        """
df = pd.read_sql_query(Query,IPL)
df.head(40)

Unnamed: 0,Player_Id,Player_Name,Season_Id
0,1,SC Ganguly,
1,2,BB McCullum,
2,3,RT Ponting,
3,4,DJ Hussey,
4,5,Mohammad Hafeez,
5,6,R Dravid,
6,7,W Jaffer,
7,8,V Kohli,9.0
8,9,JH Kallis,
9,10,CL White,


In [75]:
# We can filter out NULL values from a table using IS NOT NULL in where clause
# Consider the below table
Query = """
          select a.Player_Id,a.Player_Name,b.Season_Id
from Player as a
left join
Season as b
on a.Player_Id= b.Man_of_the_Series
where b.Season_Id is not null;
        """
df = pd.read_sql_query(Query,IPL)
df.head(40)

Unnamed: 0,Player_Id,Player_Name,Season_Id
0,32,SR Watson,1
1,53,AC Gilchrist,2
2,133,SR Tendulkar,3
3,162,CH Gayle,4
4,315,SP Narine,5
5,32,SR Watson,6
6,305,GJ Maxwell,7
7,334,AD Russell,8
8,8,V Kohli,9


#### Class Task: Output all the players who have never won in any of the three categories (man of the series,Orange cap,Purple cap)

In [76]:
# Replace NULL values with 0

Query = """
          select a.Player_Id,a.Player_Name,ifnull(b.Season_Id,0) as Season_Id
from Player as a
left join
Season as b
on a.Player_Id= b.Man_of_the_Series;
        """
df = pd.read_sql_query(Query,IPL)
df.head(40)

Unnamed: 0,Player_Id,Player_Name,Season_Id
0,1,SC Ganguly,0
1,2,BB McCullum,0
2,3,RT Ponting,0
3,4,DJ Hussey,0
4,5,Mohammad Hafeez,0
5,6,R Dravid,0
6,7,W Jaffer,0
7,8,V Kohli,9
8,9,JH Kallis,0
9,10,CL White,0


# Case When

<b>CASE WHEN is used to get a value based on set of conditions

In [82]:
# Consider the Bowling_Style table
Query = """
        select *
        from Bowling_Style;
        """
df = pd.read_sql_query(Query,IPL)
df

Unnamed: 0,Bowling_Id,Bowling_Style
0,1,Right-arm medium
1,2,Right-arm offbreak
2,3,Right-arm fast-medium
3,4,Legbreak googly
4,5,Right-arm medium-fast
5,6,Left-arm fast-medium
6,7,Slow left-arm orthodox
7,8,Slow left-arm chinaman
8,9,Left-arm medium-fast
9,10,Legbreak


In [85]:
# We can create a new column called "Bowling Hand" using case when

# Consider the Bowling_Style table
Query = """
        select *,
        case
            when Bowling_Id in (1,2,3,5,11,12) then "Right Handed"
            when Bowling_Id in (6,7,8,9,13,14) then "Left Handed"
            else "Legbreak"
            end as Bolwing_Hand
        from Bowling_Style;
        """
df = pd.read_sql_query(Query,IPL)
df

Unnamed: 0,Bowling_Id,Bowling_Style,Bolwing_Hand
0,1,Right-arm medium,Right Handed
1,2,Right-arm offbreak,Right Handed
2,3,Right-arm fast-medium,Right Handed
3,4,Legbreak googly,Legbreak
4,5,Right-arm medium-fast,Right Handed
5,6,Left-arm fast-medium,Left Handed
6,7,Slow left-arm orthodox,Left Handed
7,8,Slow left-arm chinaman,Left Handed
8,9,Left-arm medium-fast,Left Handed
9,10,Legbreak,Legbreak


<b>Note: There is no limit to number of "when - then" statements in CASE WHEN

#### Class Task: Find the list of players whose batting hand is different from their bowling hand

# Aggregate functions and Group By

<b> "COUNT" is used to count the number of rows in each group

In [89]:
# Counting the number of players who were part of IPL
Query = """
        select count(*) as Number_of_Players
        from Player;
        """
df = pd.read_sql_query(Query,IPL)
df


Unnamed: 0,Number_of_Players
0,469


In [90]:
# Count the players from each country_id
Query = """
        select Country_Id,count(*) as Number_of_Players
        from Player
        group by Country_Id;
        """
df = pd.read_sql_query(Query,IPL)
df

Unnamed: 0,Country_Id,Number_of_Players
0,1,262
1,2,39
2,4,22
3,5,72
4,6,13
5,7,20
6,8,19
7,9,2
8,10,14
9,11,5


#### Class Task: Count the number of players from each country name

<b>"SUM" adds all the values of a column/group

In [95]:
# let us consider the "Extra Runs" table
Query = """
        select *
        from Extra_Runs;
        """
df = pd.read_sql_query(Query,IPL)
df.head()


Unnamed: 0,Match_Id,Over_Id,Ball_Id,Extra_Type_Id,Extra_Runs,Innings_No
0,335987,1,1,1,1,1
1,335987,1,2,2,1,2
2,335987,1,3,2,1,1
3,335987,1,7,1,1,1
4,335987,2,3,1,4,2


In [96]:
# let us consider the "Extra Runs" table
Query = """
        select Match_Id,sum(Extra_Runs) as Match_Extra_Runs
        from Extra_Runs
        Group By Match_Id;
        """
df = pd.read_sql_query(Query,IPL)
df.head()

Unnamed: 0,Match_Id,Match_Extra_Runs
0,335987,36
1,335988,17
2,335989,17
3,335990,16
4,335991,38


#### Class Task: Find number of runs scored by each player (use Batsman_Scored, Ball_by_Ball and Player tables)

In [97]:
Query = """
        select *
        from Batsman_Scored
        """
df = pd.read_sql_query(Query,IPL)
df.head()

Unnamed: 0,Match_Id,Over_Id,Ball_Id,Runs_Scored,Innings_No
0,335987,1,1,0,1
1,335987,1,1,1,2
2,335987,1,2,0,1
3,335987,1,3,0,2
4,335987,1,4,0,1


In [98]:
Query = """
        select *
        from Ball_by_Ball
        """
df = pd.read_sql_query(Query,IPL)
df.head()

Unnamed: 0,Match_Id,Over_Id,Ball_Id,Innings_No,Team_Batting,Team_Bowling,Striker_Batting_Position,Striker,Non_Striker,Bowler
0,335987,1,1,1,1,2,1,1,2,14
1,335987,1,1,2,2,1,1,6,7,106
2,335987,1,2,1,1,2,2,2,1,14
3,335987,1,2,2,2,1,2,7,6,106
4,335987,1,3,1,1,2,2,2,1,14


In [99]:
Query = """
        select *
        from Player
        """
df = pd.read_sql_query(Query,IPL)
df.head()

Unnamed: 0,Player_Id,Player_Name,DOB,Batting_Id,Bowling_Id,Country_Id
0,1,SC Ganguly,1972-07-08 00:00:00,1,1.0,1
1,2,BB McCullum,1981-09-27 00:00:00,2,1.0,4
2,3,RT Ponting,1974-12-19 00:00:00,2,1.0,5
3,4,DJ Hussey,1977-07-15 00:00:00,2,2.0,5
4,5,Mohammad Hafeez,1980-10-17 00:00:00,2,2.0,6
