# Importing data

In [1]:
import pandas as pd
from sqlalchemy import create_engine, text, inspect

In [2]:
engine = create_engine('sqlite:///database.sqlite')

In [3]:
inspector = inspect(engine)
tables = inspector.get_table_names()
tables

['Ball_by_Ball',
 'Batsman_Scored',
 'Batting_Style',
 'Bowling_Style',
 'City',
 'Country',
 'Extra_Runs',
 'Extra_Type',
 'Match',
 'Out_Type',
 'Outcome',
 'Player',
 'Player_Match',
 'Rolee',
 'Season',
 'Team',
 'Toss_Decision',
 'Umpire',
 'Venue',
 'Wicket_Taken',
 'Win_By',
 'sysdiagrams']

# Tasks

## Query 1: Select All Columns from Player’s Table

In [4]:
#  Select All Columns from Player’s Table
df = pd.read_sql_query('SELECT * FROM Player', engine)

df

Unnamed: 0,Player_Id,Player_Name,DOB,Batting_hand,Bowling_skill,Country_Name
0,1,SC Ganguly,1972-07-08 00:00:00,1,1.0,1
1,2,BB McCullum,1981-09-27 00:00:00,2,1.0,4
2,3,RT Ponting,1974-12-19 00:00:00,2,1.0,5
3,4,DJ Hussey,1977-07-15 00:00:00,2,2.0,5
4,5,Mohammad Hafeez,1980-10-17 00:00:00,2,2.0,6
...,...,...,...,...,...,...
464,465,DL Chahar,1992-08-07 00:00:00,2,1.0,1
465,466,P Dharmani,1974-09-27 00:00:00,2,,1
466,467,RV Pawar,1979-09-06 00:00:00,1,7.0,1
467,468,KH Devdhar,1989-12-14 00:00:00,2,,1


## Query 2: Batsman vs Runs

In [8]:
# a SQL query to calculate the total runs scored by each batsman

df2 = pd.read_sql(
    """
    SELECT Player.Player_Name, SUM(Batsman_Scored.Runs_Scored) AS Total_Runs
    FROM Batsman_Scored
    INNER JOIN Ball_by_Ball ON 
        Batsman_Scored.Match_Id = Ball_by_Ball.Match_Id 
        AND Batsman_Scored.Over_Id = Ball_by_Ball.Over_Id 
        AND Batsman_Scored.Ball_Id = Ball_by_Ball.Ball_Id
    INNER JOIN Player ON Ball_by_Ball.Striker = Player.Player_Id
    GROUP BY Player.Player_Name
    ORDER BY Total_Runs DESC;
""",
    engine
)

df2

Unnamed: 0,Player_Name,Total_Runs
0,V Kohli,8158
1,SK Raina,7588
2,RG Sharma,7263
3,G Gambhir,6793
4,RV Uthappa,6303
...,...,...
429,V Pratap Singh,0
430,Sunny Gupta,0
431,ND Doshi,0
432,M Ashwin,0


## Query 3: Fifties and Hundreds

## Query 4: Best Bowling Figures

In [9]:
# Write and execute a SQL query to find the best bowling figures for each bowler.

df4 = pd.read_sql(
    """
    WITH Bowler_Wickets AS (
        SELECT Ball_by_Ball.Bowler AS Player_Id, 
               Ball_by_Ball.Match_Id, 
               Ball_by_Ball.Innings_No, 
               COUNT(*) AS Wickets
        FROM Wicket_Taken
        INNER JOIN Ball_by_Ball ON 
            Wicket_Taken.Match_Id = Ball_by_Ball.Match_Id 
            AND Wicket_Taken.Over_Id = Ball_by_Ball.Over_Id 
            AND Wicket_Taken.Ball_Id = Ball_by_Ball.Ball_Id
        GROUP BY Ball_by_Ball.Bowler, Ball_by_Ball.Match_Id, Ball_by_Ball.Innings_No
    )
    SELECT Player.Player_Name, 
           MAX(Wickets) AS Best_Figures
    FROM Bowler_Wickets
    INNER JOIN Player ON Bowler_Wickets.Player_Id = Player.Player_Id
    GROUP BY Player.Player_Name
    ORDER BY Best_Figures DESC;
""",
    engine
)

df4

Unnamed: 0,Player_Name,Best_Figures
0,A Zampa,9
1,A Nehra,9
2,Z Khan,8
3,MA Starc,8
4,DJG Sammy,8
...,...,...
308,AM Rahane,1
309,AC Voges,1
310,AA Kazi,1
311,AA Jhunjhunwala,1


# Query 5: Comprehensive Career Metrics

In [None]:
# Combine all the previous chunks into a single comprehensive query to get detailed career metrics for players

df_combined = pd.merge(df2, df4, on="Player_Name", how="outer")
df_combined

Unnamed: 0,Player_Name,Total_Runs,Best_Figures
0,A Ashish Reddy,524.0,5.0
1,A Chandila,9.0,5.0
2,A Chopra,123.0,
3,A Flintoff,114.0,5.0
4,A Kumble,101.0,6.0
...,...,...,...
455,YV Takawale,431.0,
456,Yashpal Singh,105.0,
457,Younis Khan,12.0,
458,Yuvraj Singh,4510.0,5.0
