**DATA PROCESSING**

Data processing is the conversion of data into a usable or desirable form. It is carried out by applying a predefined sequence of operations, either manually or automatically.

In [2]:
from io import StringIO

import pandas as pd
import requests

# Step 1: URL of the Wikipedia article whose HTML tables are loaded
url = "https://en.wikipedia.org/wiki/Indian_Premier_League"

# Step 2: Custom headers to mimic a real browser
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/91.0.4472.124 Safari/537.36'
}

# Step 3: Fetch the page.
# The timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)

# Check if successful
if response.status_code == 200:
    # Step 4: Parse HTML content into a list of DataFrames (one per <table>).
    # The markup is wrapped in StringIO because passing a literal HTML string
    # to pd.read_html is deprecated and emits a FutureWarning.
    tables = pd.read_html(StringIO(response.text))

    print(f"Number of tables found: {len(tables)}")
    print(tables[0].head())  # Show first table
else:
    # NOTE: `tables` is not defined on this path, so the cells below cannot run.
    print(f"Failed to fetch page: {response.status_code}")

  tables = pd.read_html(response.text)


Number of tables found: 41
               0                                      1
0            NaN                                    NaN
1      Countries                                  India
2  Administrator  Board of Control for Cricket in India
3   Headquarters                          Mumbai, India
4         Format                               Twenty20


In [3]:
# Second name for the list of DataFrames produced by pd.read_html above;
# individual tables are picked out of it by position (e.g. df[5]).
df = tables

In [4]:
# Pick the table at position 5 of the parsed list (the team listing shown below)
# and work on an independent copy, so the in-place edits applied to df1 further
# down (rename with inplace=True, del) do not also alter the DataFrame that is
# still stored in the `tables` / `df` list.
df1 = df[5].copy()
df1

Unnamed: 0,Team,City,Home ground,Debut,Captain,Head coach,Owner(s),Ref
0,Chennai Super Kings,"Chennai, Tamil Nadu",M. A. Chidambaram Stadium,2008,Ruturaj Gaikwad,Stephen Fleming,Chennai Super Kings Cricket Limited,
1,Delhi Capitals,"New Delhi, Delhi",Arun Jaitley Stadium,2008,Axar Patel,Hemang Badani,GMR Group (50%) JSW Group (50%),[70]
2,Gujarat Titans,"Ahmedabad, Gujarat",Narendra Modi Stadium,2022,Shubman Gill,Ashish Nehra,Torrent Group (67%) CVC Capital (33%),[71]
3,Kolkata Knight Riders,"Kolkata, West Bengal",Eden Gardens,2008,Ajinkya Rahane,Chandrakant Pandit,Shah Rukh Khan (55%) Mehta Group (45%),[71]
4,Lucknow Super Giants,"Lucknow, Uttar Pradesh",Ekana Stadium,2022,Rishabh Pant,Justin Langer,RP-Sanjiv Goenka Group,[72]
5,Mumbai Indians,"Mumbai, Maharashtra",Wankhede Stadium,2008,Hardik Pandya,Mahela Jayawardene,Reliance Industries,[73]
6,Punjab Kings,"New Chandigarh, Punjab",Maharaja Yadavindra Singh Stadium,2008,Shreyas Iyer,Ricky Ponting,Mohit Burman (48%) Ness Wadia (23%) Preity Zin...,[74]
7,Rajasthan Royals,"Jaipur, Rajasthan",Sawai Mansingh Stadium,2008,Sanju Samson,Rahul Dravid,Manoj Badale (65%) RedBird (15%) Lachlan Murdo...,[75]
8,Royal Challengers Bengaluru,"Bengaluru, Karnataka",M. Chinnaswamy Stadium,2008,Rajat Patidar,Andy Flower,United Spirits,[76]
9,Sunrisers Hyderabad,"Hyderabad, Telangana",Rajiv Gandhi Stadium,2013,Pat Cummins,Daniel Vettori,Sun TV Network,[77]


**1. Reading first and last n rows**

In [5]:
# Show the first n rows of the DataFrame; n=5 is also what head() uses when no argument is given
df1.head(n=5)

Unnamed: 0,Team,City,Home ground,Debut,Captain,Head coach,Owner(s),Ref
0,Chennai Super Kings,"Chennai, Tamil Nadu",M. A. Chidambaram Stadium,2008,Ruturaj Gaikwad,Stephen Fleming,Chennai Super Kings Cricket Limited,
1,Delhi Capitals,"New Delhi, Delhi",Arun Jaitley Stadium,2008,Axar Patel,Hemang Badani,GMR Group (50%) JSW Group (50%),[70]
2,Gujarat Titans,"Ahmedabad, Gujarat",Narendra Modi Stadium,2022,Shubman Gill,Ashish Nehra,Torrent Group (67%) CVC Capital (33%),[71]
3,Kolkata Knight Riders,"Kolkata, West Bengal",Eden Gardens,2008,Ajinkya Rahane,Chandrakant Pandit,Shah Rukh Khan (55%) Mehta Group (45%),[71]
4,Lucknow Super Giants,"Lucknow, Uttar Pradesh",Ekana Stadium,2022,Rishabh Pant,Justin Langer,RP-Sanjiv Goenka Group,[72]


In [8]:
# Show the last n rows of the DataFrame; n=5 is also what tail() uses when no argument is given
df1.tail(n=5)

Unnamed: 0,Team,City,Home ground,Debut,Captain,Head coach,Owner(s),Ref
5,Mumbai Indians,"Mumbai, Maharashtra",Wankhede Stadium,2008,Hardik Pandya,Mahela Jayawardene,Reliance Industries,[73]
6,Punjab Kings,"New Chandigarh, Punjab",Maharaja Yadavindra Singh Stadium,2008,Shreyas Iyer,Ricky Ponting,Mohit Burman (48%) Ness Wadia (23%) Preity Zin...,[74]
7,Rajasthan Royals,"Jaipur, Rajasthan",Sawai Mansingh Stadium,2008,Sanju Samson,Rahul Dravid,Manoj Badale (65%) RedBird (15%) Lachlan Murdo...,[75]
8,Royal Challengers Bengaluru,"Bengaluru, Karnataka",M. Chinnaswamy Stadium,2008,Rajat Patidar,Andy Flower,United Spirits,[76]
9,Sunrisers Hyderabad,"Hyderabad, Telangana",Rajiv Gandhi Stadium,2013,Pat Cummins,Daniel Vettori,Sun TV Network,[77]


**2. Renaming column names in a DataFrame using .rename()**

In [11]:
# Rename three columns. The result is assigned back to df1 instead of using
# inplace=True: the DataFrame object that df1 previously referred to is left
# untouched, so this cell does not silently modify a table that other names
# may still point to.
df1 = df1.rename(columns={'City': 'Location', 'Head coach': 'Manager', 'Owner(s)': 'Owner'})

In [12]:
# Display df1 to confirm the renamed columns (Location, Manager, Owner)
df1

Unnamed: 0,Team,Location,Home ground,Debut,Captain,Manager,Owner,Ref
0,Chennai Super Kings,"Chennai, Tamil Nadu",M. A. Chidambaram Stadium,2008,Ruturaj Gaikwad,Stephen Fleming,Chennai Super Kings Cricket Limited,
1,Delhi Capitals,"New Delhi, Delhi",Arun Jaitley Stadium,2008,Axar Patel,Hemang Badani,GMR Group (50%) JSW Group (50%),[70]
2,Gujarat Titans,"Ahmedabad, Gujarat",Narendra Modi Stadium,2022,Shubman Gill,Ashish Nehra,Torrent Group (67%) CVC Capital (33%),[71]
3,Kolkata Knight Riders,"Kolkata, West Bengal",Eden Gardens,2008,Ajinkya Rahane,Chandrakant Pandit,Shah Rukh Khan (55%) Mehta Group (45%),[71]
4,Lucknow Super Giants,"Lucknow, Uttar Pradesh",Ekana Stadium,2022,Rishabh Pant,Justin Langer,RP-Sanjiv Goenka Group,[72]
5,Mumbai Indians,"Mumbai, Maharashtra",Wankhede Stadium,2008,Hardik Pandya,Mahela Jayawardene,Reliance Industries,[73]
6,Punjab Kings,"New Chandigarh, Punjab",Maharaja Yadavindra Singh Stadium,2008,Shreyas Iyer,Ricky Ponting,Mohit Burman (48%) Ness Wadia (23%) Preity Zin...,[74]
7,Rajasthan Royals,"Jaipur, Rajasthan",Sawai Mansingh Stadium,2008,Sanju Samson,Rahul Dravid,Manoj Badale (65%) RedBird (15%) Lachlan Murdo...,[75]
8,Royal Challengers Bengaluru,"Bengaluru, Karnataka",M. Chinnaswamy Stadium,2008,Rajat Patidar,Andy Flower,United Spirits,[76]
9,Sunrisers Hyderabad,"Hyderabad, Telangana",Rajiv Gandhi Stadium,2013,Pat Cummins,Daniel Vettori,Sun TV Network,[77]


**3. Deleting a column using the del keyword**

In [13]:
# `del` removes the 'Ref' column from df1 itself (no new DataFrame is created).
# NOTE: not idempotent - running this cell a second time raises KeyError because 'Ref' is already gone.
del df1['Ref']

In [14]:
# Display df1 to confirm that the 'Ref' column has been removed
df1

Unnamed: 0,Team,Location,Home ground,Debut,Captain,Manager,Owner
0,Chennai Super Kings,"Chennai, Tamil Nadu",M. A. Chidambaram Stadium,2008,Ruturaj Gaikwad,Stephen Fleming,Chennai Super Kings Cricket Limited
1,Delhi Capitals,"New Delhi, Delhi",Arun Jaitley Stadium,2008,Axar Patel,Hemang Badani,GMR Group (50%) JSW Group (50%)
2,Gujarat Titans,"Ahmedabad, Gujarat",Narendra Modi Stadium,2022,Shubman Gill,Ashish Nehra,Torrent Group (67%) CVC Capital (33%)
3,Kolkata Knight Riders,"Kolkata, West Bengal",Eden Gardens,2008,Ajinkya Rahane,Chandrakant Pandit,Shah Rukh Khan (55%) Mehta Group (45%)
4,Lucknow Super Giants,"Lucknow, Uttar Pradesh",Ekana Stadium,2022,Rishabh Pant,Justin Langer,RP-Sanjiv Goenka Group
5,Mumbai Indians,"Mumbai, Maharashtra",Wankhede Stadium,2008,Hardik Pandya,Mahela Jayawardene,Reliance Industries
6,Punjab Kings,"New Chandigarh, Punjab",Maharaja Yadavindra Singh Stadium,2008,Shreyas Iyer,Ricky Ponting,Mohit Burman (48%) Ness Wadia (23%) Preity Zin...
7,Rajasthan Royals,"Jaipur, Rajasthan",Sawai Mansingh Stadium,2008,Sanju Samson,Rahul Dravid,Manoj Badale (65%) RedBird (15%) Lachlan Murdo...
8,Royal Challengers Bengaluru,"Bengaluru, Karnataka",M. Chinnaswamy Stadium,2008,Rajat Patidar,Andy Flower,United Spirits
9,Sunrisers Hyderabad,"Hyderabad, Telangana",Rajiv Gandhi Stadium,2013,Pat Cummins,Daniel Vettori,Sun TV Network


**4. Dropping rows and columns from a DataFrame**

In [15]:
# Dropping rows and columns simultaneously from a DataFrame

df1.drop(index = [4,5,6,7], columns = ['Debut','Home ground'])

# Drops the rows with index labels 4, 5, 6 and 7 (rows 8 and 9 are kept)
# Drops columns 'Debut' and 'Home ground'
# The result is a new DataFrame that is only displayed here; df1 itself is not changed

Unnamed: 0,Team,Location,Captain,Manager,Owner
0,Chennai Super Kings,"Chennai, Tamil Nadu",Ruturaj Gaikwad,Stephen Fleming,Chennai Super Kings Cricket Limited
1,Delhi Capitals,"New Delhi, Delhi",Axar Patel,Hemang Badani,GMR Group (50%) JSW Group (50%)
2,Gujarat Titans,"Ahmedabad, Gujarat",Shubman Gill,Ashish Nehra,Torrent Group (67%) CVC Capital (33%)
3,Kolkata Knight Riders,"Kolkata, West Bengal",Ajinkya Rahane,Chandrakant Pandit,Shah Rukh Khan (55%) Mehta Group (45%)
8,Royal Challengers Bengaluru,"Bengaluru, Karnataka",Rajat Patidar,Andy Flower,United Spirits
9,Sunrisers Hyderabad,"Hyderabad, Telangana",Pat Cummins,Daniel Vettori,Sun TV Network
