In [1]:
import pandas as pd

In [2]:
# Load the 'bowler' sheet from the Excel file
df = pd.read_excel('ODI_cricket.xlsx', sheet_name='bowler')

In [3]:
# Display the first 10 rows
print("First 10 Rows:")
print(df.head(10))

# Display the last 3 rows
print("\nLast 3 Rows:")
print(df.tail(3))

First 10 Rows:
                         Player       Span  Mat  Inns  Balls   Runs  Wkts  \
0  M Muralitharan (Asia/ICC/SL)  1993-2011  350   341  18811  12326   534   
1             Wasim Akram (PAK)  1984-2003  356   351  18186  11812   502   
2            Waqar Younis (PAK)  1989-2003  262   258  12698   9919   416   
3          WPUJC Vaas (Asia/SL)  1994-2008  322   320  15775  11014   400   
4  Shahid Afridi (Asia/ICC/PAK)  1996-2015  398   372  17670  13632   395   
5       SM Pollock (Afr/ICC/SA)  1996-2008  303   297  15712   9631   393   
6          GD McGrath (AUS/ICC)  1993-2007  250   248  12970   8391   381   
7                   B Lee (AUS)  2000-2012  221   217  11185   8877   380   
8               SL Malinga (SL)  2004-2019  226   220  10936   9760   338   
9         A Kumble (Asia/INDIA)  1990-2007  271   265  14496  10412   337   

     Ave  Econ    SR   4   5  
0  23.08  3.93  35.2  15  10  
1  23.52  3.89  36.2  17   6  
2  23.84  4.68  30.5  14  13  
3  27.53  4.1

In [5]:
# Number of rows and columns
num_rows, num_cols = df.shape
print("Number of Rows:", num_rows)
print("Number of Columns:", num_cols)

Number of Rows: 77
Number of Columns: 12


In [6]:
# Data statistics and data types
print("Data Statistics:")
print(df.describe())
print("\nData Types:")
print(df.dtypes)

Data Statistics:
              Mat        Inns         Balls          Runs        Wkts  \
count   77.000000   77.000000     77.000000     77.000000   77.000000   
mean   194.402597  181.194805   8839.402597   6671.714286  233.805195   
std     82.485606   67.958393   3316.055457   2245.839029   84.406603   
min     80.000000   76.000000   4074.000000   2821.000000  151.000000   
25%    136.000000  128.000000   6182.000000   5058.000000  173.000000   
50%    170.000000  164.000000   8054.000000   6192.000000  199.000000   
75%    227.000000  218.000000  10750.000000   8021.000000  272.000000   
max    463.000000  372.000000  18811.000000  13632.000000  534.000000   

             Ave       Econ         SR          4          5  
count  77.000000  77.000000  77.000000  77.000000  77.000000  
mean   28.958052   4.596753  37.909091   6.350649   2.870130  
std     4.826768   0.515814   6.060901   3.556929   2.530606  
min    18.680000   3.300000  26.100000   1.000000   0.000000  
25%    24.

In [7]:
# Check for missing values
missing_values = df.isnull().sum()
print("Missing Values:")
print(missing_values)

Missing Values:
Player    0
Span      0
Mat       0
Inns      0
Balls     0
Runs      0
Wkts      0
Ave       0
Econ      0
SR        0
4         0
5         0
dtype: int64


In [8]:
# Rename columns
df.rename(columns={'Mat': 'Matches', 'Inns': 'Innings', 'Wkts': 'Wickets'}, inplace=True)

In [9]:
# Count players who played for ICC
icc_players = df[df['Player'].str.contains('ICC')]['Player'].count()
print("Number of Players Played for ICC:", icc_players)

Number of Players Played for ICC: 13


In [10]:
# Count different countries
num_countries = df['Player'].apply(lambda x: len(x.split('(')) - 1).max()
print("Number of Different Countries:", num_countries)

Number of Different Countries: 1


In [11]:
# Player(s) who played for the longest period
longest_span = df[df['Span'] == df['Span'].max()]['Player']
print("Player(s) with Longest Playing Period:")
print(longest_span)

Player(s) with Longest Playing Period:
74    Rashid Khan (AFG)
Name: Player, dtype: object


In [12]:
# Player(s) who played for the shortest period
shortest_span = df[df['Span'] == df['Span'].min()]['Player']
print("Player(s) with Shortest Playing Period:")
print(shortest_span)

Player(s) with Shortest Playing Period:
65    RJ Hadlee (NZ)
Name: Player, dtype: object


In [13]:
# Count Australian bowlers
australian_bowlers = df[df['Player'].str.contains('AUS')]['Player'].count()
print("Number of Australian Bowlers:", australian_bowlers)

Number of Australian Bowlers: 10


In [14]:
# Check for Bangladeshi player
bangladeshi_player = any(df['Player'].str.contains('BAN'))
print("Bangladeshi Player Present:", bangladeshi_player)

Bangladeshi Player Present: True


In [15]:
# Player(s) with the lowest economy rate
lowest_econ = df[df['Econ'] == df['Econ'].min()]['Player']
print("Player(s) with Lowest Economy Rate:")
print(lowest_econ)

Player(s) with Lowest Economy Rate:
65    RJ Hadlee (NZ)
Name: Player, dtype: object


In [16]:
# Player(s) with the lowest strike rate
lowest_sr = df[df['SR'] == df['SR'].min()]['Player']
print("Player(s) with Lowest Strike Rate:")
print(lowest_sr)

Player(s) with Lowest Strike Rate:
41    MA Starc (AUS)
Name: Player, dtype: object


In [17]:
# Player(s) with the lowest bowling average
lowest_ave = df[df['Ave'] == df['Ave'].min()]['Player']
print("Player(s) with Lowest Bowling Average:")
print(lowest_ave)

Player(s) with Lowest Bowling Average:
74    Rashid Khan (AFG)
Name: Player, dtype: object


In [27]:
# Remove unnecessary columns
columns_to_remove = [4, 5]
df_cleaned = df.drop(columns=columns_to_remove)