In [9]:
import pandas as pd

# Column labels applied positionally to the raw CSV, so the order here must
# match the column order of the file (pandas raises if the count differs).
COLUMN_NAMES = [
    'Sr no.', 'Player', 'Team', 'Role', 'Matches', 'Innings', 'NotOuts', 'BallsPlayed', 'RunsScored',
    'BatAvg', 'BatSR', '50+ Scores', '100+ Scores', 'Wickets', 'BowlInnings', 'BallsBowled',
    'OversBowled', 'RunsConceded', 'BowlEco', 'BowlAvg', 'BowlSR', '4W', '5W'
]


def round_numeric_columns(frame, columns, places=2):
    """Return a copy of ``frame`` with ``columns`` coerced to numbers and rounded.

    Parameters
    ----------
    frame : pandas.DataFrame
        Input frame; it is not modified.
    columns : iterable of str
        Labels of the columns to convert and round.
    places : int, default 2
        Number of decimal places to keep.

    Returns
    -------
    pandas.DataFrame
        A new frame. Values that cannot be parsed as numbers become 0 (the
        same placeholder used for missing values in this cell), so coercion
        can never reintroduce NaN after the fill step.
    """
    result = frame.copy()
    for col in columns:
        result[col] = pd.to_numeric(result[col], errors='coerce').fillna(0).round(places)
    return result


def find_duplicate_rows(frame, ignore=('Sr no.',)):
    """Return the rows of ``frame`` that repeat an earlier row.

    Columns listed in ``ignore`` are left out of the comparison. A running
    serial number differs on every row, so including it would prevent any
    repeated record from ever being reported.
    """
    compared = [label for label in frame.columns if label not in ignore]
    # Fall back to comparing every column if nothing is left after ignoring.
    return frame[frame.duplicated(subset=compared or None)]


# The guard keeps the helpers above importable/testable without the dataset
# files; inside a notebook kernel __name__ is "__main__", so the cell still
# runs top to bottom exactly as before.
if __name__ == "__main__":
    # Read the CSV (if it's copied as above, save it as 'ipl.csv')
    df = pd.read_csv(r"../datasets/iplAnalysis1.csv")

    # Rename columns to shorter names (optional, just an example)
    df.columns = COLUMN_NAMES

    # Show basic info
    print("DataFrame Info")
    print()
    # DataFrame.info() prints its own report and returns None, so wrapping it
    # in print() would emit a stray "None" line.
    df.info()
    print()

    # Count missing values
    print("No of missing values")
    print()
    print(df.isnull().sum())
    print()

    # Replace missing values with 0
    df = df.fillna(0)
    print()
    print("Dataframe after filling missing values")
    print()
    print(df.head())
    print()
    print()

    # Round off numerical columns to 2 decimal places
    columns = ['BatAvg', 'BatSR', 'OversBowled', 'BowlEco', 'BowlAvg', 'BowlSR']
    df = round_numeric_columns(df, columns)
    print("DataFrame after rounding the columns to 2 places")
    print()
    print(df.head(5))
    print()

    # Convert balls bowled to integer
    df['BallsBowled'] = pd.to_numeric(df['BallsBowled'], errors='coerce').fillna(0).astype(int)

    # Check for impossible values (reported only; no rows are removed here)
    print("Columns with impossible values")
    print()

    print("Players with more than 400 sr")
    print()
    print(df[df['BatSR'] > 400].head())
    print()
    print()

    print("Players with less than 0 matches played")
    print()
    print(df[df['Matches'] < 0].head())
    print()
    print()

    print("Players with bowling economy less than 0")
    print()
    print(df[df['BowlEco'] < 0].head())
    print()
    print()

    # Check duplicates (serial-number column excluded from the comparison)
    print("Checking for duplicates")
    print()
    print(find_duplicate_rows(df).head())
    print()

    # Save cleaned DataFrame to new CSV
    df.to_csv(r"../datasets/iplAnalysisCleaned.csv", index=False)

DataFrame Info

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44 entries, 0 to 43
Data columns (total 23 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Sr no.        44 non-null     int64  
 1   Player        44 non-null     object 
 2   Team          44 non-null     object 
 3   Role          44 non-null     object 
 4   Matches       44 non-null     int64  
 5   Innings       44 non-null     int64  
 6   NotOuts       44 non-null     int64  
 7   BallsPlayed   44 non-null     int64  
 8   RunsScored    44 non-null     int64  
 9   BatAvg        40 non-null     float64
 10  BatSR         39 non-null     float64
 11  50+ Scores    44 non-null     int64  
 12  100+ Scores   44 non-null     int64  
 13  Wickets       44 non-null     int64  
 14  BowlInnings   44 non-null     int64  
 15  BallsBowled   44 non-null     int64  
 16  OversBowled   44 non-null     float64
 17  RunsConceded  44 non-null     int64  
 18  BowlEco       20