In [1]:
import pandas as pd
import numpy as np


# 1. Load Data: Create a sample DataFrame with missing values

In [2]:
# Sample movie table, one list per column. np.nan / None mark the gaps
# that the later cleaning cells deal with.
data = dict(
    Title=['Inception', 'Titanic', 'Avatar', 'The Matrix', 'Gladiator'],
    Rating=[8.8, 7.8, np.nan, 8.7, np.nan],
    Release_Year=['2010', '1997', '2009', None, '2000'],
)

df = pd.DataFrame(data=data)

# Snapshot of the raw frame before any cleaning.
print("Before Cleaning:")
info_result = df.info()  # info() writes its report itself; the value it returns is None
print(info_result)       # hence the "None" line that follows the report
print(df, "\n")

Before Cleaning:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Title         5 non-null      object 
 1   Rating        3 non-null      float64
 2   Release_Year  4 non-null      object 
dtypes: float64(1), object(2)
memory usage: 252.0+ bytes
None
        Title  Rating Release_Year
0   Inception     8.8         2010
1     Titanic     7.8         1997
2      Avatar     NaN         2009
3  The Matrix     8.7         None
4   Gladiator     NaN         2000 



# 2. Check for Missing Values

In [3]:
# Count the null entries in each column, then report the per-column totals.
missing_per_column = df.isna().sum()
print("Missing Values:\n", missing_per_column, "\n")

Missing Values:
 Title           0
Rating          2
Release_Year    1
dtype: int64 



# 3. Handle Missing Data

In [4]:
# Fill missing ratings with the median of the observed ratings.
# The filled Series is assigned back to the column instead of calling
# fillna(..., inplace=True) on df['Rating']: that form is chained assignment on
# an intermediate Series, which pandas 2.x flags with a FutureWarning and which
# leaves df unchanged once Copy-on-Write is active (the default in pandas 3.0).
df['Rating'] = df['Rating'].fillna(df['Rating'].median())

# Drop rows where 'Release_Year' is missing. This mutates df directly, and the
# surviving rows keep their original index labels.
df.dropna(subset=['Release_Year'], inplace=True)


# 4. Correct Data Type


In [5]:
# Cast the 'Release_Year' strings to integers, then store the converted
# Series back under the same column name.
release_year_as_int = df['Release_Year'].astype(int)
df['Release_Year'] = release_year_as_int


# 5. Output: Show cleaned DataFrame info

In [6]:
# Final report: column summary followed by the cleaned rows.
print("After Cleaning:")
info_result = df.info()  # info() writes its report itself; the value it returns is None
print(info_result)       # hence the "None" line that follows the report
print(df)

After Cleaning:
<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 0 to 4
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Title         4 non-null      object 
 1   Rating        4 non-null      float64
 2   Release_Year  4 non-null      int64  
dtypes: float64(1), int64(1), object(1)
memory usage: 128.0+ bytes
None
       Title  Rating  Release_Year
0  Inception     8.8          2010
1    Titanic     7.8          1997
2     Avatar     8.7          2009
4  Gladiator     8.7          2000
