<center><h1><b>Pandas Data Processing & Missing Values Handling</h1></center>

## 1) Importing Library

In [45]:
#Importing library:
import pandas as pd

## 2) Creating a DataFrame

In [46]:
#Creating DataFrame
data = {
    'Name':['John',None,'Anu'],
    'Age':[33,44,None]
}
df = pd.DataFrame(data)
print(df)

   Name   Age
0  John  33.0
1  None  44.0
2   Anu   NaN


## 3) Checking for Missing Values

In [47]:
#Checking for Missing Values
print(df.isnull())
print(df.isnull().sum())

    Name    Age
0  False  False
1   True  False
2  False   True
Name    1
Age     1
dtype: int64


## 4) Handling Missing Values

In [48]:
#Filling Missing values
df['Age'] = df['Age'].fillna(df['Age'].mean())
df['Name'] = df['Name'].fillna('Unknown')
print(df)

      Name   Age
0     John  33.0
1  Unknown  44.0
2      Anu  38.5


## 5) Detecting and Filtering Outliers Using the IQR Method

In [49]:
#Detecting and Filtering Outliers
df = pd.DataFrame({'salary':[25000,28000,27000,100000]})
Q1 = df['salary'].quantile(0.25)
Q3 = df['salary'].quantile(0.75)
IQR = Q3 - Q1
ans = df[(df['salary']>= Q1-1.5*IQR) & (df['salary']<= Q3+1.5*IQR)]
print(df)
print(ans)

   salary
0   25000
1   28000
2   27000
3  100000
   salary
0   25000
1   28000
2   27000


## 6) Detecting and Filtering Outliers in a Numeric Column Using the IQR Method

In [50]:
#Numeric Column Using the IQR Method
x = pd.DataFrame({'num':[51,52,53,54,55,56,57,99,100000]})
Q1 = x['num'].quantile(0.25)
Q2 = x['num'].quantile(0.75)
IQR =Q3-Q1
result = x[(x['num']>= Q1-1.5*IQR) & (x['num']<=Q3+1.5*IQR)]
print(x)
print(result)

      num
0      51
1      52
2      53
3      54
4      55
5      56
6      57
7      99
8  100000
      num
0      51
1      52
2      53
3      54
4      55
5      56
6      57
7      99
8  100000


## 7) Calculating BMI Column from Height and Weight

In [57]:
#Calculating BMI Column
x = pd.DataFrame({'height':[1.53,1.62],'weight':[44,55]})
# Standard BMI formula:
# BMI = weight (kg) / (height (m) ** 2)
x['BMI'] = x['weight'] / (x['height']**2)
print(x)

   height  weight        BMI
0    1.53      44  18.796189
1    1.62      55  20.957171


## 8) Dropping an Unnecessary Column

In [52]:
#Dropping an Unnecessary Column
x = pd.DataFrame({'ID':[1,2], 'Name':['A','b'], 'Age':[22,24]})
x.drop('ID',axis=1,inplace=True)
print(x)

  Name  Age
0    A   22
1    b   24


## 9) Calculating Age from Year Column

In [53]:
#Calculating Age
x = pd.DataFrame({'Years':[2004,2008,2016]})
x['Age'] = 2025 - x['Years']
print(x)

   Years  Age
0   2004   21
1   2008   17
2   2016    9


## 10) Converting Column Data Type from String to Integer

In [54]:
#Converting Column Data Type from String to Integer
x = pd.DataFrame({'salary':['1000','2000','3000']})
x['salary'] = x['salary'].astype(int)
print(x.dtypes)

salary    int64
dtype: object


## 11) Converting String Dates to Datetime Objects

In [55]:
#Converting String Dates to Datetime Objects
x = pd.DataFrame({'join_date': ['12-03-2024', '05-04-2024']})
x['join_date'] = pd.to_datetime(x['join_date'])
print(x)

   join_date
0 2024-12-03
1 2024-05-04


## 12) Renaming DataFrame Columns

In [56]:
#Renaming Columns
x = pd.DataFrame({'emp_name':['Anu'],'emp_age':[22]})
x.rename(columns={'emp_name':'Name', 'emp_age':'Age'},inplace=True)
print(x)

  Name  Age
0  Anu   22
