## Handling NAN values

In [43]:
import pandas as pd
import numpy as np

df = pd.DataFrame({
    "EmployeeID": [101, 102, 103, 104, 105],
    "Name": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "Department": ["HR", "IT", "Finance", "IT", np.nan],
    "Salary": [50000, 60000, np.nan, 65000, 55000]
})

pd.isna(df).sum() #findng the number of nan values in every row
df.fillna("0") #replacing the nan values with 0

# filling nan values with different values according to columns
columns = {"EmployeeID":"000", "Name":"none", "Department":"IT", "Salary":000000.0}
df.fillna(columns, inplace=True)
df


Unnamed: 0,EmployeeID,Name,Department,Salary
0,101,Alice,HR,50000.0
1,102,Bob,IT,60000.0
2,103,Charlie,Finance,0.0
3,104,David,IT,65000.0
4,105,Eve,IT,55000.0


In [44]:
import pandas as pd

df1 = pd.DataFrame({
    "EmployeeID": [101, 102, 103, 104, 105],
    "Name": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "Salary": [50000, 60000, None, 65000, 55000]
})

df2 = pd.DataFrame({
    "EmployeeID": [101, 102, 103, 104, 105],
    "Bonus": [5000, 7000, 6000, None, 4000]
})


### Mergin Joining and concatenation

In [45]:
# Merging df1 and df2
merged_df = pd.merge(df1, df2, on="EmployeeID", how="inner")
print(merged_df)

   EmployeeID     Name   Salary   Bonus
0         101    Alice  50000.0  5000.0
1         102      Bob  60000.0  7000.0
2         103  Charlie      NaN  6000.0
3         104    David  65000.0     NaN
4         105      Eve  55000.0  4000.0


In [48]:
# concatinating df1 and df2
pd.concat([df1, df2], axis=1)

df2

Unnamed: 0,EmployeeID,Bonus
0,101,5000.0
1,102,7000.0
2,103,6000.0
3,104,
4,105,4000.0


In [50]:
# joining df1 and df2
df1.join(df2["Bonus"])

newDf1 = df1.set_index("EmployeeID")
newDF2 = df2.set_index("EmployeeID")

newDf1.join(newDF2)

Unnamed: 0_level_0,Name,Salary,Bonus
EmployeeID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
101,Alice,50000.0,5000.0
102,Bob,60000.0,7000.0
103,Charlie,,6000.0
104,David,65000.0,
105,Eve,55000.0,4000.0


### GroupBy and Aggregation

In [51]:
import pandas as pd

df3 = pd.DataFrame({
    "Department": ["HR", "IT", "Finance", "IT", "Finance", "HR"],
    "Projects": [2, 5, 3, 4, 6, 1],
    "Salary": [50000, 60000, 55000, 65000, 70000, 52000]
})
df3

Unnamed: 0,Department,Projects,Salary
0,HR,2,50000
1,IT,5,60000
2,Finance,3,55000
3,IT,4,65000
4,Finance,6,70000
5,HR,1,52000


In [53]:
projects = df3.groupby("Department")["Projects"].sum()
print(projects)

salary = df3.groupby("Department")["Salary"].mean()
print(salary)


Department
Finance    9
HR         3
IT         9
Name: Projects, dtype: int64
Department
Finance    62500.0
HR         51000.0
IT         62500.0
Name: Salary, dtype: float64


In [61]:

df4 = pd.DataFrame({
    "Projects": [2, 5, 3, 4, 6, 1],
    "Salary": [50000, 60000, 55000, 65000, 70000, 52000]
})

df4

Unnamed: 0,Projects,Salary
0,2,50000
1,5,60000
2,3,55000
3,4,65000
4,6,70000
5,1,52000


In [62]:
df4
df4.aggregate([sum, max, min ])

  df4.aggregate([sum, max, min ])
  df4.aggregate([sum, max, min ])
  df4.aggregate([sum, max, min ])


Unnamed: 0,Projects,Salary
sum,21,352000
max,6,70000
min,1,50000
