In [2]:
import pandas as pd
import numpy as np

# Toy dataset in which every column contains exactly one missing entry (np.nan)
data = dict(
    Name=['Alice', 'Bob', np.nan, 'David', 'Eva'],
    Age=[25, np.nan, 35, 40, 45],
    City=['New York', 'Los Angeles', 'Chicago', np.nan, 'Phoenix'],
)

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,City
0,Alice,25.0,New York
1,Bob,,Los Angeles
2,,35.0,Chicago
3,David,40.0,
4,Eva,45.0,Phoenix


### 1. df.dropna(): Removes rows with missing values 

In [11]:
# Count the missing entries in each column (isna is an alias of isnull)
df.isna().sum()

Name    1
Age     1
City    1
dtype: int64

In [5]:
# Keep only the rows in which no column is missing; dropna returns a new
# frame, so df itself is not modified
df_cleaned = df.dropna(axis=0, how='any')
df_cleaned

Unnamed: 0,Name,Age,City
0,Alice,25.0,New York
4,Eva,45.0,Phoenix


### 2. df.fillna(value): Fills missing values with a specified value

In [7]:
# Replacement to use for a missing entry, chosen per column
fill_values = {'Name': 'Unknown', 'Age': 0, 'City': 'Unknown'}
df_filled = df.fillna(value=fill_values)

df_filled


Unnamed: 0,Name,Age,City
0,Alice,25.0,New York
1,Bob,0.0,Los Angeles
2,Unknown,35.0,Chicago
3,David,40.0,Unknown
4,Eva,45.0,Phoenix


### 3. df.drop(columns): Removes specified columns 

In [12]:
# Build a copy of df without the 'City' column
columns_to_drop = ['City']
df_dropped = df.drop(columns=columns_to_drop)

df_dropped


Unnamed: 0,Name,Age
0,Alice,25.0
1,Bob,
2,,35.0
3,David,40.0
4,Eva,45.0


### 4. df.rename(columns={'old_name': 'new_name'}): Renames columns

In [13]:
# Map each existing column label to its replacement label
column_labels = {
    'Name': 'Full Name',
    'Age': 'Years',
    'City': 'Location',
}
df_renamed = df.rename(columns=column_labels)

df_renamed


Unnamed: 0,Full Name,Years,Location
0,Alice,25.0,New York
1,Bob,,Los Angeles
2,,35.0,Chicago
3,David,40.0,
4,Eva,45.0,Phoenix


### 1. Lambda Functions in Pandas

In [15]:
import pandas as pd

# Single-column frame holding the integers 1 through 5
data = {'numbers': list(range(1, 6))}
df = pd.DataFrame(data)
df

Unnamed: 0,numbers
0,1
1,2
2,3
3,4
4,5


In [16]:
# Create the 'doubled' column by running a lambda over every value in 'numbers'
df['doubled'] = df['numbers'].apply(lambda value: value * 2)
df

Unnamed: 0,numbers,doubled
0,1,2
1,2,4
2,3,6
3,4,8
4,5,10


### Example 2: Using map() with Lambda:

In [17]:
# Series of lowercase animal names used by the map() example
animals = ['cat', 'dog', 'bird']
s = pd.Series(animals)
s

0     cat
1     dog
2    bird
dtype: object

In [19]:
# Upper-case every element by mapping a lambda over the Series
s = s.map(lambda text: text.upper())
s


0     CAT
1     DOG
2    BIRD
dtype: object

### 2. Merging DataFrames

In [20]:
# Two frames that share the key column 'eno'
df1 = pd.DataFrame(dict(eno=[1, 2, 3], ename=['James', 'Coles', 'Thomas']))
df2 = pd.DataFrame(dict(eno=[1, 2, 3], salary=[1000, 2000, 2500]))

# Inner join: keep only the 'eno' values that appear in both frames
merged_df = pd.merge(df1, df2, on='eno', how='inner')
merged_df


Unnamed: 0,eno,ename,salary
0,1,James,1000
1,2,Coles,2000
2,3,Thomas,2500


### 3. Concatenating DataFrames

In [23]:
# Two small frames with identical columns ('A' and 'B')
df1 = pd.DataFrame(dict(A=[1, 2], B=[3, 4]))
df2 = pd.DataFrame(dict(A=[5, 6], B=[7, 8]))

df1


Unnamed: 0,A,B
0,1,3
1,2,4


In [24]:
# Display the second frame; it has the same columns ('A', 'B') as df1
df2

Unnamed: 0,A,B
0,5,7
1,6,8


In [25]:
# Stack the rows of df2 beneath those of df1. Each frame keeps its own row
# labels, so the labels 0 and 1 both appear twice in the result.
frames = [df1, df2]
result = pd.concat(frames, axis=0)
result


Unnamed: 0,A,B
0,1,3
1,2,4
0,5,7
1,6,8


### Group by a Single Column (Example 1):

In [26]:
import pandas as pd
# Sample DataFrame: a category label and a numeric value per row
data = dict(
    Category=list('ABABA'),
    Values=[10, 20, 30, 40, 50],
)
df = pd.DataFrame(data)
df


Unnamed: 0,Category,Values
0,A,10
1,B,20
2,A,30
3,B,40
4,A,50


In [27]:
# Select 'Values' within each 'Category' group, then total each group
values_by_category = df.groupby('Category')['Values']
grouped = values_by_category.sum()
grouped

Category
A    90
B    60
Name: Values, dtype: int64

### Group by Multiple Columns (Example 2): 

In [28]:
import pandas as pd
# Sample DataFrame with two label columns ('Category', 'Type') and one numeric column
data = dict(
    Category=list('ABABA'),
    Type=list('XXYYY'),
    Values=[10, 20, 30, 40, 50],
)
df = pd.DataFrame(data)
df


Unnamed: 0,Category,Type,Values
0,A,X,10
1,B,X,20
2,A,Y,30
3,B,Y,40
4,A,Y,50


In [29]:
# Total 'Values' for every ('Category', 'Type') combination present in df;
# the result is a Series indexed by both grouping columns
group_keys = ['Category', 'Type']
grouped = df.groupby(group_keys)['Values'].sum()
grouped

Category  Type
A         X       10
          Y       80
B         X       20
          Y       40
Name: Values, dtype: int64

### Applying Multiple Aggregations (Example 3):  

In [30]:
import pandas as pd
# Sample DataFrame: five rows split across categories 'A' and 'B'
categories = ['A', 'B', 'A', 'B', 'A']
values = [10, 20, 30, 40, 50]
data = {'Category': categories, 'Values': values}
df = pd.DataFrame(data)
df


Unnamed: 0,Category,Values
0,A,10
1,B,20
2,A,30
3,B,40
4,A,50


In [31]:
# Compute several aggregates of 'Values' per 'Category' in one pass;
# each aggregate name becomes a column of the resulting frame
aggregations = ['sum', 'mean']
grouped = df.groupby('Category')['Values'].agg(aggregations)
grouped

Unnamed: 0_level_0,sum,mean
Category,Unnamed: 1_level_1,Unnamed: 2_level_1
A,90,30.0
B,60,30.0


### Filtering Groups (Example 4):

In [32]:
import pandas as pd
# Sample DataFrame: 'Category' labels paired with numeric 'Values'
data = {
    'Category': ['A', 'B', 'A', 'B', 'A'],
    'Values': list(range(10, 60, 10)),
}
df = pd.DataFrame(data)
df


Unnamed: 0,Category,Values
0,A,10
1,B,20
2,A,30
3,B,40
4,A,50


In [33]:
# Keep the rows of every 'Category' group whose 'Values' total exceeds 50.
# With this data both groups qualify (A totals 90, B totals 60), so every
# row of df is returned.
filtered = df.groupby('Category').filter(lambda group: group['Values'].sum() > 50)
filtered


Unnamed: 0,Category,Values
0,A,10
1,B,20
2,A,30
3,B,40
4,A,50


### Example with HAVING Equivalent (More than 2 employees in each dept):

In [35]:
import pandas as pd
# Sample DataFrame: one row per employee ('eno') with the department ('deptno') it belongs to
data = dict(
    deptno=[101, 102, 101, 103, 102, 101, 101, 103],
    eno=list(range(1, 9)),
)
df = pd.DataFrame(data)
df


Unnamed: 0,deptno,eno
0,101,1
1,102,2
2,101,3
3,103,4
4,102,5
5,101,6
6,101,7
7,103,8


In [36]:
# Count the 'eno' entries in each 'deptno' group, i.e. employees per department
eno_by_dept = df.groupby('deptno')['eno']
grouped = eno_by_dept.count()

grouped

deptno
101    4
102    2
103    2
Name: eno, dtype: int64

In [40]:
# Filter to get departments with more than 2 employees
# (boolean mask on the per-department counts, comparable to SQL's HAVING COUNT(eno) > 2)
filtered = grouped[grouped > 2]
filtered

deptno
101    4
Name: eno, dtype: int64