In [1]:
#1. merge() and concat() in Pandas
#What are they?
#merge() = SQL-style joins (combine rows based on keys)

#concat() = Stack DataFrames vertically or horizontally

import pandas as pd

# Two small frames that share an 'ID' key column.
# ID 3 appears only in df1 and ID 4 only in df2.
df1 = pd.DataFrame(dict(
    ID=[1, 2, 3],
    Name=['Atharva', 'Nihal', 'Reva'],
))
df2 = pd.DataFrame(dict(
    ID=[1, 2, 4],
    Score=[88, 99, 95],
))

# Inner join on 'ID': only keys present in BOTH frames survive (here 1 and 2).
merged_df = df1.merge(df2, how='inner', on='ID')

print("Merged DataFrame (inner join):")
print(merged_df)

Merged DataFrame (inner join):
   ID     Name  Score
0   1  Atharva     88
1   2    Nihal     99


In [3]:
# 1.2 concat(): stack DataFrames

# Both frames carry the same columns, so their rows line up when stacked.
df_a = pd.DataFrame(dict(Name=['Atharva', 'Nihal'], Age=[21, 22]))
df_b = pd.DataFrame(dict(Name=['Reva', 'Omkar'], Age=[23, 24]))

# axis=0 (the default) places df_b's rows underneath df_a's.
# Each input keeps its own row labels, so the result's index is 0, 1, 0, 1.
concat_df = pd.concat(objs=[df_a, df_b], axis=0)

print("Concatenated DataFrame (vertical):")
print(concat_df)



Concatenated DataFrame (vertical):
      Name  Age
0  Atharva   21
1    Nihal   22
0     Reva   23
1    Omkar   24


In [6]:
# groupby(): group rows and aggregate

data = pd.DataFrame(dict(
    Department=['CS', 'CS', 'IT', 'IT', 'ENTC'],
    Marks=[88, 90, 98, 88, 90],
))

# Split the rows by department, pick the Marks column, then average each group.
# The result is a Series indexed by Department.
marks_by_department = data.groupby('Department')['Marks']
grouped = marks_by_department.mean()

print("Average marks per department:")
print(grouped)

Average marks per department:
Department
CS      89.0
ENTC    90.0
IT      93.0
Name: Marks, dtype: float64


In [8]:
# Pivot & Pivot Table
# A pivot table reshapes the data and aggregates the values in each cell.

df = pd.DataFrame(dict(
    Name=['Atharva', 'Nihal', 'Reva', 'Atharva'],
    Subject=['Math', 'Math', 'Math', 'Science'],
    Score=[90, 85, 95, 88],
))

# One row per Name, one column per Subject; each cell holds the mean Score.
# Name/Subject combinations with no rows come out as NaN.
pivot_table = pd.pivot_table(
    df,
    values='Score',
    index='Name',
    columns='Subject',
    aggfunc='mean',
)

print("Pivot Table:")
print(pivot_table)

Pivot Table:
Subject  Math  Science
Name                  
Atharva  90.0     88.0
Nihal    85.0      NaN
Reva     95.0      NaN


In [9]:
# Handling missing data

df = pd.DataFrame(dict(
    Name=['Atharva', 'Nihal', 'Reva'],
    Age=[21, None, 23],
    City=['Pune', 'Mumbai', None],
))
print("Original DataFrame with missing values:")
print(df)

# Detect: boolean mask that is True wherever a cell is missing.
missing_mask = df.isna()
print("\nMissing Values:")
print(missing_mask)

# Fill: the single scalar is applied to every column, so the numeric Age
# column also receives the string "Unknown" alongside its numbers.
df_filled = df.fillna(value="Unknown")
print("\nDataFrame after fillna():")
print(df_filled)

# Drop: discard every row that contains at least one missing value.
df_dropped = df.dropna(axis=0, how='any')
print("\nDataFrame after dropna():")
print(df_dropped)


Original DataFrame with missing values:
      Name   Age    City
0  Atharva  21.0    Pune
1    Nihal   NaN  Mumbai
2     Reva  23.0    None

Missing Values:
    Name    Age   City
0  False  False  False
1  False   True  False
2  False  False   True

DataFrame after fillna():
      Name      Age     City
0  Atharva     21.0     Pune
1    Nihal  Unknown   Mumbai
2     Reva     23.0  Unknown

DataFrame after dropna():
      Name   Age  City
0  Atharva  21.0  Pune


In [10]:
# 5. Data type conversion

# Both columns start out holding strings.
df = pd.DataFrame(dict(
    Age=['21', '22', '23'],
    JoinDate=['2023-01-01', '2023-02-01', '2023-03-01'],
))

# Age: string -> integer. JoinDate: string -> datetime.
# NOTE(review): astype(int) appears to pick the platform's default integer
# width (the saved output shows int32) - confirm if a fixed width is needed.
df = df.assign(
    Age=df['Age'].astype(int),
    JoinDate=pd.to_datetime(df['JoinDate']),
)

print("DataFrame with converted data types:")
print(df)
print("\nData Types:")
print(df.dtypes)

DataFrame with converted data types:
   Age   JoinDate
0   21 2023-01-01
1   22 2023-02-01
2   23 2023-03-01

Data Types:
Age                  int32
JoinDate    datetime64[ns]
dtype: object
