In [1]:
import numpy as np
import pandas as pd

### 🔹 A. Series & Basics

Create a Series of 5 random numbers with index labels X, Y, Z, P, Q.

In [10]:
s1 = pd.Series(np.random.randint(1,6,5), index=["x",'y','z','p','q'])
s1


x    3
y    1
z    3
p    4
q    2
dtype: int32

Convert a dictionary {"Math": 90, "Science": 85, "English": 95} into a Series.

In [4]:
s = pd.Series({'Math':90,'Science':85,'English':95})
s

Math       90
Science    85
English    95
dtype: int64

Display only the index labels of a Series.

In [7]:
s.index

Index(['Math', 'Science', 'English'], dtype='object')

Rename index labels of a Series (X ➝ Sub1, Y ➝ Sub2, etc.).

In [11]:
s1.rename({"x":"s1","y":"s2","z":"s3","p":"s4","q":"s5"})

s1    3
s2    1
s3    3
s4    4
s5    2
dtype: int32

### 🔹 B. DataFrame Creation & Indexing

Create a DataFrame from a dictionary of lists (Students, Marks, Subjects).

In [13]:
data = {
    "Student": ["Aman", "Anita", "Raj", "Sara"],
    "Subject": ["Math", "Science", "English", "History"],
    "Marks": [85, 92, 78, 88]
}

df = pd.DataFrame(data)
df

Unnamed: 0,Student,Subject,Marks
0,Aman,Math,85
1,Anita,Science,92
2,Raj,English,78
3,Sara,History,88


In [14]:
df.set_index("Student")

Unnamed: 0_level_0,Subject,Marks
Student,Unnamed: 1_level_1,Unnamed: 2_level_1
Aman,Math,85
Anita,Science,92
Raj,English,78
Sara,History,88


Unnamed: 0,Student,Subject,Marks
0,Aman,Math,85
1,Anita,Science,92
2,Raj,English,78
3,Sara,History,88


Select the row at index 2 using .iloc[].

In [18]:
df.iloc[2,:]

Student        Raj
Subject    English
Marks           78
Name: 2, dtype: object

Select Name and Salary columns without using column names directly (use .iloc).

In [19]:
df_emp = pd.DataFrame({
    "EmpID": [1, 2, 3, 4, 5],
    "Name": ["Alice", "Bob", "Charlie", "David", "Eva"],
    "Dept": ["HR", "IT", "IT", "Finance", np.nan],
    "Gender": ["F", "M", "M", "M", "F"],
    "Age": [25, 30, 28, np.nan, 35],
    "Salary": [50000, 60000, np.nan, 75000, 65000]
})

df_emp

Unnamed: 0,EmpID,Name,Dept,Gender,Age,Salary
0,1,Alice,HR,F,25.0,50000.0
1,2,Bob,IT,M,30.0,60000.0
2,3,Charlie,IT,M,28.0,
3,4,David,Finance,M,,75000.0
4,5,Eva,,F,35.0,65000.0


In [23]:
df_emp.iloc[:, [1,5]]

Unnamed: 0,Name,Salary
0,Alice,50000.0
1,Bob,60000.0
2,Charlie,
3,David,75000.0
4,Eva,65000.0


### 🔹 C. Missing Values

Replace all NaN values in a DataFrame with 0.

In [24]:
df_emp.fillna(0)

Unnamed: 0,EmpID,Name,Dept,Gender,Age,Salary
0,1,Alice,HR,F,25.0,50000.0
1,2,Bob,IT,M,30.0,60000.0
2,3,Charlie,IT,M,28.0,0.0
3,4,David,Finance,M,0.0,75000.0
4,5,Eva,0,F,35.0,65000.0


Drop all rows that contain at least one NaN.

In [27]:
df_emp.dropna()

Unnamed: 0,EmpID,Name,Dept,Gender,Age,Salary
0,1,Alice,HR,F,25.0,50000.0
1,2,Bob,IT,M,30.0,60000.0


Fill NaN values using forward fill (method="pad").

In [31]:
df_emp["Salary"].ffill()

0    50000.0
1    60000.0
2    60000.0
3    75000.0
4    65000.0
Name: Salary, dtype: float64

Count how many employees belong to each Gender.

In [34]:
df_emp["Gender"].value_counts()

Gender
M    3
F    2
Name: count, dtype: int64

Find the top 2 employees with the highest Salary using nlargest().

In [35]:
df_emp.nlargest(2,"Salary")

Unnamed: 0,EmpID,Name,Dept,Gender,Age,Salary
3,4,David,Finance,M,,75000.0
4,5,Eva,,F,35.0,65000.0


### 🔹 E. Combining Data

Concatenate two DataFrames row-wise.

In [36]:
df1 = pd.DataFrame({
    "EmpID": [101, 102, 103],
    "Name": ["Alice", "Bob", "Charlie"],
    "Dept": ["HR", "IT", "Finance"]
})

df2 = pd.DataFrame({
    "EmpID": [104, 105, 106],
    "Name": ["Diana", "Ethan", "Fiona"],
    "Dept": ["IT", "HR", "Finance"]
})

In [39]:
s = pd.concat((df1,df2))
s.reset_index()

Unnamed: 0,index,EmpID,Name,Dept
0,0,101,Alice,HR
1,1,102,Bob,IT
2,2,103,Charlie,Finance
3,0,104,Diana,IT
4,1,105,Ethan,HR
5,2,106,Fiona,Finance


Merge two DataFrames on a common column but with different column names.

In [40]:
df_emp = pd.DataFrame({
    "EmpID": [101, 102, 103],
    "EmpName": ["Alice", "Bob", "Charlie"],
    "Salary": [50000, 60000, 55000]
})

df_dept = pd.DataFrame({
    "DepartmentID": [101, 102, 103],
    "DeptName": ["HR", "IT", "Finance"]
})

In [42]:
pd.merge(df_emp, df_dept, left_on="EmpID", right_on="DepartmentID")

Unnamed: 0,EmpID,EmpName,Salary,DepartmentID,DeptName
0,101,Alice,50000,101,HR
1,102,Bob,60000,102,IT
2,103,Charlie,55000,103,Finance


Perform a left join between employees and departments.

In [43]:
df_employees = pd.DataFrame({
    "EmpID": [101, 102, 103, 104],
    "Name": ["Alice", "Bob", "Charlie", "Diana"],
    "Dept_ID": [1, 2, 3, 2]
})

df_departments = pd.DataFrame({
    "Dept_ID": [1, 2, 3],
    "DeptName": ["HR", "IT", "Finance"]
})

Perform a left join between employees and departments.

Join two DataFrames with overlapping column names using lsuffix and rsuffix

In [46]:
df_employees.join(df_departments, on="Dept_ID", how="left", lsuffix="_l", rsuffix="_r")

Unnamed: 0,Dept_ID,EmpID,Name,Dept_ID_l,Dept_ID_r,DeptName
0,1,101,Alice,1,2.0,IT
1,2,102,Bob,2,3.0,Finance
2,3,103,Charlie,3,,
3,2,104,Diana,2,3.0,Finance


### 🔹 F. Reshaping

In [49]:
df_sales = pd.DataFrame({
    "Dept": ["HR", "HR", "IT", "IT", "Finance", "Finance"],
    "Month": ["Jan", "Feb", "Jan", "Feb", "Jan", "Feb"],
    "Sales": [20000, 25000, 30000, 32000, 15000, 18000]
})

df_sales

Unnamed: 0,Dept,Month,Sales
0,HR,Jan,20000
1,HR,Feb,25000
2,IT,Jan,30000
3,IT,Feb,32000
4,Finance,Jan,15000
5,Finance,Feb,18000


Pivot a DataFrame to show Departments as rows, Months as columns, and Sales as values.

In [55]:
pd.pivot(df_sales,index="Dept", columns="Month", values="Sales")

Month,Feb,Jan
Dept,Unnamed: 1_level_1,Unnamed: 2_level_1
Finance,18000,15000
HR,25000,20000
IT,32000,30000


Melt a DataFrame from wide format (Math, Science, English) to long format.

In [56]:
df_scores = pd.DataFrame({
    "Student": ["Alice", "Bob", "Charlie"],
    "Math": [85, 90, 78],
    "Science": [92, 88, 95],
    "English": [80, 85, 88]
})

df_scores

Unnamed: 0,Student,Math,Science,English
0,Alice,85,92,80
1,Bob,90,88,85
2,Charlie,78,95,88


In [57]:
pd.melt(df_scores, id_vars=["Student"], value_vars=["Math", "Science", "English"], var_name="Subject", value_name="Scores")

Unnamed: 0,Student,Subject,Scores
0,Alice,Math,85
1,Bob,Math,90
2,Charlie,Math,78
3,Alice,Science,92
4,Bob,Science,88
5,Charlie,Science,95
6,Alice,English,80
7,Bob,English,85
8,Charlie,English,88


Stack a DataFrame and then unstack it back.

In [None]:
df = df_scores.set_index("Student").stack()

Student         
Alice    Math       85
         Science    92
         English    80
Bob      Math       90
         Science    88
         English    85
Charlie  Math       78
         Science    95
         English    88
dtype: int64

In [62]:
df.unstack()

Student  0       Aman
         1      Anita
         2        Raj
         3       Sara
Subject  0       Math
         1    Science
         2    English
         3    History
Marks    0         85
         1         92
         2         78
         3         88
dtype: object

### 🔹 G. Cleaning & Utilities

Replace all values 50000 in Salary column with 55000.

In [75]:
df_emp = pd.DataFrame({
    "EmpID": [1, 2, 3, 4, 2],
    "Name": ["Alice", "Bob", "Charlie", "David", "Bob"],
    "Dept": ["HR", "IT", "IT", "Finance", "IT"],
    "Gender": ["F", "M", "M", "M", "M"],
    "Age": [25, 30, 28, np.nan, 30],
    "Salary": [50000, 60000, np.nan, 75000, 60000]
})

df_emp

Unnamed: 0,EmpID,Name,Dept,Gender,Age,Salary
0,1,Alice,HR,F,25.0,50000.0
1,2,Bob,IT,M,30.0,60000.0
2,3,Charlie,IT,M,28.0,
3,4,David,Finance,M,,75000.0
4,2,Bob,IT,M,30.0,60000.0


In [74]:
df_emp.replace(50000,55000)

Unnamed: 0,EmpID,Name,Dept,Gender,Age,Salary
0,1,Alice,HR,F,25.0,55000.0
1,2,Bob,IT,M,30.0,60000.0
2,3,Charlie,IT,M,28.0,
3,4,David,Finance,M,,75000.0
4,5,Bob,IT,M,30.0,60000.0


In [76]:
df_emp[df_emp.duplicated()]

Unnamed: 0,EmpID,Name,Dept,Gender,Age,Salary
4,2,Bob,IT,M,30.0,60000.0


In [79]:
df_emp.drop_duplicates(keep='first')

Unnamed: 0,EmpID,Name,Dept,Gender,Age,Salary
0,1,Alice,HR,F,25.0,50000.0
1,2,Bob,IT,M,30.0,60000.0
2,3,Charlie,IT,M,28.0,
3,4,David,Finance,M,,75000.0


In [86]:
dropped = df_emp.drop(columns=df_emp.columns[-1])
dropped

Unnamed: 0,EmpID,Name,Dept,Gender,Age
0,1,Alice,HR,F,25.0
1,2,Bob,IT,M,30.0
2,3,Charlie,IT,M,28.0
3,4,David,Finance,M,
4,2,Bob,IT,M,30.0
