In [1]:
import numpy as np
import pandas as pd

Create a Series with custom index labels (e.g., A, B, C, D).

In [2]:
s = pd.Series([11,22,33,44],
              index=['A','B','C','D'])
s

A    11
B    22
C    33
D    44
dtype: int64

Create a DataFrame from a list of dictionaries.


In [4]:
df = pd.DataFrame({"name": ["jsjjs","dkkd","rfdf","jfkfkkf","ffedf",'dffdfd'],
                   "age": [33,44,55,45,22,33]})
df

Unnamed: 0,name,age
0,jsjjs,33
1,dkkd,44
2,rfdf,55
3,jfkfkkf,45
4,ffedf,22
5,dffdfd,33


In [5]:
df.head(3)

Unnamed: 0,name,age
0,jsjjs,33
1,dkkd,44
2,rfdf,55


Check the number of rows and columns using .shape.


In [6]:
df.shape

(6, 2)

Drop a column from a DataFrame.


In [11]:
df.drop("age", axis=1)

Unnamed: 0,name
0,jsjjs
1,dkkd
2,rfdf
3,jfkfkkf
4,ffedf
5,dffdfd


In [12]:
df

Unnamed: 0,name,age
0,jsjjs,33
1,dkkd,44
2,rfdf,55
3,jfkfkkf,45
4,ffedf,22
5,dffdfd,33


Select rows by index labels using loc[].


In [16]:
df.loc[2:4,"age"]

2    55
3    45
4    22
Name: age, dtype: int64

Select alternate rows from a DataFrame.


In [17]:
df[::2]

Unnamed: 0,name,age
0,jsjjs,33
2,rfdf,55
4,ffedf,22


Retrieve the last 2 rows using tail().


In [18]:
df.tail(2)

Unnamed: 0,name,age
4,ffedf,22
5,dffdfd,33


Get rows where Name starts with "d".


In [None]:
df[df["name"].str.startswith("d")]

Unnamed: 0,name,age
1,dkkd,44
5,dffdfd,33


Change the index of a DataFrame to a column (e.g., Name).


In [22]:
df.set_index("name")

Unnamed: 0_level_0,age
name,Unnamed: 1_level_1
jsjjs,33
dkkd,44
rfdf,55
jfkfkkf,45
ffedf,22
dffdfd,33


In [23]:
data = {
    "Name": ["Alice", "Bob", "Charlie", None, "Eve"],
    "Age": [25, np.nan, 30, 35, np.nan],
    "Department": ["HR", "Finance", None, "IT", None],
    "Salary": [50000, 60000, np.nan, 80000, None],
    "AllNaN": [np.nan, np.nan, np.nan, np.nan, np.nan]  # for dropping test
}

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,Department,Salary,AllNaN
0,Alice,25.0,HR,50000.0,
1,Bob,,Finance,60000.0,
2,Charlie,30.0,,,
3,,35.0,IT,80000.0,
4,Eve,,,,


In [24]:
df.fillna("Unknown")

Unnamed: 0,Name,Age,Department,Salary,AllNaN
0,Alice,25.0,HR,50000.0,Unknown
1,Bob,Unknown,Finance,60000.0,Unknown
2,Charlie,30.0,Unknown,Unknown,Unknown
3,Unknown,35.0,IT,80000.0,Unknown
4,Eve,Unknown,Unknown,Unknown,Unknown


In [25]:
df.dropna(thresh=1, axis=1)

Unnamed: 0,Name,Age,Department,Salary
0,Alice,25.0,HR,50000.0
1,Bob,,Finance,60000.0
2,Charlie,30.0,,
3,,35.0,IT,80000.0
4,Eve,,,


Replace missing values in a numeric column with its median.


In [26]:
df["Salary"].fillna(df["Salary"].mean())

0    50000.000000
1    60000.000000
2    63333.333333
3    80000.000000
4    63333.333333
Name: Salary, dtype: float64

Count total missing values in each column.


In [27]:
df.isnull().sum()

Name          1
Age           2
Department    2
Salary        2
AllNaN        5
dtype: int64

Interpolate missing values in a numeric column.


In [28]:
df["Age"].interpolate()

0    25.0
1    27.5
2    30.0
3    35.0
4    35.0
Name: Age, dtype: float64

Sort a DataFrame by Age in descending order.


In [36]:
sort = df.sort_values(by="Age", ascending=False)
sort

Unnamed: 0,Name,Age,Department,Salary,AllNaN
3,,35.0,IT,80000.0,
2,Charlie,30.0,,,
0,Alice,25.0,HR,50000.0,
1,Bob,,Finance,60000.0,
4,Eve,,,,


In [38]:
print(df["Salary"].min())

50000.0


In [54]:
df.loc[df["Name"] == "Eve", "Salary"] = 50000

Calculate mean age grouped by Gender.


In [41]:
df.groupby("Department")["Salary"].mean()

Department
Finance    60000.0
HR         50000.0
IT         80000.0
Name: Salary, dtype: float64

Count unique values in a column.


In [45]:
print(np.unique(df["Age"]))

[25. 30. 35. nan]


Find the employee(s) with the lowest salary.


In [71]:

lowest_salary = df["Salary"].min()
lowest_salary_rows = df[df["Salary"] == lowest_salary]
lowest_salary_rows

Unnamed: 0,Name,Age,Department,Salary,AllNaN
0,Alice,25.0,HR,50000.0,
4,Eve,,,50000.0,


In [65]:
df = df.iloc[:, :-1]
df

Unnamed: 0,Name,Age,Department,Salary,AllNaN
0,Alice,25.0,HR,50000.0,
1,Bob,,Finance,60000.0,
2,Charlie,30.0,,,
3,,35.0,IT,80000.0,
4,Eve,,,50000.0,


Concatenate three DataFrames column-wise.


In [72]:
df1 = pd.DataFrame({"A": [1,2,3]})
df2 = pd.DataFrame({"B": [4,5,6]})
df3 = pd.DataFrame({"C": [7,8,9]})

In [73]:
pd.concat([df1, df2, df3], axis=1)

Unnamed: 0,A,B,C
0,1,4,7
1,2,5,8
2,3,6,9


Perform an inner join on two DataFrames.


In [74]:
df1 = pd.DataFrame({
    "EmpID": [1,2,3],
    "Name": ["Alice","Bob","Charlie"]
})

df2 = pd.DataFrame({
    "EmpID": [2,3,4],
    "Dept": ["HR","IT","Finance"]
})

In [82]:
pd.merge(df1,df2, on="EmpID", how="inner")

Unnamed: 0,EmpID,Name,Dept
0,2,Bob,HR
1,3,Charlie,IT


Merge two DataFrames with different column names .


In [83]:
df1 = pd.DataFrame({
    "EmployeeID": [1,2,3],
    "Name": ["Alice","Bob","Charlie"]
})

df2 = pd.DataFrame({
    "EmpID": [2,3,4],
    "Dept": ["HR","IT","Finance"]
})

In [84]:
pd.merge(df1,df2, left_on="EmployeeID", right_on="EmpID")

Unnamed: 0,EmployeeID,Name,EmpID,Dept
0,2,Bob,2,HR
1,3,Charlie,3,IT


Group by Department and get the maximum salary per group.


In [86]:
df = pd.DataFrame({
    "Dept": ["HR","IT","HR","IT","Finance"],
    "Salary": [50000,60000,55000,70000,65000]
})
df

Unnamed: 0,Dept,Salary
0,HR,50000
1,IT,60000
2,HR,55000
3,IT,70000
4,Finance,65000


In [87]:
df.groupby("Dept")["Salary"].max()

Dept
Finance    65000
HR         55000
IT         70000
Name: Salary, dtype: int64

From a sales DataFrame, find the top-selling product per department.


In [88]:
df_sales = pd.DataFrame({
    "Dept": ["Electronics","Electronics","Clothing","Clothing"],
    "Product": ["TV","Laptop","Shirt","Jeans"],
    "Sales": [100, 150, 200, 180]
})
df_sales

Unnamed: 0,Dept,Product,Sales
0,Electronics,TV,100
1,Electronics,Laptop,150
2,Clothing,Shirt,200
3,Clothing,Jeans,180


In [91]:
top = df_sales.groupby("Dept")["Sales"].idxmax()
top_sell = df_sales.loc[top]
top_sell

Unnamed: 0,Dept,Product,Sales
2,Clothing,Shirt,200
1,Electronics,Laptop,150
