# Step 1: Create Dummy Data

In [2]:
import pandas as pd
import numpy as np

# Create a dummy DataFrame
data = {
    'ID': range(1, 11),
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank', 'Grace', 'Helen', 'Ivy', 'Jack'],
    'Age': np.random.randint(20, 40, size=10),
    'Salary': np.random.randint(30000, 80000, size=10),
    'Department': ['HR', 'IT', 'Finance', 'IT', 'HR', 'Finance', 'Marketing', 'HR', 'IT', 'Finance']
}

df = pd.DataFrame(data)
df

Unnamed: 0,ID,Name,Age,Salary,Department
0,1,Alice,20,47273,HR
1,2,Bob,32,75495,IT
2,3,Charlie,21,62996,Finance
3,4,David,30,37300,IT
4,5,Eva,30,65732,HR
5,6,Frank,39,64495,Finance
6,7,Grace,28,79402,Marketing
7,8,Helen,23,51508,HR
8,9,Ivy,25,36338,IT
9,10,Jack,22,59434,Finance


# Step 2: Basic Pandas Operations

In [7]:
# View top 5 rows
df.head()

Unnamed: 0,ID,Name,Age,Salary,Department
0,1,Alice,20,47273,HR
1,2,Bob,32,75495,IT
2,3,Charlie,21,62996,Finance
3,4,David,30,37300,IT
4,5,Eva,30,65732,HR


In [8]:
# View last 5 rows 
df.tail()

Unnamed: 0,ID,Name,Age,Salary,Department
5,6,Frank,39,64495,Finance
6,7,Grace,28,79402,Marketing
7,8,Helen,23,51508,HR
8,9,Ivy,25,36338,IT
9,10,Jack,22,59434,Finance


In [9]:
# View basic info
print("\nInfo:")
df.info()


Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   ID          10 non-null     int64 
 1   Name        10 non-null     object
 2   Age         10 non-null     int32 
 3   Salary      10 non-null     int32 
 4   Department  10 non-null     object
dtypes: int32(2), int64(1), object(2)
memory usage: 448.0+ bytes


In [10]:
# Summary statistics
print("\nDescribe:\n")
df.describe()


Describe:



Unnamed: 0,ID,Age,Salary
count,10.0,10.0,10.0
mean,5.5,27.0,57997.3
std,3.02765,5.944185,14712.249674
min,1.0,20.0,36338.0
25%,3.25,22.25,48331.75
50%,5.5,26.5,61215.0
75%,7.75,30.0,65422.75
max,10.0,39.0,79402.0


In [11]:

# Column selection
print("\nAges:\n", df['Age'])


Ages:
 0    20
1    32
2    21
3    30
4    30
5    39
6    28
7    23
8    25
9    22
Name: Age, dtype: int32


In [12]:
# Row filtering
print("\nEmployees with Age > 30:\n", df[df['Age'] > 30])


Employees with Age > 30:
    ID   Name  Age  Salary Department
1   2    Bob   32   75495         IT
5   6  Frank   39   64495    Finance


In [13]:

# Sorting
print("\nSorted by Salary (descending):\n", df.sort_values(by='Salary', ascending=False))


Sorted by Salary (descending):
    ID     Name  Age  Salary Department
6   7    Grace   28   79402  Marketing
1   2      Bob   32   75495         IT
4   5      Eva   30   65732         HR
5   6    Frank   39   64495    Finance
2   3  Charlie   21   62996    Finance
9  10     Jack   22   59434    Finance
7   8    Helen   23   51508         HR
0   1    Alice   20   47273         HR
3   4    David   30   37300         IT
8   9      Ivy   25   36338         IT


In [14]:
# Grouping
print("\nAverage Salary by Department:\n", df.groupby('Department')['Salary'].mean())



Average Salary by Department:
 Department
Finance      62308.333333
HR           54837.666667
IT           49711.000000
Marketing    79402.000000
Name: Salary, dtype: float64


In [15]:
# Rename column
df.rename(columns={'Salary': 'Annual_Salary'}, inplace=True)
print("\nRenamed 'Salary' to 'Annual_Salary':\n")
df


Renamed 'Salary' to 'Annual_Salary':



Unnamed: 0,ID,Name,Age,Annual_Salary,Department
0,1,Alice,20,47273,HR
1,2,Bob,32,75495,IT
2,3,Charlie,21,62996,Finance
3,4,David,30,37300,IT
4,5,Eva,30,65732,HR
5,6,Frank,39,64495,Finance
6,7,Grace,28,79402,Marketing
7,8,Helen,23,51508,HR
8,9,Ivy,25,36338,IT
9,10,Jack,22,59434,Finance


In [17]:

# Drop column
df.drop('Department', axis=1, inplace=True)
print("\nAfter Dropping 'Bonus':\n")
df


After Dropping 'Bonus':



Unnamed: 0,ID,Name,Age,Annual_Salary
0,1,Alice,20,47273
1,2,Bob,32,75495
2,3,Charlie,21,62996
3,4,David,30,37300
4,5,Eva,30,65732
5,6,Frank,39,64495
6,7,Grace,28,79402
7,8,Helen,23,51508
8,9,Ivy,25,36338
9,10,Jack,22,59434


In [18]:
# Null check
print("\nAny missing values?\n", df.isnull().sum())


Any missing values?
 ID               0
Name             0
Age              0
Annual_Salary    0
dtype: int64


In [19]:
# Check Duplicated
df.duplicated().sum()

np.int64(0)