## 📅 Day 4: Sorting & Operations


This notebook covers sorting with `sort_values()` and `sort_index()`, element-wise operations on columns, and summary functions such as `sum()`, `mean()`, and `describe()`.


In [2]:
import pandas as pd
import numpy as np

In [3]:
# Small employee table with deliberate gaps (None) in every column, so the
# sorting and aggregation cells below also show how missing values behave.
data = dict(
    Name=["Ali", None, "John", None, "Mohan", "Meera"],
    Age=[25, None, None, 23, None, 30],
    Salary=[50000, None, 55000, 60000, 58000, None],
    Dept=["IT", None, "Finance", None, "IT", "Sales"],
)
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,Salary,Dept
0,Ali,25.0,50000.0,IT
1,,,,
2,John,,55000.0,Finance
3,,23.0,60000.0,
4,Mohan,,58000.0,IT
5,Meera,30.0,,Sales


In [4]:
# Ascending is the default order; rows with a missing Salary are placed last.
df.sort_values(by="Salary")

Unnamed: 0,Name,Age,Salary,Dept
0,Ali,25.0,50000.0,IT
2,John,,55000.0,Finance
4,Mohan,,58000.0,IT
3,,23.0,60000.0,
1,,,,
5,Meera,30.0,,Sales


In [5]:
# Highest salary first; rows with a missing Salary still end up at the bottom.
df.sort_values(by="Salary", ascending=False)

Unnamed: 0,Name,Age,Salary,Dept
3,,23.0,60000.0,
4,Mohan,,58000.0,IT
2,John,,55000.0,Finance
0,Ali,25.0,50000.0,IT
1,,,,
5,Meera,30.0,,Sales


In [6]:
# Age is the primary sort key; Salary orders the rows that share the same Age
# (including the group of rows whose Age is missing).
df.sort_values(by=["Age", "Salary"])

Unnamed: 0,Name,Age,Salary,Dept
3,,23.0,60000.0,
0,Ali,25.0,50000.0,IT
5,Meera,30.0,,Sales
2,John,,55000.0,Finance
4,Mohan,,58000.0,IT
1,,,,


In [7]:
# Order rows by their index labels. df still has its original 0..5 index,
# so the result looks the same as the frame created above.
df.sort_index(axis=0)

Unnamed: 0,Name,Age,Salary,Dept
0,Ali,25.0,50000.0,IT
1,,,,
2,John,,55000.0,Finance
3,,23.0,60000.0,
4,Mohan,,58000.0,IT
5,Meera,30.0,,Sales


Basic operations

In [8]:
# Add 5000 to every salary; missing salaries stay NaN. df itself is not modified.
df["Salary"].add(5000)

0    55000.0
1        NaN
2    60000.0
3    65000.0
4    63000.0
5        NaN
Name: Salary, dtype: float64

In [9]:
# Halve every age; missing ages stay NaN. df itself is not modified.
df["Age"].mul(0.5)

0    12.5
1     NaN
2     NaN
3    11.5
4     NaN
5    15.0
Name: Age, dtype: float64

In [10]:
# Express salaries in thousands. Plain division is vectorised over the whole
# column, so no per-element apply/lambda is needed; NaN entries stay NaN.
df["Salary"] / 1000

0    50.0
1     NaN
2    55.0
3    60.0
4    58.0
5     NaN
Name: Salary, dtype: float64

Descriptive statistics

In [11]:
# Summary statistics for the numeric columns (Age, Salary). The quartiles are
# spelled out here; they are the same ones describe() reports by default.
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Age,Salary
count,3.0,4.0
mean,26.0,55750.0
std,3.605551,4349.32945
min,23.0,50000.0
25%,24.0,53750.0
50%,25.0,56500.0
75%,27.5,58500.0
max,30.0,60000.0


In [13]:
# Print four headline statistics, one per line:
# mean salary, oldest age, youngest age, median salary.
print(
    df["Salary"].mean(),
    df["Age"].max(),
    df["Age"].min(),
    df["Salary"].median(),
    sep="\n",
)

55750.0
30.0
23.0
56500.0


In [15]:
# Column-wise totals; numeric_only=True leaves out the text columns Name and Dept.
df.sum(axis=0, numeric_only=True)

Age           78.0
Salary    223000.0
dtype: float64

In [16]:
# Summary statistics for the Salary column only (count covers the 4 non-missing values).
df["Salary"].describe(percentiles=[0.25, 0.5, 0.75])

count        4.00000
mean     55750.00000
std       4349.32945
min      50000.00000
25%      53750.00000
50%      56500.00000
75%      58500.00000
max      60000.00000
Name: Salary, dtype: float64

In [17]:
# Apply several aggregations at once: one result row per function,
# one column per selected field.
df.loc[:, ["Salary", "Age"]].agg(["mean", "max"])

Unnamed: 0,Salary,Age
mean,55750.0,26.0
max,60000.0,30.0


Grouped Aggregations 

In [18]:
# Average salary per department. Rows without a Dept do not form a group,
# and Sales comes out as NaN because its only row has no Salary.
df.groupby(by="Dept")["Salary"].mean()

Dept
Finance    55000.0
IT         54000.0
Sales          NaN
Name: Salary, dtype: float64

Sort a DataFrame by Age.


In [19]:
# Youngest first; rows with a missing Age are listed after all known ages.
df.sort_values(by="Age")

Unnamed: 0,Name,Age,Salary,Dept
3,,23.0,60000.0,
0,Ali,25.0,50000.0,IT
5,Meera,30.0,,Sales
1,,,,
2,John,,55000.0,Finance
4,Mohan,,58000.0,IT


Calculate column-wise sum.


In [20]:
# Sum each numeric column (Age, Salary); the text columns are excluded.
df.sum(axis=0, numeric_only=True)

Age           78.0
Salary    223000.0
dtype: float64

Get descriptive statistics using describe().


In [21]:
# count / mean / std / min / quartiles / max for the numeric columns.
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Age,Salary
count,3.0,4.0
mean,26.0,55750.0
std,3.605551,4349.32945
min,23.0,50000.0
25%,24.0,53750.0
50%,25.0,56500.0
75%,27.5,58500.0
max,30.0,60000.0


Sort a DataFrame by Salary.


In [22]:
# Lowest salary first; rows with a missing Salary come last.
df.sort_values(by="Salary")

Unnamed: 0,Name,Age,Salary,Dept
0,Ali,25.0,50000.0,IT
2,John,,55000.0,Finance
4,Mohan,,58000.0,IT
3,,23.0,60000.0,
1,,,,
5,Meera,30.0,,Sales


Get the maximum value of the Age column.


In [24]:
# The exercise asks for the maximum Age, so use max() rather than mean().
print(df["Age"].max())

30.0


Find the average salary of employees.


In [25]:
# Average salary over the employees that have one recorded.
df.loc[:, "Salary"].mean()

np.float64(55750.0)

In [26]:
# Group rows by department name (alphabetical), then order by Salary inside
# each department; rows with no Dept are listed last.
df.sort_values(by=["Dept", "Salary"])

Unnamed: 0,Name,Age,Salary,Dept
2,John,,55000.0,Finance
0,Ali,25.0,50000.0,IT
4,Mohan,,58000.0,IT
5,Meera,30.0,,Sales
3,,23.0,60000.0,
1,,,,


Find the top 3 highest salaries using sorting.


In [39]:
# Way 1: sort by Salary from highest to lowest and keep the first three rows.
df.sort_values(by="Salary", ascending=False).head(n=3)


Unnamed: 0,Name,Age,Salary,Dept
3,,23.0,60000.0,
4,Mohan,,58000.0,IT
2,John,,55000.0,Finance


📌 Way 2: Using nlargest()

In [40]:
# nlargest returns the three rows with the highest Salary in a single call.
df.nlargest(n=3, columns="Salary")

Unnamed: 0,Name,Age,Salary,Dept
3,,23.0,60000.0,
4,Mohan,,58000.0,IT
2,John,,55000.0,Finance


In [41]:
# Same idea on the Salary column alone: the result is a Series of the three
# highest salaries, keeping the original row labels.
df["Salary"].sort_values(ascending=False).iloc[:3]

3    60000.0
4    58000.0
2    55000.0
Name: Salary, dtype: float64