# Imports

In [1]:
import pandas as pd

# Topics

## Chaining operations in Pandas

In [2]:
# Small demo table: one row per person.
data = {
    'Name': ['John', 'Anna', 'Peter', 'Linda'],
    'Age': [28, 34, 45, 32],
    'Income': [50000, 60000, 80000, 75000],
}
df = pd.DataFrame(data)

# Chain the filter and the sort so no intermediate frame needs a name.
# The callable passed to .loc receives the frame at that point in the chain.
result = (
    df
    .loc[lambda people: people['Age'] > 30]  # keep only rows with Age above 30
    .sort_values('Income', ascending=False)  # highest Income first
)



In [3]:
# Display the filtered and sorted frame as this cell's output.
result

Unnamed: 0,Name,Age,Income
2,Peter,45,80000
3,Linda,32,75000
1,Anna,34,60000


## Groupby operations in Pandas

In [4]:
# Five observations spread over two categories.
data = {
    'Category': ['A', 'B', 'A', 'B', 'A'],
    'Value': [10, 20, 30, 40, 50],
}
df = pd.DataFrame(data)

# Total 'Value' per 'Category'; the result is a one-column frame indexed by 'Category'.
grouped_data = df.groupby('Category')[['Value']].sum()

In [5]:
# Display the per-category totals as this cell's output.
grouped_data

Unnamed: 0_level_0,Value
Category,Unnamed: 1_level_1
A,90
B,60


## Commanding Time Series Data with Finesse

In [5]:

# Synthetic daily series: one value per calendar day from 2023 through 2028,
# where each value is simply the day's position in the range (0, 1, 2, ...).
date_range = pd.date_range(start='1/1/2023', end='12/31/2028', freq='D')
traffic_data = pd.Series(range(date_range.size), index=date_range)
print(traffic_data.head())  # preview the first five days


2023-01-01    0
2023-01-02    1
2023-01-03    2
2023-01-04    3
2023-01-05    4
Freq: D, dtype: int64


In [6]:
# Resampling and frequency conversion for yearly analysis:
# 'YE' bins the daily values by year end (labels fall on 31 December),
# and sum() totals the traffic inside each yearly bin.
yearly_traffic = traffic_data.resample('YE').sum()
print(yearly_traffic)

2023-12-31     66430
2024-12-31    200385
2025-12-31    333245
2026-12-31    466470
2027-12-31    599695
2028-12-31    735111
Freq: YE-DEC, dtype: int64


*We resampled the daily data into yearly intervals using the `resample` function,*
*then calculated the sum of traffic for each year.*

## Multi-level indexing in Pandas

In [7]:
# Two-level row index: every 'Category' paired with every 'Number'.
# The product is generated in the order (A, 1), (A, 2), (B, 1), (B, 2).
index = pd.MultiIndex.from_product(
    [['A', 'B'], [1, 2]],
    names=['Category', 'Number'],
)

# One 'Value' per (Category, Number) pair.
df = pd.DataFrame({'Value': [10, 20, 30, 40]}, index=index)

df

Unnamed: 0_level_0,Unnamed: 1_level_0,Value
Category,Number,Unnamed: 2_level_1
A,1,10
A,2,20
B,1,30
B,2,40


In [9]:
# Alternative: start from a flat table and promote two columns to index levels.

data = {
    'Category': ['A', 'A', 'B', 'B'],
    'Number': [1, 2, 1, 2],
    'Value': [10, 20, 30, 40]
}
# set_index returns a new frame, so it is chained onto the constructor instead of
# mutating an existing frame with inplace=True; the resulting df is the same.
df = pd.DataFrame(data).set_index(['Category', 'Number'])

df

Unnamed: 0_level_0,Unnamed: 1_level_0,Value
Category,Number,Unnamed: 2_level_1
A,1,10
A,2,20
B,1,30
B,2,40


In [10]:
# Swap the two index levels so 'Number' comes before 'Category'.
# Only the level order changes; the rows keep their original order.
print(df.swaplevel())

                 Value
Number Category       
1      A            10
2      A            20
1      B            30
2      B            40


In [11]:
# Move both index levels back into ordinary columns, leaving a default 0..n-1 index.
print(df.reset_index())

  Category  Number  Value
0        A       1     10
1        A       2     20
2        B       1     30
3        B       2     40


In [14]:
# Partial key: select every row under the outer level 'A'; the result is indexed by 'Number'.
print(df.loc['A']) 


# Full key given as a tuple: select the single row ('A', 1), which comes back as a Series.
print(df.loc[('A', 1)])

        Value
Number       
1          10
2          20
Value    10
Name: (A, 1), dtype: int64


In [13]:
# Group on the 'Category' index level and sum 'Value' across its 'Number' entries.
print(df.groupby(level=['Category']).sum())

          Value
Category       
A            30
B            70


In [15]:
# Group on the 'Number' index level and sum 'Value' across categories.
print(df.groupby(level=['Number']).sum())

        Value
Number       
1          40
2          60
