[Reference](https://pawankg.medium.com/exploring-pandas-in-python-filter-and-pivot-operations-with-sample-data-84e7fc4a5565)

In [1]:
import pandas as pd

# Sample Data
data = {
    'Date': ['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-02'],
    'Category': ['A', 'B', 'A', 'B'],
    'Value': [10, 20, 30, 40]
}

df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

Original DataFrame:
         Date Category  Value
0  2023-01-01        A     10
1  2023-01-01        B     20
2  2023-01-02        A     30
3  2023-01-02        B     40


# Filter

In [2]:
# Filtering columns
filtered_df = df.filter(items=['Date', 'Value'])
print("\nFiltered DataFrame:")
print(filtered_df)


Filtered DataFrame:
         Date  Value
0  2023-01-01     10
1  2023-01-01     20
2  2023-01-02     30
3  2023-01-02     40


In [3]:
# Filtering rows based on a condition
filtered_rows = df[df['Value'] > 20]
print("\nFiltered Rows:")
print(filtered_rows)


Filtered Rows:
         Date Category  Value
2  2023-01-02        A     30
3  2023-01-02        B     40


In [4]:
# Using the like parameter
selected_columns = df.filter(like='Dat')
print("\nSelected Columns:")
print(selected_columns)


Selected Columns:
         Date
0  2023-01-01
1  2023-01-01
2  2023-01-02
3  2023-01-02


In [5]:
# Using the regex parameter
selected_columns_regex = df.filter(regex='^C|^V')
print("\nSelected Columns with Regex:")
print(selected_columns_regex)


Selected Columns with Regex:
  Category  Value
0        A     10
1        B     20
2        A     30
3        B     40


In [6]:
# Filtering columns based on a list
selected_columns_list = df.filter(items=['Date', 'Value'])
print("\nSelected Columns with List:")
print(selected_columns_list)


Selected Columns with List:
         Date  Value
0  2023-01-01     10
1  2023-01-01     20
2  2023-01-02     30
3  2023-01-02     40


In [7]:
# Using the items parameter
selected_columns_items = df.filter(items=['Date', 'Val'])
print("\nSelected Columns with Items:")
print(selected_columns_items)


Selected Columns with Items:
         Date
0  2023-01-01
1  2023-01-01
2  2023-01-02
3  2023-01-02


# Pivot

In [8]:
# Pivoting the DataFrame
pivot_df = df.pivot(index='Date', columns='Category', values='Value')
print("\nPivoted DataFrame:")
print(pivot_df)


Pivoted DataFrame:
Category     A   B
Date              
2023-01-01  10  20
2023-01-02  30  40


In [9]:
# Adding a duplicate entry
df = df.append({'Date': '2023-01-01', 'Category': 'A', 'Value': 15}, ignore_index=True)

# Using pivot_table to handle duplicates
pivot_table_df = df.pivot_table(index='Date', columns='Category', values='Value', aggfunc='sum')
print("\nPivot Table DataFrame:")
print(pivot_table_df)


Pivot Table DataFrame:
Category     A   B
Date              
2023-01-01  25  20
2023-01-02  30  40


  df = df.append({'Date': '2023-01-01', 'Category': 'A', 'Value': 15}, ignore_index=True)


In [10]:
# Using pivot with multi-level indexing
multi_level_pivot = df.pivot(index=['Date', 'Category'], columns='Value')
print("\nMulti-level Index Pivot:")
print(multi_level_pivot)


Multi-level Index Pivot:
Empty DataFrame
Columns: []
Index: [(2023-01-01, A), (2023-01-01, B), (2023-01-02, A), (2023-01-02, B)]


In [11]:
# Adding a duplicate entry
df = df.append({'Date': '2023-01-01', 'Category': 'A', 'Value': 15}, ignore_index=True)

# Using groupby and pivot_table to handle duplicates
pivot_fillna = df.groupby(['Date', 'Category'])['Value'].sum().unstack(fill_value=0)
print("\nPivot with Missing Values Filled:")
print(pivot_fillna)


Pivot with Missing Values Filled:
Category     A   B
Date              
2023-01-01  40  20
2023-01-02  30  40


  df = df.append({'Date': '2023-01-01', 'Category': 'A', 'Value': 15}, ignore_index=True)


In [12]:
# Using pivot_table to calculate the average for duplicate entries
pivot_avg = df.pivot_table(index='Date', columns='Category', values='Value', aggfunc='mean')
print("\nPivot Table with Average for Duplicate Entries:")
print(pivot_avg)


Pivot Table with Average for Duplicate Entries:
Category            A     B
Date                       
2023-01-01  13.333333  20.0
2023-01-02  30.000000  40.0


In [13]:
# Using pivot_table to handle duplicate entries and resetting index
pivot_table_df = df.pivot_table(index='Date', columns='Category', values='Value', aggfunc='sum')
pivot_reset_index = pivot_table_df.reset_index()
print("\nPivot Table with Reset Index:")
print(pivot_reset_index)


Pivot Table with Reset Index:
Category        Date   A   B
0         2023-01-01  40  20
1         2023-01-02  30  40


In [14]:
# Sample Data with an additional 'Value2' column
data = {
    'Date': ['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-02'],
    'Category': ['A', 'B', 'A', 'B'],
    'Value': [10, 20, 30, 40],
    'Value2': [100, 200, 300, 400]
}

df = pd.DataFrame(data)

# Pivoting with multiple value columns
multi_value_pivot = df.pivot(index='Date', columns='Category', values=['Value', 'Value2'])
print("\nPivot with Multiple Value Columns:")
print(multi_value_pivot)


Pivot with Multiple Value Columns:
           Value     Value2     
Category       A   B      A    B
Date                            
2023-01-01    10  20    100  200
2023-01-02    30  40    300  400
