# Pandas – Customization, Querying, GroupBy, and Categorical Data

### Customizing Display Options

In [5]:
# Pandas allows customizing how DataFrames are displayed, including row/column limits and width constraints.

import pandas as pd

df = pd.DataFrame({
    'Name': ['Ram', 'Shyam', 'Hari', 'Gita', 'Sita'],
    'Age': [12, 13, 14, 15, 12],
    'Gender': ['M', 'M', 'M', 'F', 'F']
})

print("Default Display:\n", df)

Default Display:
     Name  Age Gender
0    Ram   12      M
1  Shyam   13      M
2   Hari   14      M
3   Gita   15      F
4   Sita   12      F


In [47]:
# Reset display settings to default (Avoid unexpected results)
pd.reset_option("all")  # Resets all display options

# Uncomment to test individual settings:
# pd.set_option('display.max_rows', 2)  # Limits max displayed rows
# pd.set_option('display.max_columns', 2)  # Limits max displayed columns
# pd.set_option('display.width', 20)  # Limits width of output
# pd.set_option('display.max_colwidth', 10)  # Limits column width

  pd.reset_option("all")  # Resets all display options
  pd.reset_option("all")  # Resets all display options


### Customizing Data Types

In [10]:
# Converting data types is crucial when reading data from external sources.

df = pd.DataFrame({
    'Name': ['Ram', 'Shyam', 'Hari', 'Gita', 'Sita'],
    'Age': ['12', '13', '14', '15', '12'],  # Age is stored as strings
    'Gender': ['M', 'M', 'M', 'F', 'F']
})

print("Before Conversion:\n", df.dtypes)

# Convert 'Age' column to integer
df['Age'] = df['Age'].astype(int)
# Ensure 'Gender' remains a string
df['Gender'] = df['Gender'].astype(str)

print("\nAfter Conversion:\n", df.dtypes)

Before Conversion:
 Name      object
Age       object
Gender    object
dtype: object

After Conversion:
 Name      object
Age        int32
Gender    object
dtype: object


### Query Method (Filtering Data)

In [15]:
# The .query() method provides an easy way to filter DataFrame rows based on conditions.

df = pd.DataFrame({
    'Name': ['Ram', 'Shyam', 'Hari', 'Gita', 'Sita'],
    'Age': [12, 13, 14, 15, 12],
    'Gender': ['M', 'M', 'M', 'F', 'F']
})

filtered_df = df.query('Age > 13 and Gender == "F"')
print("Filtered Data (Age > 13 & Gender = F):\n", filtered_df)

filtered_df2 = df.query('(Gender == "F") or (Age > 13)')
print("\nFiltered Data (Either Gender = F OR Age > 13):\n", filtered_df2)

Filtered Data (Age > 13 & Gender = F):
    Name  Age Gender
3  Gita   15      F

Filtered Data (Either Gender = F OR Age > 13):
    Name  Age Gender
2  Hari   14      M
3  Gita   15      F
4  Sita   12      F


### Window Functions (Rolling & Expanding)

In [20]:
# Rolling window functions compute values over a defined sliding window.

data = {'Value': [10, 20, 30, 40, 50, 60, 70, 80, 90]}
df = pd.DataFrame(data)

# Compute rolling sum over a window of 3
df['Rolling_Sum'] = df['Value'].rolling(window=3).sum()
print("\nRolling Sum with Window 3:\n", df)

# Expanding window functions compute cumulative statistics
# Example: df['Cumulative_Sum'] = df['Value'].expanding().sum()


Rolling Sum with Window 3:
    Value  Rolling_Sum
0     10          NaN
1     20          NaN
2     30         60.0
3     40         90.0
4     50        120.0
5     60        150.0
6     70        180.0
7     80        210.0
8     90        240.0


### GroupBy Function

In [23]:
# Grouping allows aggregating data based on a specific column.

df = pd.DataFrame({
    'Name': ['Ram', 'Shyam', 'Hari', 'Gita', 'Sita'],
    'Grade': ['B', 'A', 'C', 'A', 'B'],
    'Marks': [400, 500, 280, 500, 420]
})

# Group by 'Grade' and calculate average marks
grouped = df.groupby('Grade')['Marks'].mean()
print("\nAverage Marks by Grade:\n", grouped)


Average Marks by Grade:
 Grade
A    500.0
B    410.0
C    280.0
Name: Marks, dtype: float64


### Aggregation with GroupBy

In [26]:
# The .agg() method allows applying multiple functions at once.

df = pd.DataFrame({
    'Product': ['B', 'A', 'C', 'A', 'B'],
    'Region': ['South', 'South', 'South', 'North', 'North'],
    'Qty': [400, 500, 280, 480, 420]
})

agg_result = df.groupby('Product').agg({'Qty': ['sum', 'mean', 'min', 'max']})
print("\nAggregated Quantity Data:\n", agg_result)


Aggregated Quantity Data:
          Qty                 
         sum   mean  min  max
Product                      
A        980  490.0  480  500
B        820  410.0  400  420
C        280  280.0  280  280


### Transformation

In [29]:
# Transformation applies functions to each group while maintaining the original shape.

df = pd.DataFrame({
    'Group': ['D', 'E', 'F', 'F', 'D'],
    'Value': [2, 7, 6, 4, 3]
})

df['ModifiedValue'] = df.groupby('Group')['Value'].transform(lambda x: x * 2)
print("\nTransformed Values (Multiplied by 2):\n", df)


Transformed Values (Multiplied by 2):
   Group  Value  ModifiedValue
0     D      2              4
1     E      7             14
2     F      6             12
3     F      4              8
4     D      3              6


### Categorical Data

In [32]:
# Categorical data reduces memory usage and improves performance.

colors = pd.Series(['Blue', 'Red', 'Yellow', 'Green', 'Red', 'Blue', 'Yellow', 'Yellow', 'Yellow', 'Red'])

print("\nBefore Conversion to Categorical:\n", colors.info())

# Convert to categorical
df_colors = colors.astype('category')
print("\nCategorical Data:\n", df_colors)
print("Unique Categories:", df_colors.unique())

<class 'pandas.core.series.Series'>
RangeIndex: 10 entries, 0 to 9
Series name: None
Non-Null Count  Dtype 
--------------  ----- 
10 non-null     object
dtypes: object(1)
memory usage: 212.0+ bytes

Before Conversion to Categorical:
 None

Categorical Data:
 0      Blue
1       Red
2    Yellow
3     Green
4       Red
5      Blue
6    Yellow
7    Yellow
8    Yellow
9       Red
dtype: category
Categories (4, object): ['Blue', 'Green', 'Red', 'Yellow']
Unique Categories: ['Blue', 'Red', 'Yellow', 'Green']
Categories (4, object): ['Blue', 'Green', 'Red', 'Yellow']


In [34]:
# Reordering categories

df_colors = df_colors.cat.reorder_categories(['Red', 'Green', 'Yellow', 'Blue'])
print("\nReordered Categories:\n", df_colors)


Reordered Categories:
 0      Blue
1       Red
2    Yellow
3     Green
4       Red
5      Blue
6    Yellow
7    Yellow
8    Yellow
9       Red
dtype: category
Categories (4, object): ['Red', 'Green', 'Yellow', 'Blue']


### Additional Categorical Methods

In [39]:
# - set_categories(): Define custom category order

cat_colors = pd.Series(['Red', 'Green', 'Blue', 'Yellow']).astype('category')
ordered_colors = cat_colors.cat.set_categories(['Blue', 'Green', 'Yellow', 'Red'])
print(ordered_colors)

0       Red
1     Green
2      Blue
3    Yellow
dtype: category
Categories (4, object): ['Blue', 'Green', 'Yellow', 'Red']


In [41]:
# - remove_categories(): Remove specific categories

cat_colors = pd.Series(['Red', 'Green', 'Blue', 'Yellow']).astype('category')
cat_colors = cat_colors.cat.remove_categories(['Green'])
print(cat_colors)

0       Red
1       NaN
2      Blue
3    Yellow
dtype: category
Categories (3, object): ['Blue', 'Red', 'Yellow']


In [43]:
# - rename_categories(): Rename categories

cat_colors = pd.Series(['Red', 'Green', 'Blue', 'Yellow']).astype('category')
renamed_colors = cat_colors.cat.rename_categories({'Red': 'Crimson', 'Blue': 'Azure'})
print(renamed_colors)

0    Crimson
1      Green
2      Azure
3     Yellow
dtype: category
Categories (4, object): ['Azure', 'Green', 'Crimson', 'Yellow']


In [45]:
# - remove_unused_categories(): Clean up unused categories

cat_colors = pd.Series(['Red', 'Red', 'Blue']).astype('category')
cat_colors = cat_colors.cat.remove_unused_categories()
print(cat_colors)

0     Red
1     Red
2    Blue
dtype: category
Categories (2, object): ['Blue', 'Red']
