In [2]:
### Object Creation
### Build a Series whose values are stored with the categorical dtype.

import pandas as pd

# Requesting dtype="category" at construction time lets pandas derive the
# categories from the distinct values present in the data.
s = pd.Series(["a", "b", "c", "a"], dtype="category")

# Emit the heading and the Series on consecutive lines.
print("Categorical Series:", s, sep="\n")
## A category dtype can be assigned to a Series to save memory and improve performance.

Categorical Series:
0    a
1    b
2    c
3    a
dtype: category
Categories (3, object): ['a', 'b', 'c']


In [3]:
### Using pd.Categorical
### Construct a categorical array directly, without wrapping it in a Series.

import pandas as pd

# The three labels each occur twice; the categories are taken from the
# distinct values in the data.
cat = pd.Categorical(["a", "b", "c"] * 2)

# Heading (preceded by a blank line) followed by the object's repr.
print("\nCategorical Object:", cat, sep="\n")

### pd.Categorical() returns a standalone categorical object.



Categorical Object:
['a', 'b', 'c', 'a', 'b', 'c']
Categories (3, object): ['a', 'b', 'c']


In [4]:
### Specifying Categories
### Supply the list of categories explicitly instead of letting pandas infer it.

import pandas as pd

# The categories are given as c, b, a and are kept in that order.
# "d" is not among them, so that entry shows up as NaN in the result.
cat = pd.Categorical(
    ["a", "b", "c", "a", "b", "c", "d"],
    categories=["c", "b", "a"],
)

# Heading (preceded by a blank line) followed by the object's repr.
print("\nCategorical Object with Specified Categories:", cat, sep="\n")

### The categories parameter fixes both which values are valid and the order in which they are listed.


Categorical Object with Specified Categories:
['a', 'b', 'c', 'a', 'b', 'c', NaN]
Categories (3, object): ['c', 'b', 'a']


In [5]:
### Ordered Categories
### Build a categorical whose categories carry an ordering.
import pandas as pd

# With ordered=True the order comes from the categories list itself:
# c < b < a. The value "d" is not a listed category and appears as NaN.
cat = pd.Categorical(
    ["a", "b", "c", "a", "b", "c", "d"],
    categories=["c", "b", "a"],
    ordered=True,
)

# Heading (preceded by a blank line) followed by the object's repr.
print("\nOrdered Categorical Object:", cat, sep="\n")

### ordered=True marks the categories as having a meaningful order.


Ordered Categorical Object:
['a', 'b', 'c', 'a', 'b', 'c', NaN]
Categories (3, object): ['c' < 'b' < 'a']


In [6]:
### Describing Categorical Data
### Use describe() to summarise categorical data.

import pandas as pd
import numpy as np

# A categorical with one missing entry; category "b" is declared but never used.
cat = pd.Categorical(["a", "c", "c", np.nan], categories=["b", "a", "c"])

# Pair the categorical with a plain column holding the same values.
df = pd.DataFrame({"cat": cat, "s": ["a", "c", "c", np.nan]})

# Summary of the whole frame, then of the categorical column alone.
# In both, the missing entry is left out of "count" and the unused
# category "b" is not counted in "unique".
print("\nDescriptive Statistics for DataFrame:", df.describe(), sep="\n")
print("\nDescriptive Statistics for Categorical Column:", df["cat"].describe(), sep="\n")

### For categorical data, describe() reports count, unique, top and freq.



Descriptive Statistics for DataFrame:
       cat  s
count    3  3
unique   2  2
top      c  c
freq     2  2

Descriptive Statistics for Categorical Column:
count     3
unique    2
top       c
freq      2
Name: cat, dtype: object


In [9]:
### Get the Properties of Categories
### Read attributes off a categorical object.

import numpy as np
import pandas as pd

### a. Categories
# .categories returns the category labels as an Index, in the order they
# were declared.
s = pd.Categorical(
    ["a", "c", "c", np.nan],
    categories=["b", "a", "c"],
)
print("\nCategories in the Categorical Object:", s.categories, sep="\n")

### b. Ordered Property
# .ordered is False here because ordered=True was not passed.
cat = pd.Categorical(
    ["a", "c", "c", np.nan],
    categories=["b", "a", "c"],
)
print("\nIs the Categorical Object Ordered?", cat.ordered, sep="\n")


Categories in the Categorical Object:
Index(['b', 'a', 'c'], dtype='object')

Is the Categorical Object Ordered?
False


In [13]:
### Renaming Categories
### Replace the category labels of a categorical Series.

import pandas as pd

# Start from a categorical Series with categories a, b, c.
s = pd.Series(["a", "b", "c", "a"], dtype="category")

# Build one new label per existing category (same order) and hand the
# list to rename_categories(), which returns the relabelled Series.
new_labels = [f"Group {label}" for label in s.cat.categories]
s = s.cat.rename_categories(new_labels)

# Heading (preceded by a blank line) followed by the renamed categories.
print("\nRenamed Categories:", s.cat.categories, sep="\n")



Renamed Categories:
Index(['Group a', 'Group b', 'Group c'], dtype='object')


In [11]:
### Appending New Categories
### Extend the set of categories of an existing categorical Series.


import pandas as pd

# Create the categorical Series and immediately append the category 4.
# The new category is placed after the existing ones; because it is an
# int next to string labels, the categories Index has dtype object.
s = pd.Series(["a", "b", "c", "a"], dtype="category").cat.add_categories([4])

# Heading (preceded by a blank line) followed by the extended categories.
print("\nCategories After Adding New One:", s.cat.categories, sep="\n")

## cat.add_categories() appends new categories without affecting existing data.



Categories After Adding New One:
Index(['a', 'b', 'c', 4], dtype='object')


In [12]:
### Removing Categories
### Drop a category from a categorical Series.

import pandas as pd

# Starting point: a categorical Series with categories a, b, c.
s = pd.Series(["a", "b", "c", "a"], dtype="category")
print("Original Series:", s, sep="\n")

# Drop category "a". The result is stored under a new name, so `s`
# itself still has all three categories.
removed = s.cat.remove_categories("a")
print("\nSeries After Removing Category 'a':", removed, sep="\n")

### cat.remove_categories() drops the given categories; values that belonged to them become NaN.


Original Series:
0    a
1    b
2    c
3    a
dtype: category
Categories (3, object): ['a', 'b', 'c']

Series After Removing Category 'a':
0    NaN
1      b
2      c
3    NaN
dtype: category
Categories (2, object): ['b', 'c']
