In [1]:
import pandas as pd
# Sample data (sales figures)
data = {'Store': ['Store A', 'Store B', 'Store A', 'Store B'],
'Product': ['Shirt', 'Hat', 'Shirt', 'Shoes'],
'Sales': [100, 50, 120, 80]}
df = pd.DataFrame(data)
# Pivot table to see total sales per store by product
df_pivoted = df.pivot_table(values='Sales', index='Store', columns='Product', aggfunc='sum')
print(df_pivoted)

Product   Hat  Shirt  Shoes
Store                      
Store A   NaN  220.0    NaN
Store B  50.0    NaN   80.0


In [2]:
import pandas as pd
# Sample data (customer purchases)
data = {'CustomerID': [1, 2, 1, 3],
'ProductA': [2, 0, 1, 4],
'ProductB': [1, 3, 0, 2]}
df = pd.DataFrame(data)
# Melt data to separate product categories and quantities
df_melted = df.melt(id_vars='CustomerID', var_name='Product', value_name='Quantity')
print(df_melted)

   CustomerID   Product  Quantity
0           1  ProductA         2
1           2  ProductA         0
2           1  ProductA         1
3           3  ProductA         4
4           1  ProductB         1
5           2  ProductB         3
6           1  ProductB         0
7           3  ProductB         2


In [3]:
import pandas as pd
# Sample data
df = pd.DataFrame({'City': ['New York', 'Los Angeles', 'New York', 'Los Angeles'],
'Year': [2022, 2022, 2023, 2023],
'Population': [8000000, 4000000, 8100000, 4050000]})
df.set_index(['City', 'Year'], inplace=True)
# Stack the data
stacked_df = df.stack()
print(stacked_df)
# Unstack the data
unstacked_df = stacked_df.unstack()
print(unstacked_df)

City         Year            
New York     2022  Population    8000000
Los Angeles  2022  Population    4000000
New York     2023  Population    8100000
Los Angeles  2023  Population    4050000
dtype: int64
                  Population
City        Year            
Los Angeles 2022     4000000
            2023     4050000
New York    2022     8000000
            2023     8100000


In [4]:
import pandas as pd
# Sample data
df = pd.DataFrame({'Height': [5.5, 6.1, 5.8, 5.9],
'Weight': [150, 180, 160, 170]})
# Create a new feature: Body Mass Index (BMI)
df['BMI'] = df['Weight'] / (df['Height'] ** 2)
print(df)
# Transforming Height from feet to inches
df['Height_inches'] = df['Height'] * 12
print(df)

   Height  Weight       BMI
0     5.5     150  4.958678
1     6.1     180  4.837409
2     5.8     160  4.756243
3     5.9     170  4.883654
   Height  Weight       BMI  Height_inches
0     5.5     150  4.958678           66.0
1     6.1     180  4.837409           73.2
2     5.8     160  4.756243           69.6
3     5.9     170  4.883654           70.8


In [8]:
import pandas as pd
data = {'Animal': ['Cat', 'Dog', 'Rabbit', 'Dog', 'Cat', 'Rabbit', 'Rabbit'],
'Color': ['White', 'Brown', 'White', 'Black', 'Black', 'Brown', 'White']}
df = pd.DataFrame(data)
# Convert columns to categorical data type
df['Animal'] = df['Animal'].astype('category')
df['Color'] = df['Color'].astype('category')
# Generate one-hot encoded variables
dummy_df = pd.get_dummies(df)
print("Original DataFrame:")
print(df)
print("\nOne-Hot Encoded DataFrame:")
print(dummy_df)

Original DataFrame:
   Animal  Color
0     Cat  White
1     Dog  Brown
2  Rabbit  White
3     Dog  Black
4     Cat  Black
5  Rabbit  Brown
6  Rabbit  White

One-Hot Encoded DataFrame:
   Animal_Cat  Animal_Dog  Animal_Rabbit  Color_Black  Color_Brown  \
0        True       False          False        False        False   
1       False        True          False        False         True   
2       False       False           True        False        False   
3       False        True          False         True        False   
4        True       False          False         True        False   
5       False       False           True        False         True   
6       False       False           True        False        False   

   Color_White  
0         True  
1        False  
2         True  
3        False  
4        False  
5        False  
6         True  


In [10]:
import pandas as pd
# Nominal data (color)
colors = pd.Categorical(['Red', 'Green', 'Blue', 'Red'])
# Ordinal data (education levels)
education = pd.Categorical(['Bachelor', 'Master', 'PhD'],ordered=True, categories=['Bachelor', 'Master', 'PhD'])
print(colors.categories) # Access categories
print(education.ordered) # Check if ordered
# Access elements by label (ordinal only, considering order)
print(education[1]) # Access the element with label 1 (which is category 2)

Index(['Blue', 'Green', 'Red'], dtype='object')
True
Master


In [11]:
import pandas as pd
# Discrete data (number of customers)
customers_served = pd.Series([25, 30, 18, 22])
# Continuous data (temperature)
temperatures = pd.Series([20.5, 22.8, 19.2, 23.1])
# Perform calculations (e.g., mean, standard deviation)
average_customers = customers_served.mean()
average_temp = temperatures.mean()
print(average_customers)
print(average_temp)

23.75
21.4


In [12]:
import pandas as pd
# Sample data with categorical type
df = pd.DataFrame({'Animal': ['Dog', 'Cat', 'Bird', 'Dog', 'Bird'],
'Size': ['S', 'M', 'L', 'M', 'S']})
df['Animal'] = pd.Categorical(df['Animal'])
print(df['Animal'].dtype)
# Use the Categorical type for ordered data
df['Size'] = pd.Categorical(df['Size'], categories=['S', 'M', 'L'], ordered=True)
print(df['Size'].dtype)

category
category


In [13]:
import pandas as pd
# Sample DataFrame
df = pd.DataFrame({'Color': ['Red', 'Blue', 'Green', 'Blue', 'Red'],
'Size': ['S', 'M', 'L', 'M', 'S']})
# Display the DataFrame and datatypes
print(df)
print('\nDatatype before conversion:')
print(df.dtypes)
# Convert columns to Categorical datatype
df['Color'] = df['Color'].astype('category')
df['Size'] = df['Size'].astype('category')
#Print dataypes
print('\nDatatype after conversion: ')
print(df.dtypes)

   Color Size
0    Red    S
1   Blue    M
2  Green    L
3   Blue    M
4    Red    S

Datatype before conversion:
Color    object
Size     object
dtype: object

Datatype after conversion: 
Color    category
Size     category
dtype: object


In [14]:
import pandas as pd
# Create a sample DataFrame
data = {'column1': [1, 2, 3],
'column2': [4, 5, 6],
'column3': [7, 8, 9]}
df = pd.DataFrame(data)
# Convert multiple columns to the 'category' data type
columns_to_convert = ['column1', 'column2', 'column3']
df[columns_to_convert] = df[columns_to_convert].astype('category')
# Print the converted columns
print(df[columns_to_convert])

  column1 column2 column3
0       1       4       7
1       2       5       8
2       3       6       9


In [16]:
import pandas as pd
# Sample DataFrame
data = {'Animal': ['Cat', 'Dog', 'Rabbit', 'Dog', 'Cat', 'Rabbit', 'Rabbit'],
'Color': ['White', 'Brown', 'White', 'Black', 'Black', 'Brown', 'White']}
df = pd.DataFrame(data)
# Convert columns to categorical
df['Animal'] = df['Animal'].astype('category')
df['Color'] = pd.Categorical(df['Color'], categories=['Black', 'White', 'Brown'], ordered=True)
# Display the integer codes of the categories
print("Animal codes:", df['Animal'].cat.codes.tolist())
print("\nColor codes:", df['Color'].cat.codes.tolist())
# Display the categories of the categorical columns
print("\nAnimal categories:", df['Animal'].cat.categories)
print("\nColor categories:", df['Color'].cat.categories)
# Add a new category to 'Animal' column and then remove unused categories
df['Animal'] = df['Animal'].cat.add_categories(['Elephant'])
print("\nBefore removing unused categories:")
print(df['Animal'].cat.categories)
# Remove unused categories
df['Animal'] = df['Animal'].cat.remove_unused_categories()
print("\nAfter removing unused categories:")
print(df['Animal'].cat.categories)
# Reorder categories of 'Color' column
df['Color'] = df['Color'].cat.set_categories(['Brown', 'Black', 'White'], ordered=True)
print("\nReordered Color categories:", df['Color'].cat.categories)
print("\nReordered Color values:", df['Color'].tolist())

Animal codes: [0, 1, 2, 1, 0, 2, 2]

Color codes: [1, 2, 1, 0, 0, 2, 1]

Animal categories: Index(['Cat', 'Dog', 'Rabbit'], dtype='object')

Color categories: Index(['Black', 'White', 'Brown'], dtype='object')

Before removing unused categories:
Index(['Cat', 'Dog', 'Rabbit', 'Elephant'], dtype='object')

After removing unused categories:
Index(['Cat', 'Dog', 'Rabbit'], dtype='object')

Reordered Color categories: Index(['Brown', 'Black', 'White'], dtype='object')

Reordered Color values: ['White', 'Brown', 'White', 'Black', 'Black', 'Brown', 'White']


In [17]:
import pandas as pd
# Sample DataFrame
data = {
'Fruit': ['Apple', 'Banana', 'Cherry', 'Banana', 'Apple', 'Cherry'],
'Quality': ['Good', 'Bad', 'Excellent', 'Good', 'Bad', 'Excellent']}
df = pd.DataFrame(data)
# Convert columns to categorical datatype using pd.Categorical constructor
df['Fruit'] = pd.Categorical(df['Fruit'])
df['Quality'] = pd.Categorical(df['Quality'], categories=['Bad', 'Good', 'Excellent'], ordered=True)
# Finding value_counts
fruit_counts = df['Fruit'].value_counts()
quality_counts = df['Quality'].value_counts()
# Finding mode
fruit_mode = df['Fruit'].mode()[0]
quality_mode = df['Quality'].mode()[0]
# Sorting values
sorted_fruit = df.sort_values('Fruit')
sorted_quality = df.sort_values('Quality')
# Display results
print("Value Counts - Fruit:\n", fruit_counts)
print("\nValue Counts - Quality:\n", quality_counts)
print("\nMode - Fruit:", fruit_mode)
print("Mode - Quality:", quality_mode)
print("\nSorted by Fruit:\n", sorted_fruit)
print("\nSorted by Quality:\n", sorted_quality)

Value Counts - Fruit:
 Fruit
Apple     2
Banana    2
Cherry    2
Name: count, dtype: int64

Value Counts - Quality:
 Quality
Bad          2
Good         2
Excellent    2
Name: count, dtype: int64

Mode - Fruit: Apple
Mode - Quality: Bad

Sorted by Fruit:
     Fruit    Quality
0   Apple       Good
4   Apple        Bad
1  Banana        Bad
3  Banana       Good
2  Cherry  Excellent
5  Cherry  Excellent

Sorted by Quality:
     Fruit    Quality
1  Banana        Bad
4   Apple        Bad
0   Apple       Good
3  Banana       Good
2  Cherry  Excellent
5  Cherry  Excellent
