In [1]:
import pandas as pd

# Create a sample DataFrame
df = pd.DataFrame({
    'A': [1, 2, 3, 4, 5],
    'B': [10, 20, 30, 40, 50],
    'C': [100, 200, 300, 400, 500]
}, index=['row1', 'row2', 'row3', 'row4', 'row5'])

# loc examples
print(df.loc['row2'])  # Select row 'row2'
print(df.loc[:, 'B'])  # Select column 'B'
print(df.loc['row1', 'C'])  # Select value at 'row1' and column 'C'
print(df.loc[['row1', 'row3'], ['A', 'C']])  # Select multiple rows and columns
print(df.loc['row2':'row4'])  # Slice rows from 'row2' to 'row4' : start : step <- Step included 
print(df.loc[df['A'] > 2])  # Boolean indexing this is performed on row 
print(df.loc[(df.A <= 3) & (df.B <=30 ),"B"])

# iloc examples
print(df.iloc[1])  # Select second row
print(df.iloc[:, 1])  # Select second column
print(df.iloc[0, 2])  # Select value at first row and third column
print(df.iloc[[0, 2], [0, 2]])  # Select multiple rows and columns by position
print(df.iloc[1:4])  # Slice rows from index 1 to 3
print(df.iloc[:, 0:2])  # Slice first two columns

A      2
B     20
C    200
Name: row2, dtype: int64
row1    10
row2    20
row3    30
row4    40
row5    50
Name: B, dtype: int64
100
      A    C
row1  1  100
row3  3  300
      A   B    C
row2  2  20  200
row3  3  30  300
row4  4  40  400
      A   B    C
row3  3  30  300
row4  4  40  400
row5  5  50  500
A      2
B     20
C    200
Name: row2, dtype: int64
row1    10
row2    20
row3    30
row4    40
row5    50
Name: B, dtype: int64
100
      A    C
row1  1  100
row3  3  300
      A   B    C
row2  2  20  200
row3  3  30  300
row4  4  40  400
      A   B
row1  1  10
row2  2  20
row3  3  30
row4  4  40
row5  5  50


In [2]:
# Mathematical operations: axis=0 columns, axis=1  rows

# Drop: axis=0 drops rows, axis=1 drops columns

In [3]:
print(df.loc[:"row3", :"B"])
print("\n")
print(df.iloc[:2, :1])

      A   B
row1  1  10
row2  2  20
row3  3  30


      A
row1  1
row2  2


In [6]:
import pandas as pd
import numpy as np

# Sample DataFrame
df = pd.DataFrame({
    'Category': ['A', 'B', 'A', 'B', 'A', 'C', 'C', 'D', 'D', 'D'],
    'SubCategory': ['X', 'X', 'Y', 'Y', 'X', 'X', 'Y', 'X', 'Y', 'Y'],
    'Value': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'Quantity': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
})


# 1. Basic groupby
print("\n1. Basic groupby (mean of Value for each Category):")
print(df.groupby('Category')['Value'].mean())

# 2. Multiple columns groupby
print("\n2. Multiple columns groupby:")
print(df.groupby(['Category', 'SubCategory']).mean())




1. Basic groupby (mean of Value for each Category):
Category
A    30.0
B    30.0
C    65.0
D    90.0
Name: Value, dtype: float64

2. Multiple columns groupby:
                      Value  Quantity
Category SubCategory                 
A        X             30.0       3.0
         Y             30.0       3.0
B        X             20.0       2.0
         Y             40.0       4.0
C        X             60.0       6.0
         Y             70.0       7.0
D        X             80.0       8.0
         Y             95.0       9.5


In [7]:
# 5. Multiple aggregations
print("\n5. Multiple aggregations:")
print(df.groupby('Category').agg({'Value': 'sum', 'Quantity': 'mean'}))

# 6. Transformation
print("\n6. Transformation (sum):")
print(df.groupby('Category').transform('sum'))




5. Multiple aggregations:
          Value  Quantity
Category                 
A            90       3.0
B            60       3.0
C           130       6.5
D           270       9.0

6. Transformation (sum):
  SubCategory  Value  Quantity
0         XYX     90         9
1          XY     60         6
2         XYX     90         9
3          XY     60         6
4         XYX     90         9
5          XY    130        13
6          XY    130        13
7         XYY    270        27
8         XYY    270        27
9         XYY    270        27


In [10]:
# 9. Get group sizes
print("\n9. Get group sizes:")
print(df.groupby('Category').size())


# 11. Groupby with multiple columns and reset index
print("\n11. Groupby with multiple columns and reset index:")
print(df.groupby(['Category', 'SubCategory']).mean())




9. Get group sizes:
Category
A    3
B    2
C    2
D    3
dtype: int64

11. Groupby with multiple columns and reset index:
                      Value  Quantity
Category SubCategory                 
A        X             30.0       3.0
         Y             30.0       3.0
B        X             20.0       2.0
         Y             40.0       4.0
C        X             60.0       6.0
         Y             70.0       7.0
D        X             80.0       8.0
         Y             95.0       9.5


In [13]:
df["Category"].value_counts() # sorted 
df.groupby('Category').size() # unsorted but same 

(Category
 A    3
 D    3
 B    2
 C    2
 Name: count, dtype: int64,
 Category
 A    3
 B    2
 C    2
 D    3
 dtype: int64)