In [None]:
import pandas as pd

# Sample DataFrame
data = {
    'category': ['A', 'B', 'A', 'B', 'A', 'B'],
    'subcategory': ['X', 'X', 'Y', 'Y', 'X', 'Y'],
    'value': [10, 20, 15, 25, 10, 30],
    'other_column': [100, 200, 150, 250, 100, 300]
}
df = pd.DataFrame(data)

In [None]:
import pandas as pd

# Create a sample DataFrame
df = pd.DataFrame({
    'A': [1, 2, 3, 4, 5],
    'B': [10, 20, 30, 40, 50],
    'C': [100, 200, 300, 400, 500]
}, index=['row1', 'row2', 'row3', 'row4', 'row5'])

# loc examples
print(df.loc['row2'])  # Select row 'row2'
print(df.loc[:, 'B'])  # Select column 'B'
print(df.loc['row1', 'C'])  # Select value at 'row1' and column 'C'
print(df.loc[['row1', 'row3'], ['A', 'C']])  # Select multiple rows and columns
print(df.loc['row2':'row4'])  # Slice rows from 'row2' to 'row4'
print(df.loc[df['A'] > 2])  # Boolean indexing

# iloc examples
print(df.iloc[1])  # Select second row
print(df.iloc[:, 1])  # Select second column
print(df.iloc[0, 2])  # Select value at first row and third column
print(df.iloc[[0, 2], [0, 2]])  # Select multiple rows and columns by position
print(df.iloc[1:4])  # Slice rows from index 1 to 3
print(df.iloc[:, 0:2])  # Slice first two columns

In [None]:
# Mathematical operations: axis=0 columns, axis=1  rows

# Drop: axis=0 drops rows, axis=1 drops columns

In [None]:
df.head()

In [None]:
print(df.loc[:"row3", :"B"])
print("\n")
print(df.iloc[:2, :1])

In [2]:
import pandas as pd
import numpy as np

# Sample DataFrame
df = pd.DataFrame({
    'Category': ['A', 'B', 'A', 'B', 'A', 'C', 'C', 'D', 'D', 'D'],
    'SubCategory': ['X', 'X', 'Y', 'Y', 'X', 'X', 'Y', 'X', 'Y', 'Y'],
    'Value': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'Quantity': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
})


# 1. Basic groupby
print("\n1. Basic groupby (mean of Value for each Category):")
print(df.groupby('Category')['Value'].mean())

# 2. Multiple columns groupby
print("\n2. Multiple columns groupby:")
print(df.groupby(['Category', 'SubCategory']).mean())

# # 3. Aggregate functions
# print("\n3. Multiple aggregate functions:")
# print(df.groupby('Category').agg(['mean', 'sum', 'count']))

# 4. Custom aggregate function
print("\n4. Custom aggregate function (max - min):")
print(df.groupby('Category').agg({'Value': lambda x: x.max() - x.min()}))

# 5. Multiple aggregations
print("\n5. Multiple aggregations:")
print(df.groupby('Category').agg({'Value': 'sum', 'Quantity': 'mean'}))

# 6. Transformation
print("\n6. Transformation (sum):")
print(df.groupby('Category').transform('sum'))

# 7. Filter groups
print("\n7. Filter groups (mean Value > 50):")
print(df.groupby('Category').filter(lambda x: x['Value'].mean() > 50))

# 8. Apply custom function
print("\n8. Apply custom function (sort within groups):")
print(df.groupby('Category').apply(lambda x: x.sort_values('Value')))

# 9. Get group sizes
print("\n9. Get group sizes:")
print(df.groupby('Category').size())

# 10. Iterate through groups
print("\n10. Iterate through groups:")
for name, group in df.groupby('Category'):
    print(f"Category: {name}")
    print(group)
    print()

# 11. Groupby with multiple columns and reset index
print("\n11. Groupby with multiple columns and reset index:")
print(df.groupby(['Category', 'SubCategory']).mean().reset_index())

# 12. First and last elements of each group
print("\n12. First and last elements of each group:")
print("First:")
print(df.groupby('Category').first())
print("\nLast:")
print(df.groupby('Category').last())

# 13. Groupby with custom aggregation dictionary
print("\n13. Groupby with custom aggregation dictionary:")
custom_agg = {
    'Value': ['mean', 'max'],
    'Quantity': ['sum', lambda x: x.max() - x.min()]
}
print(df.groupby('Category').agg(custom_agg))

# 14. Groupby with named aggregation
print("\n14. Groupby with named aggregation:")
print(df.groupby('Category').agg(
    avg_value=('Value', 'mean'),
    total_quantity=('Quantity', 'sum')
))

# 15. Groupby with pivot table
print("\n15. Groupby with pivot table:")
print(pd.pivot_table(df, values='Value', index='Category', columns='SubCategory', aggfunc='mean'))


1. Basic groupby (mean of Value for each Category):
Category
A    30.0
B    30.0
C    65.0
D    90.0
Name: Value, dtype: float64

2. Multiple columns groupby:
                      Value  Quantity
Category SubCategory                 
A        X             30.0       3.0
         Y             30.0       3.0
B        X             20.0       2.0
         Y             40.0       4.0
C        X             60.0       6.0
         Y             70.0       7.0
D        X             80.0       8.0
         Y             95.0       9.5

4. Custom aggregate function (max - min):
          Value
Category       
A            40
B            20
C            10
D            20

5. Multiple aggregations:
          Value  Quantity
Category                 
A            90       3.0
B            60       3.0
C           130       6.5
D           270       9.0

6. Transformation (sum):
  SubCategory  Value  Quantity
0         XYX     90         9
1          XY     60         6
2         XYX     9

  print(df.groupby('Category').apply(lambda x: x.sort_values('Value')))


In [4]:
df.head()

Unnamed: 0,Category,SubCategory,Value,Quantity
0,A,X,10,1
1,B,X,20,2
2,A,Y,30,3
3,B,Y,40,4
4,A,X,50,5


In [4]:
import pandas as pd
df = pd.DataFrame(
    {
        "Fruit": ["Apples", "Oranges", "Bananas", "Apples", "Oranges", "Bananas"],
        "Amount": [4, 1, 2, 2, 4, 5],
        "City": ["SF", "SF", "SF", "Montreal", "Montreal", "Montreal"],
        "Season": [
            "Summer",
            "Winter",
            "Summer",
            "Winter",
            "Summer",
            "Winter",
        ],  # New column
    }
)


In [8]:
df.loc[(df.City == "Montreal")&(df.Season == "Winter")]

Unnamed: 0,Fruit,Amount,City,Season
3,Apples,2,Montreal,Winter
5,Bananas,5,Montreal,Winter


In [None]:
df.loc[df.]