1. Explicit Indexes

In [1]:
import pandas as pd

# Column-oriented input: each dict key becomes a column label.
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Score': [85, 90, 95],
}

# Supply explicit row labels instead of the default 0..n-1 integer index.
df = pd.DataFrame(data, index=list('abc'))

print(df)

      Name  Score
a    Alice     85
b      Bob     90
c  Charlie     95


2. Setting and Removing Indexes

In [None]:
# Plain frame with the default integer index; 'City' starts out as a column.
df = pd.DataFrame(
    {
        'City': ['New York', 'Paris', 'London'],
        'Temperature': [21, 25, 19],
    }
)

# Promote 'City' from a column to the row index (a new frame is returned;
# `df` itself is left unchanged).
df_indexed = df.set_index(keys='City')
print(df_indexed)

# Move the index back into a regular column and restore the default index.
df_reset = df_indexed.reset_index(drop=False)
print(df_reset)

          Temperature
City                 
New York           21
Paris              25
London             19
       City  Temperature
0  New York           21
1     Paris           25
2    London           19


3. Subsetting with .loc[]

In [3]:
# A single label returns that row as a Series.
paris_row = df_indexed.loc['Paris']
print(paris_row)

# A list of labels returns a DataFrame containing just those rows.
selected_rows = df_indexed.loc[['Paris', 'London']]
print(selected_rows)

Temperature    25
Name: Paris, dtype: int64
        Temperature
City               
Paris            25
London           19


4. Setting Multi-level Indexes

In [None]:
# Long-format readings: one row per (Year, City) observation.
df = pd.DataFrame(
    data={
        'Year': [2020, 2020, 2021, 2021],
        'City': ['New York', 'London', 'New York', 'London'],
        'Temperature': [30, 22, 35, 25],
    }
)

# Passing a list of columns builds a two-level index
# (outer level: Year, inner level: City).
index_levels = ['Year', 'City']
df_multi = df.set_index(index_levels)
print(df_multi)

               Temperature
Year City                 
2020 New York           30
     London             22
2021 New York           35
     London             25


5. Sorting by Index Values

In [5]:
# Ascending sort of the index: Year first, then City within each year.
sorted_df = df_multi.sort_index(ascending=True)
print(sorted_df)

# Same sort with the direction reversed on every level.
descending_df = df_multi.sort_index(ascending=False)
print(descending_df)

               Temperature
Year City                 
2020 London             22
     New York           30
2021 London             25
     New York           35
               Temperature
Year City                 
2021 New York           35
     London             25
2020 New York           30
     London             22


6. Slicing and Subsetting with .loc[] and .iloc[]

In [6]:
# Label slice on the *unsorted* City index. The rows are stored in the order
# New York, Paris, London, so 'London' comes after 'Paris' and the slice
# 'London':'Paris' selects nothing -- the result is an empty DataFrame.
# Sort the index first (df_indexed.sort_index()) to get a meaningful range.
london_to_paris = df_indexed.loc['London':'Paris']
print(london_to_paris)

# iloc is purely positional: the first two rows (positions 0 and 1).
# NOTE(review): `df` here is whichever frame was assigned most recently; the
# saved output shows the Year/City/Temperature frame -- confirm cell order.
print(df.iloc[:2])

Empty DataFrame
Columns: [Temperature]
Index: []
   Year      City  Temperature
0  2020  New York           30
1  2020    London           22


7. Slicing Index Values

In [7]:
# Sort the index first so the label slice covers a well-defined range;
# both endpoints ('London' and 'Paris') are included in the result.
cities_sorted = df_indexed.sort_index()
print(cities_sorted.loc['London':'Paris'])

          Temperature
City                 
London             19
New York           21
Paris              25


8. Slicing in Both Directions

In [8]:
# .loc slices are label-based and include both endpoints, on rows and columns
# alike: row labels 0 through 1, columns 'City' through 'Temperature'.
# NOTE(review): relies on `df` still being the Year/City/Temperature frame
# (as in the saved output) -- confirm cell order.
row_labels = slice(0, 1)
column_labels = slice('City', 'Temperature')
print(df.loc[row_labels, column_labels])

       City  Temperature
0  New York           30
1    London           22


9. Slicing Time Series

In [9]:
# Five consecutive daily timestamps starting at 2020-01-01.
date_index = pd.date_range(start='2020-01-01', periods=5)
df = pd.DataFrame({'Temp': [22, 23, 21, 20, 19]}, index=date_index)

# Slice rows by date with .loc rather than bare df[...]: plain brackets
# normally select columns, so .loc makes it explicit that this is a
# label-based row slice. Date strings are matched against the DatetimeIndex
# and both endpoints are included.
print(df.loc['2020-01-02':'2020-01-04'])

            Temp
2020-01-02    23
2020-01-03    21
2020-01-04    20


10. Subsetting by Row/Column Number

In [10]:
# Purely positional selection: rows at positions 0-1 and the column at
# position 0. Using the slice 0:1 (rather than a bare 0) for the column keeps
# the result a one-column DataFrame.
first_rows = slice(0, 2)
first_column = slice(0, 1)
print(df.iloc[first_rows, first_column])

            Temp
2020-01-01    22
2020-01-02    23


11. Working with Pivot Tables

In [14]:
import pandas as pd

# Sample dataset: one temperature reading per (City, Year) pair.
df = pd.DataFrame(
    {
        'City': ['New York', 'New York', 'Los Angeles', 'Los Angeles', 'Chicago', 'Chicago'],
        'Year': [2020, 2021, 2020, 2021, 2020, 2021],
        'Temperature': [30, 35, 25, 28, 20, 22],
    }
)

# Reshape long -> wide: one row per Year, one column per City, each cell
# holding the mean Temperature for that (Year, City) combination.
pivot = pd.pivot_table(
    df,
    values='Temperature',  # the numbers being aggregated
    index='Year',          # becomes the row labels
    columns='City',        # becomes the column labels
    aggfunc='mean',        # aggregation applied within each cell
)

print(pivot)

City  Chicago  Los Angeles  New York
Year                                
2020     20.0         25.0      30.0
2021     22.0         28.0      35.0


12. Pivot Temperature by City and Year

In [11]:
# Same kind of data written row by row: (Year, City, Temperature).
df = pd.DataFrame(
    [
        (2020, 'NY', 30),
        (2020, 'LA', 25),
        (2021, 'NY', 32),
        (2021, 'LA', 28),
    ],
    columns=['Year', 'City', 'Temperature'],
)

# Years down the rows, cities across the columns; 'mean' is pivot_table's
# default aggregation, written out here for clarity.
pivot = df.pivot_table(
    index='Year',
    columns='City',
    values='Temperature',
    aggfunc='mean',
)
print(pivot)

City    LA    NY
Year            
2020  25.0  30.0
2021  28.0  32.0


13. Subsetting Pivot Tables

In [12]:
# Column selection: the 'LA' column for every year, returned as a Series.
la_by_year = pivot['LA']
print(la_by_year)

# Scalar lookup: .loc[row_label, column_label] returns a single cell value.
ny_2021 = pivot.loc[2021, 'NY']
print(ny_2021)

Year
2020    25.0
2021    28.0
Name: LA, dtype: float64
32.0


14. Calculating on a Pivot Table

In [13]:
# Per-year mean: averaging along the columns collapses the cities in each row.
print(pivot.mean(axis='columns'))

# Per-city mean: averaging along the index collapses the years in each column.
print(pivot.mean(axis='index'))

Year
2020    27.5
2021    30.0
dtype: float64
City
LA    26.5
NY    31.0
dtype: float64
