In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Set display options
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', 10)
pd.set_option('display.width', 100)

In [3]:
# 1. Types of Indexes
# -------------------------------------------------
print("## 1. Types of Indexes")

# Basic Index
idx1 = pd.Index([1, 2, 3, 4])
print("Basic Index:")
print(idx1)
print()

# RangeIndex
idx2 = pd.RangeIndex(start=0, stop=10, step=1)
print("RangeIndex:")
print(idx2)
print()

# CategoricalIndex
idx3 = pd.CategoricalIndex(['a', 'b', 'c', 'a'])
print("CategoricalIndex:")
print(idx3)
print()

# MultiIndex (hierarchical)
idx4 = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)])
print("MultiIndex:")
print(idx4)
print()

# DatetimeIndex
idx5 = pd.DatetimeIndex(['2020-01-01', '2020-01-02'])
print("DatetimeIndex:")
print(idx5)
print()

# TimedeltaIndex
idx6 = pd.TimedeltaIndex(['1 day', '2 days'])
print("TimedeltaIndex:")
print(idx6)
print()

# PeriodIndex
idx7 = pd.PeriodIndex(['2020-01', '2020-02'], freq='M')
print("PeriodIndex:")
print(idx7)
print()

# IntervalIndex
idx8 = pd.IntervalIndex.from_arrays([0, 1], [1, 2])
print("IntervalIndex:")
print(idx8)
print()

# Int64Index
idx9 = pd.Int64Index([1, 2, 3])
print("Int64Index:")
print(idx9)
print()

# UInt64Index
idx10 = pd.UInt64Index([1, 2, 3])
print("UInt64Index:")
print(idx10)
print()

# Float64Index
idx11 = pd.Float64Index([1.0, 2.0, 3.0])
print("Float64Index:")
print(idx11)
print()

## 1. Types of Indexes
Basic Index:
Int64Index([1, 2, 3, 4], dtype='int64')

RangeIndex:
RangeIndex(start=0, stop=10, step=1)

CategoricalIndex:
CategoricalIndex(['a', 'b', 'c', 'a'], categories=['a', 'b', 'c'], ordered=False, dtype='category')

MultiIndex:
MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1)],
           )

DatetimeIndex:
DatetimeIndex(['2020-01-01', '2020-01-02'], dtype='datetime64[ns]', freq=None)

TimedeltaIndex:
TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq=None)

PeriodIndex:
PeriodIndex(['2020-01', '2020-02'], dtype='period[M]')

IntervalIndex:
IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')

Int64Index:
Int64Index([1, 2, 3], dtype='int64')

UInt64Index:
UInt64Index([1, 2, 3], dtype='uint64')

Float64Index:
Float64Index([1.0, 2.0, 3.0], dtype='float64')



In [4]:
# 2. Index Attributes
# -------------------------------------------------
print("## 2. Index Attributes")

# Create a simple index for demonstration
idx = pd.Index([1, 2, 3, 3, 4, np.nan])
print("Index:")
print(idx)
print()

print(f"Values: {idx.values}")
print(f"Data type: {idx.dtype}")
print(f"Shape: {idx.shape}")
print(f"Number of dimensions: {idx.ndim}")
print(f"Size: {idx.size}")
print(f"Memory usage: {idx.nbytes} bytes")

# Optional names
idx_named = pd.Index([1, 2, 3], name='numbers')
print("\nNamed Index:")
print(idx_named)
print(f"Name: {idx_named.name}")

# Properties for checking nature of index
print("\nProperties:")
print(f"Is monotonic increasing: {idx.is_monotonic_increasing}")
print(f"Is monotonic decreasing: {idx.is_monotonic_decreasing}")
print(f"Is unique: {idx.is_unique}")
print(f"Has duplicates: {idx.has_duplicates}")
print(f"Has NaN values: {idx.hasnans}")

# MultiIndex specific attributes
midx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)])
print("\nMultiIndex:")
print(midx)
print(f"Names: {midx.names}")
print(f"Number of levels: {midx.nlevels}")
print("Levels:")
for i, level in enumerate(midx.levels):
    print(f"Level {i}: {level}")
print()

## 2. Index Attributes
Index:
Float64Index([1.0, 2.0, 3.0, 3.0, 4.0, nan], dtype='float64')

Values: [ 1.  2.  3.  3.  4. nan]
Data type: float64
Shape: (6,)
Number of dimensions: 1
Size: 6
Memory usage: 48 bytes

Named Index:
Int64Index([1, 2, 3], dtype='int64', name='numbers')
Name: numbers

Properties:
Is monotonic increasing: False
Is monotonic decreasing: False
Is unique: False
Has duplicates: True
Has NaN values: True

MultiIndex:
MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1)],
           )
Names: [None, None]
Number of levels: 2
Levels:
Level 0: Index(['a', 'b'], dtype='object')
Level 1: Int64Index([1, 2], dtype='int64')



In [5]:
# 3. Basic Index Operations
# -------------------------------------------------
print("## 3. Basic Index Operations")

# Create a simple index for demonstration
idx = pd.Index([10, 20, 30, 40, 50])
print("Index:")
print(idx)
print()

# Indexing and slicing
print(f"Get element at position 0: {idx[0]}")
print("Slice from position 1 to 3:")
print(idx[1:3])
print()

# Copy
idx_copy = idx.copy()
print("Copy of index:")
print(idx_copy)
print()

# Drop values
print("Drop values at positions 0 and 1:")
print(idx.delete([0, 1]))
print()

# Append another index
idx2 = pd.Index([60, 70])
print("Append another index:")
print(idx.append(idx2))
print()

# Insert value
print("Insert value 25 at position 2:")
print(idx.insert(2, 25))
print()

# Set operations
idx_a = pd.Index([1, 2, 3, 4])
idx_b = pd.Index([3, 4, 5, 6])
print("Index A:")
print(idx_a)
print("Index B:")
print(idx_b)
print()

print("Intersection:")
print(idx_a.intersection(idx_b))
print()

print("Union:")
print(idx_a.union(idx_b))
print()

print("Difference (A - B):")
print(idx_a.difference(idx_b))
print()

print("Symmetric difference:")
print(idx_a.symmetric_difference(idx_b))
print()

# Type conversion
print("Convert to int64:")
idx_float = pd.Index([1.0, 2.0, 3.0])
print(idx_float.astype('int64'))
print()

# Unique values
idx_dup = pd.Index([1, 2, 2, 3, 3, 3])
print("Index with duplicates:")
print(idx_dup)
print("Unique values:")
print(idx_dup.unique())
print()

# Duplicated mask
print("Boolean mask for duplicates:")
print(idx_dup.duplicated())
print()

# Sort
idx_unsorted = pd.Index([3, 1, 4, 2])
print("Unsorted index:")
print(idx_unsorted)
print("Sorted index:")
print(idx_unsorted.sort_values())
print()


## 3. Basic Index Operations
Index:
Int64Index([10, 20, 30, 40, 50], dtype='int64')

Get element at position 0: 10
Slice from position 1 to 3:
Int64Index([20, 30], dtype='int64')

Copy of index:
Int64Index([10, 20, 30, 40, 50], dtype='int64')

Drop values at positions 0 and 1:
Int64Index([30, 40, 50], dtype='int64')

Append another index:
Int64Index([10, 20, 30, 40, 50, 60, 70], dtype='int64')

Insert value 25 at position 2:
Int64Index([10, 20, 25, 30, 40, 50], dtype='int64')

Index A:
Int64Index([1, 2, 3, 4], dtype='int64')
Index B:
Int64Index([3, 4, 5, 6], dtype='int64')

Intersection:
Int64Index([3, 4], dtype='int64')

Union:
Int64Index([1, 2, 3, 4, 5, 6], dtype='int64')

Difference (A - B):
Int64Index([1, 2], dtype='int64')

Symmetric difference:
Int64Index([1, 2, 5, 6], dtype='int64')

Convert to int64:
Int64Index([1, 2, 3], dtype='int64')

Index with duplicates:
Int64Index([1, 2, 2, 3, 3, 3], dtype='int64')
Unique values:
Int64Index([1, 2, 3], dtype='int64')

Boolean mask for dup

In [6]:
# 4. MultiIndex Specific Methods
# -------------------------------------------------
print("## 4. MultiIndex Specific Methods")

# Create a MultiIndex
midx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1), ('b', 2)], names=['letter', 'number'])
print("MultiIndex:")
print(midx)
print()

# Access properties
print(f"Names: {midx.names}")
print(f"Number of levels: {midx.nlevels}")
print("Levels:")
for i, level in enumerate(midx.levels):
    print(f"  Level {i} ({midx.names[i]}): {level}")
print("Codes:")
for i, code in enumerate(midx.codes):
    print(f"  Code {i}: {code}")
print()

# Swap levels
print("Swap levels 0 and 1:")
print(midx.swaplevel(0, 1))
print()

# Drop level
print("Drop level 0:")
print(midx.droplevel(0))
print()

# Set levels
print("Set new level values:")
print(midx.set_levels([['c', 'd'], [3, 4]], level=[0, 1]))
print()

# Set codes
print("Set new codes:")
print(midx.set_codes([[1, 1, 0, 0], [1, 0, 1, 0]], level=[0, 1]))
print()

# Convert to DataFrame
print("Convert to DataFrame:")
print(midx.to_frame())
print()

# Convert to flat index
print("Convert to flat index:")
print(midx.to_flat_index())
print()

# Sort by level
print("Sort by level 1:")
print(midx.sortlevel(1)[0])
print()

# Creation from various structures
print("MultiIndex from arrays:")
print(pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], [1, 2, 1, 2]], names=['letter', 'number']))
print()

print("MultiIndex from product:")
print(pd.MultiIndex.from_product([['a', 'b'], [1, 2]], names=['letter', 'number']))
print()


## 4. MultiIndex Specific Methods
MultiIndex:
MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           names=['letter', 'number'])

Names: ['letter', 'number']
Number of levels: 2
Levels:
  Level 0 (letter): Index(['a', 'b'], dtype='object', name='letter')
  Level 1 (number): Int64Index([1, 2], dtype='int64', name='number')
Codes:
  Code 0: [0 0 1 1]
  Code 1: [0 1 0 1]

Swap levels 0 and 1:
MultiIndex([(1, 'a'),
            (2, 'a'),
            (1, 'b'),
            (2, 'b')],
           names=['number', 'letter'])

Drop level 0:
Int64Index([1, 2, 1, 2], dtype='int64', name='number')

Set new level values:
MultiIndex([('c', 3),
            ('c', 4),
            ('d', 3),
            ('d', 4)],
           names=['letter', 'number'])

Set new codes:
MultiIndex([('b', 2),
            ('b', 1),
            ('a', 2),
            ('a', 1)],
           names=['letter', 'number'])

Convert to DataFrame:
              letter  number
letter number    

In [7]:

# 5. DatetimeIndex Specific Methods
# -------------------------------------------------
print("## 5. DatetimeIndex Specific Methods")

# Create a DatetimeIndex
dtidx = pd.DatetimeIndex(['2020-01-01', '2020-01-15', '2020-02-01', '2020-03-01'])
print("DatetimeIndex:")
print(dtidx)
print()

# Date components
print("Year component:")
print(dtidx.year)
print()

print("Month component:")
print(dtidx.month)
print()

print("Day component:")
print(dtidx.day)
print()

print("Day of week:")
print(dtidx.dayofweek)
print()

print("Day of year:")
print(dtidx.dayofyear)
print()

print("Quarter:")
print(dtidx.quarter)
print()

# Time components
dt_with_time = pd.DatetimeIndex(['2020-01-01 12:30:45', '2020-01-02 14:45:30'])
print("\nDatetimeIndex with time:")
print(dt_with_time)
print()

print("Hour component:")
print(dt_with_time.hour)
print()

print("Minute component:")
print(dt_with_time.minute)
print()

print("Second component:")
print(dt_with_time.second)
print()

# Boolean flags
print("Is month start:")
print(dtidx.is_month_start)
print()

print("Is quarter end:")
print(dtidx.is_quarter_end)
print()

# Conversion methods
print("Convert to PeriodIndex with monthly frequency:")
print(dtidx.to_period('M'))
print()

print("Convert to Python datetime objects:")
print([dt for dt in dtidx.to_pydatetime()])
print()

# Timezone operations
print("Localize to UTC timezone:")
print(dtidx.tz_localize('UTC'))
print()

utc_idx = dtidx.tz_localize('UTC')
print("Convert from UTC to US/Eastern timezone:")
print(utc_idx.tz_convert('US/Eastern'))
print()

# Shifting and manipulating
print("Shift forward by 1 day:")
print(dtidx.shift(1, freq='D'))
print()

print("Floor to the day level:")
print(dt_with_time.floor('D'))
print()

print("Ceiling to the day level:")
print(dt_with_time.ceil('D'))
print()

print("Round to the day level:")
print(dt_with_time.round('D'))
print()

## 5. DatetimeIndex Specific Methods
DatetimeIndex:
DatetimeIndex(['2020-01-01', '2020-01-15', '2020-02-01', '2020-03-01'], dtype='datetime64[ns]', freq=None)

Year component:
Int64Index([2020, 2020, 2020, 2020], dtype='int64')

Month component:
Int64Index([1, 1, 2, 3], dtype='int64')

Day component:
Int64Index([1, 15, 1, 1], dtype='int64')

Day of week:
Int64Index([2, 2, 5, 6], dtype='int64')

Day of year:
Int64Index([1, 15, 32, 61], dtype='int64')

Quarter:
Int64Index([1, 1, 1, 1], dtype='int64')


DatetimeIndex with time:
DatetimeIndex(['2020-01-01 12:30:45', '2020-01-02 14:45:30'], dtype='datetime64[ns]', freq=None)

Hour component:
Int64Index([12, 14], dtype='int64')

Minute component:
Int64Index([30, 45], dtype='int64')

Second component:
Int64Index([45, 30], dtype='int64')

Is month start:
[ True False  True  True]

Is quarter end:
[False False False False]

Convert to PeriodIndex with monthly frequency:
PeriodIndex(['2020-01', '2020-01', '2020-02', '2020-03'], dtype='period[M]'

In [8]:

# 6. TimedeltaIndex Specific Methods
# -------------------------------------------------
print("## 6. TimedeltaIndex Specific Methods")

# Create a TimedeltaIndex
tdidx = pd.TimedeltaIndex(['1 day', '2 days 12 hours', '3 days', '4 days 6 hours'])
print("TimedeltaIndex:")
print(tdidx)
print()

# Components
print("Days:")
print(tdidx.days)
print()

print("Seconds (remainder):")
print(tdidx.seconds)
print()

print("Microseconds (remainder):")
print(tdidx.microseconds)
print()

# Total components
print("Total seconds:")
print(tdidx.total_seconds())
print()

# Convert to Python timedelta objects
print("Convert to Python timedelta objects:")
print([td for td in tdidx.to_pytimedelta()])
print()

# Get components in a DataFrame
print("Components:")
print(tdidx.components)
print()


## 6. TimedeltaIndex Specific Methods
TimedeltaIndex:
TimedeltaIndex(['1 days 00:00:00', '2 days 12:00:00', '3 days 00:00:00', '4 days 06:00:00'], dtype='timedelta64[ns]', freq=None)

Days:
Int64Index([1, 2, 3, 4], dtype='int64')

Seconds (remainder):
Int64Index([0, 43200, 0, 21600], dtype='int64')

Microseconds (remainder):
Int64Index([0, 0, 0, 0], dtype='int64')

Total seconds:
Float64Index([86400.0, 216000.0, 259200.00000000003, 367200.0], dtype='float64')

Convert to Python timedelta objects:
[datetime.timedelta(days=1), datetime.timedelta(days=2, seconds=43200), datetime.timedelta(days=3), datetime.timedelta(days=4, seconds=21600)]

Components:
   days  hours  minutes  seconds  milliseconds  microseconds  nanoseconds
0     1      0        0        0             0             0            0
1     2     12        0        0             0             0            0
2     3      0        0        0             0             0            0
3     4      6        0        0             0

In [9]:
# 7. PeriodIndex Specific Methods
# -------------------------------------------------
print("## 7. PeriodIndex Specific Methods")

# Create a PeriodIndex
pidx = pd.PeriodIndex(['2020-01', '2020-02', '2020-03', '2020-04'], freq='M')
print("PeriodIndex:")
print(pidx)
print()

# Components
print("Year:")
print(pidx.year)
print()

print("Month:")
print(pidx.month)
print()

print("Quarter:")
print(pidx.quarter)
print()

# Frequency
print(f"Frequency: {pidx.freq}")
print()

# Convert to different frequency
print("Convert to quarterly frequency:")
print(pidx.asfreq('Q'))
print()

# Convert to DatetimeIndex
print("Convert to DatetimeIndex (period start):")
print(pidx.to_timestamp())
print()

print("Convert to DatetimeIndex (period end):")
print(pidx.to_timestamp(how='end'))
print()


## 7. PeriodIndex Specific Methods
PeriodIndex:
PeriodIndex(['2020-01', '2020-02', '2020-03', '2020-04'], dtype='period[M]')

Year:
Int64Index([2020, 2020, 2020, 2020], dtype='int64')

Month:
Int64Index([1, 2, 3, 4], dtype='int64')

Quarter:
Int64Index([1, 1, 1, 2], dtype='int64')

Frequency: <MonthEnd>

Convert to quarterly frequency:
PeriodIndex(['2020Q1', '2020Q1', '2020Q1', '2020Q2'], dtype='period[Q-DEC]')

Convert to DatetimeIndex (period start):
DatetimeIndex(['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01'], dtype='datetime64[ns]', freq='MS')

Convert to DatetimeIndex (period end):
DatetimeIndex(['2020-01-31 23:59:59.999999999', '2020-02-29 23:59:59.999999999',
               '2020-03-31 23:59:59.999999999', '2020-04-30 23:59:59.999999999'],
              dtype='datetime64[ns]', freq=None)



In [10]:
# 8. IntervalIndex Specific Methods
# -------------------------------------------------
print("## 8. IntervalIndex Specific Methods")

# Create an IntervalIndex
iidx = pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4])
print("IntervalIndex:")
print(iidx)
print()

# Components
print("Left endpoints:")
print(iidx.left)
print()

print("Right endpoints:")
print(iidx.right)
print()

print("Mid points:")
print(iidx.mid)
print()

print("Length of intervals:")
print(iidx.length)
print()

# Check properties
print(f"Is non-overlapping: {iidx.is_non_overlapping_monotonic}")
print()

# Membership check
print("Does interval [1, 2) contain 1.5?")
print(iidx[1].contains(1.5))
print()

# Creation methods
print("IntervalIndex from arrays:")
print(pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3]))
print()

print("IntervalIndex from breaks:")
print(pd.IntervalIndex.from_breaks([0, 1, 2, 3]))
print()

print("IntervalIndex from tuples:")
print(pd.IntervalIndex.from_tuples([(0, 1), (1, 2), (2, 3)]))
print()

## 8. IntervalIndex Specific Methods
IntervalIndex:
IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4]], dtype='interval[int64, right]')

Left endpoints:
Int64Index([0, 1, 2, 3], dtype='int64')

Right endpoints:
Int64Index([1, 2, 3, 4], dtype='int64')

Mid points:
Float64Index([0.5, 1.5, 2.5, 3.5], dtype='float64')

Length of intervals:
Int64Index([1, 1, 1, 1], dtype='int64')

Is non-overlapping: True

Does interval [1, 2) contain 1.5?


AttributeError: 'pandas._libs.interval.Interval' object has no attribute 'contains'

In [None]:
# 9. Boolean Operations and Filtering
# -------------------------------------------------
print("## 9. Boolean Operations and Filtering")

# Create an index
idx = pd.Index([1, 2, 3, 4, 5])
print("Index:")
print(idx)
print()

# Check membership
print("Check if values are in [1, 3, 5]:")
print(idx.isin([1, 3, 5]))
print()

# Filter with where
print("Replace values where condition is False with -1:")
print(idx.where(idx > 3, -1))
print()

# Filter with regex
sidx = pd.Index(['apple', 'banana', 'cherry', 'date'])
print("\nString Index:")
print(sidx)
print()

print("Filter values starting with 'a' or 'b':")
print(sidx.str.filter(regex='^[ab]'))
print()

# Map values
print("Map values using a function:")
print(idx.map(lambda x: x * 2))
print()

In [None]:
# 10. Index Conversion
# -------------------------------------------------
print("## 10. Index Conversion")

# Create an index
idx = pd.Index([1, 2, 3, 4, 5], name='numbers')
print("Index:")
print(idx)
print()

# Convert to list
print("Convert to list:")
print(idx.tolist())
print()

# Convert to numpy array
print("Convert to numpy array:")
print(idx.to_numpy())
print()

# Convert to Series
print("Convert to Series:")
print(idx.to_series())
print()

# Convert MultiIndex to DataFrame
midx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1), ('b', 2)], names=['letter', 'number'])
print("\nMultiIndex:")
print(midx)
print()

print("Convert to DataFrame:")
print(midx.to_frame())
print()

# Type conversion
print("Convert to different type:")
print(idx.astype('float64'))
print()