In [1]:
import numpy as np
import pandas as pd

In [2]:
# reindex is a core pandas method: it builds a new object whose data is
# conformed to a new index. Start with a Series whose labels are out of order.

cars = pd.Series({
    2: 'Bugatti',
    1: 'Lamborghini',
    4: 'Rolls Royce',
    3: 'Mustang',
})

In [3]:
# Display the Series as constructed (labels in their original order).
cars

2        Bugatti
1    Lamborghini
4    Rolls Royce
3        Mustang
dtype: object

In [4]:
# reindex returns a new Series whose rows follow the requested labels; any
# requested label that is absent from the original index gets a missing value.

cars2 = cars.reindex(index=[1, 2, 3, 4, 5])

In [5]:
# Display the reindexed Series.
cars2

1    Lamborghini
2        Bugatti
3        Mustang
4    Rolls Royce
5            NaN
dtype: object

In [6]:
# For ordered data such as time series, it is often useful to interpolate or
# fill values while reindexing. The `method` option does this; for example,
# 'ffill' forward-fills the values.

color = pd.Series({0: 'black', 2: 'red', 4: 'blue'})

In [7]:
# Display the sparse-labelled Series before filling.
color

0    black
2      red
4     blue
dtype: object

In [8]:
# Conform to labels 0..5; each label missing from `color` takes the value of
# the nearest preceding label (forward fill).
color2 = color.reindex(index=range(6), method='ffill')

In [9]:
# Display the forward-filled result.
color2

0    black
1    black
2      red
3      red
4     blue
5     blue
dtype: object

In [10]:
# With a DataFrame, reindex can alter the (row) index, the columns, or both.
# When passed only a sequence, it reindexes the rows of the result.

frame = pd.DataFrame(
    data=np.arange(9).reshape((3, 3)),
    index=[1, 3, 4],
    columns=['Bugatti', 'Lamborghini', 'Rolls Royce'],
)

In [11]:
# Display the original 3x3 frame.
frame

Unnamed: 0,Bugatti,Lamborghini,Rolls Royce
1,0,1,2
3,3,4,5
4,6,7,8


In [12]:
# Reindex the rows only; a row label not present in `frame` becomes an all-NaN row.
frame2 = frame.reindex(index=[1, 2, 3, 4])

In [13]:
# Display the row-reindexed frame.
frame2

Unnamed: 0,Bugatti,Lamborghini,Rolls Royce
1,0.0,1.0,2.0
2,,,
3,3.0,4.0,5.0
4,6.0,7.0,8.0


In [14]:
# Columns are reindexed through the `columns` keyword; these are the column
# labels to conform to.

cars3 = [
    'Lamborghini',
    'Mustang',
    'Rolls Royce',
]

In [15]:
# Conform the columns to `cars3`; a requested column that `frame` does not have is added as all-NaN.
frame3 = frame.reindex(columns = cars3)

In [16]:
# As we'll explore in more detail, you can select more succinctly by
# label-indexing with loc, and many users prefer to use it exclusively.
# Caveat: unlike reindex, loc raises a KeyError when a requested label is
# missing, so every row and column label passed here must already exist.

row_labels = [1, 3, 4]
frame3.loc[row_labels, cars3]

Unnamed: 0,Lamborghini,Mustang,Rolls Royce
1,1,,2
3,4,,5
4,7,,8


# Reindex Function Arguments

In [17]:
# Argument: index
# Description: New sequence to use as index. Can be Index instance or any other sequence-like Python data structure. An Index will be used exactly as is without any copying.

# Sample sales data
data = {'Product': ['A', 'B', 'C', 'D'],
        'Price': [10, 20, 15, 25],
        'Quantity': [5, 10, 8, 12]}

# Create a DataFrame with the 'Product' column as the index
df = pd.DataFrame(data).set_index('Product')
print("Original DataFrame:")
print(df)

# Labels to conform the rows to. None of them exists in the current index
# ('A'..'D'), so there is no existing row to carry over for any of them.
new_index = ['Jan', 'Feb', 'Mar', 'Apr']

# reindex does NOT rename the existing rows: it looks each requested label up
# in the current index and inserts NaN where the label is missing. Because no
# label matches here, every value in the result is NaN. (To relabel rows while
# keeping their data, assign to `df.index` or use `set_axis` instead.)
# The result is bound to a new name so the original frame stays available.
df_reindexed = df.reindex(index=new_index)
print("\nDataFrame with a new index:")
print(df_reindexed)


Original DataFrame:
         Price  Quantity
Product                 
A           10         5
B           20        10
C           15         8
D           25        12

DataFrame with a new index:
         Price  Quantity
Product                 
Jan        NaN       NaN
Feb        NaN       NaN
Mar        NaN       NaN
Apr        NaN       NaN


In [18]:
# Argument: method
# Description: Interpolation (fill) method; 'ffill' fills forward, while 'bfill' fills backward.
# Note: reindex(method=...) only fills rows for labels newly introduced by the
# reindex; it does not touch NaNs already present in the data. To show the
# forward-fill strategy itself, this cell applies it to existing gaps.

# Sample DataFrame with missing values
data = {'A': [1, np.nan, np.nan, 4, 5],
        'B': [6, np.nan, 8, 9, 10]}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# Forward fill: each NaN takes the last valid value above it in the same column.
# DataFrame.ffill() is used instead of fillna(method='ffill'), whose `method`
# argument is deprecated in recent pandas releases.
df_filled = df.ffill()
print("\nDataFrame with missing values filled using 'ffill':")
print(df_filled)

Original DataFrame:
     A     B
0  1.0   6.0
1  NaN   NaN
2  NaN   8.0
3  4.0   9.0
4  5.0  10.0

DataFrame with missing values filled using 'ffill':
     A     B
0  1.0   6.0
1  1.0   6.0
2  1.0   8.0
3  4.0   9.0
4  5.0  10.0


In [19]:
# Example of the bfill (backward fill) method

# Sample DataFrame with missing values
data = {'A': [1, np.nan, np.nan, 4, 5],
        'B': [6, np.nan, 8, np.nan, 10]}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# Backward fill: each NaN takes the next valid value below it in the same column.
# DataFrame.bfill() is used instead of fillna(method='bfill'), whose `method`
# argument is deprecated in recent pandas releases.
df_filled = df.bfill()
print("\nDataFrame with missing values filled using 'bfill':")
print(df_filled)

Original DataFrame:
     A     B
0  1.0   6.0
1  NaN   NaN
2  NaN   8.0
3  4.0   NaN
4  5.0  10.0

DataFrame with missing values filled using 'bfill':
     A     B
0  1.0   6.0
1  4.0   8.0
2  4.0   8.0
3  4.0  10.0
4  5.0  10.0


In [20]:
# Argument: fill_value
# Description: Value substituted for the missing data that reindexing introduces.

# Small frame labelled 'a'..'c'
data = {'A': [1, 2, 3],
        'B': [4, 5, 6]}
df = pd.DataFrame(data, index=['a', 'b', 'c'])
print("Original DataFrame:", df, sep="\n")

# Extend the existing labels with two new ones; the rows created for the new
# labels are filled with 0 instead of NaN.
new_index = list(df.index) + ['d', 'e']
df_reindexed = df.reindex(index=new_index, fill_value=0)
print("\nReindexed DataFrame with fill_value=0:", df_reindexed, sep="\n")

Original DataFrame:
   A  B
a  1  4
b  2  5
c  3  6

Reindexed DataFrame with fill_value=0:
   A  B
a  1  4
b  2  5
c  3  6
d  0  0
e  0  0


In [21]:
# Argument: limit
# Description: When forward- or backfilling, the maximum size gap (in number of elements) to fill.

# Two columns that each contain a run of three consecutive missing values
gap = [np.nan] * 3
data = {'A': [1, *gap, 5],
        'B': [6, *gap, 10]}
df = pd.DataFrame(data)
print("Original DataFrame:", df, sep="\n")

# Forward fill at most 2 consecutive NaNs; backward fill at most 1.
# Both results are computed from the same untouched `df`.
df_ffill = df.ffill(limit=2)
df_bfill = df.bfill(limit=1)

print("\nDataFrame with forward-filled values (limit=2):", df_ffill, sep="\n")
print("\nDataFrame with backward-filled values (limit=1):", df_bfill, sep="\n")


Original DataFrame:
     A     B
0  1.0   6.0
1  NaN   NaN
2  NaN   NaN
3  NaN   NaN
4  5.0  10.0

DataFrame with forward-filled values (limit=2):
     A     B
0  1.0   6.0
1  1.0   6.0
2  1.0   6.0
3  NaN   NaN
4  5.0  10.0

DataFrame with backward-filled values (limit=1):
     A     B
0  1.0   6.0
1  NaN   NaN
2  NaN   NaN
3  5.0  10.0
4  5.0  10.0


In [22]:
# Argument: tolerance
# Description: When forward- or backfilling, maximum size gap (in absolute numeric distance) to fill for inexact matches.

In [23]:
# Argument: level
# Description: Match a simple Index against the given level of a MultiIndex (i.e. broadcast across that level).

In [24]:
# Argument: copy
# Description: If True, always copy the underlying data even if the new index is equivalent to the old index; if False, do not copy the data when the indexes are equivalent.
# NOTE(review): whether copy=False actually shares data with the original
# depends on the pandas version and its Copy-on-Write behaviour, so the value
# printed at the end of this cell may differ between versions; confirm.

# pandas is already imported as `pd` in the first cell of the notebook.

# Create a DataFrame with a simple index
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data, index=['X', 'Y', 'Z'])

# Reindex to an equivalent index, once requesting a copy and once not
df_copy = df.reindex(['X', 'Y', 'Z'], copy=True)
df_no_copy = df.reindex(['X', 'Y', 'Z'], copy=False)

# Modify both results. Writing to df_copy never affects df; writing to
# df_no_copy reaches df only if the two still share the underlying data.
df_copy.loc['X', 'A'] = 10
df_no_copy.loc['X', 'A'] = 10

# Check the original DataFrame
print(df)

    A  B
X  10  4
Y   2  5
Z   3  6
