# Custom Index

#### As already mentioned, pandas creates a default integer index to identify each row. In this section, we'll create a custom index and try to access the DataFrame using it.

In [1]:
import pandas as pd

In [2]:
# Sample records, keyed by column name. The variable is called `people`
# rather than `dict` so that the built-in `dict` type is not shadowed for the
# rest of the kernel session.
people = {
    "email": ["amanda@gmail.com", "mike@gmail.com", "john@gmail.com"],
    "first": ["Amanda", "Mike", "John"],
    "last": ["Taylor", "Tyson", "Doe"]
}

# Build the DataFrame from the records; no index is specified, so pandas
# assigns the default integer index (0, 1, 2).
df = pd.DataFrame(people)
df

Unnamed: 0,email,first,last
0,amanda@gmail.com,Amanda,Taylor
1,mike@gmail.com,Mike,Tyson
2,john@gmail.com,John,Doe


In [61]:
# set_index returns a new DataFrame that uses the 'email' column as its index;
# df itself keeps its default integer index (its index is printed a few cells below).
df2 = df.set_index('email')
df2

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
amanda@gmail.com,Amanda,Taylor
mike@gmail.com,Mike,Tyson
john@gmail.com,John,Doe


- **`iloc` selects rows by integer position. Its counterpart, `loc`, selects rows by index label, so it is the accessor to use with a custom index.**
- Note that on a fresh default integer index the labels are the same as the positions (0, 1, 2, …), so `loc` and `iloc` return the same row for a single integer. Slices still differ: `loc` includes the end label, while `iloc` excludes the end position.

In [19]:
# Look up one row by its index label with loc.
# The row comes back as a Series: its index holds the column names and its
# name is the label that was looked up.
df2.loc['amanda@gmail.com']

first    Amanda
last     Taylor
Name: amanda@gmail.com, dtype: object

In [62]:
# Compare the two indexes: df still has the default RangeIndex, while df2 is
# indexed by the 'email' labels (set_index did not touch df).
print('index of original DataFrame:\n', df.index, '\n')
print('index of DataFrame with custom index:\n', df2.index)

index of original DataFrame:
 RangeIndex(start=0, stop=3, step=1) 

index of DataFrame with custom index:
 Index(['amanda@gmail.com', 'mike@gmail.com', 'john@gmail.com'], dtype='object', name='email')


#### Note that, unlike a database primary key, a pandas index is not required to be unique. Still, it is better to keep index values unique so that each label identifies exactly one row.

In [32]:
# Work on a copy so the in-place set_index in the next cell leaves df untouched
df3 = df.copy()
df3

Unnamed: 0,email,first,last
0,amanda@gmail.com,Amanda,Taylor
1,mike@gmail.com,Mike,Tyson
2,john@gmail.com,John,Doe


In [33]:
# Setting the custom index in place: inplace=True changes df3 itself instead of
# returning a new DataFrame (df3 is a copy, so df is not affected)
df3.set_index('email', inplace=True)
df3

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
amanda@gmail.com,Amanda,Taylor
mike@gmail.com,Mike,Tyson
john@gmail.com,John,Doe


In [36]:
# Accessing multiple rows: pass a list of index labels to loc (returns a DataFrame)
df2.loc[ ['amanda@gmail.com', 'mike@gmail.com'] ]

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
amanda@gmail.com,Amanda,Taylor
mike@gmail.com,Mike,Tyson


In [40]:
# Accessing a range of rows with label slicing; the slice follows the row order
# of the index, from the start label through the end label
df2.loc['amanda@gmail.com':'john@gmail.com']  # Unlike iloc, slicing syntax in loc includes the end row

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
amanda@gmail.com,Amanda,Taylor
mike@gmail.com,Mike,Tyson
john@gmail.com,John,Doe


In [42]:
# Accessing both rows and columns: the first argument selects rows (a list of
# index labels), the second selects columns (a label slice, end label included)
df2.loc[ ['mike@gmail.com', 'john@gmail.com'], 'first':'last' ]

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
mike@gmail.com,Mike,Tyson
john@gmail.com,John,Doe


In [43]:
# Accessing multiple rows of a single column: a list of row labels plus one
# column label returns a Series named after that column
df2.loc[ ['amanda@gmail.com','mike@gmail.com'], "last" ]

email
amanda@gmail.com    Taylor
mike@gmail.com       Tyson
Name: last, dtype: object

In [44]:
# Accessing a single element: one row label and one column label return the
# scalar value stored in that cell
df2.loc[ 'john@gmail.com', 'last' ]

'Doe'

In [47]:
# Show df2 (indexed by 'email') for comparison with the reset result in the next cell
df2

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
amanda@gmail.com,Amanda,Taylor
mike@gmail.com,Mike,Tyson
john@gmail.com,John,Doe


In [46]:
# Resetting the index: reset_index returns a new DataFrame in which 'email' is
# an ordinary column again and the rows are numbered 0..2; df2 is left as it was
df4 = df2.reset_index()
df4

Unnamed: 0,email,first,last
0,amanda@gmail.com,Amanda,Taylor
1,mike@gmail.com,Mike,Tyson
2,john@gmail.com,John,Doe


In [52]:
# Copy df2 so the in-place reset in the next cell does not alter df2
df5 = df2.copy()
df5

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
amanda@gmail.com,Amanda,Taylor
mike@gmail.com,Mike,Tyson
john@gmail.com,John,Doe


In [53]:
# Resetting the index in place: inplace=True changes df5 itself instead of
# returning a new DataFrame (df5 is a copy, so df2 keeps its 'email' index)
df5.reset_index(inplace=True)
df5

Unnamed: 0,email,first,last
0,amanda@gmail.com,Amanda,Taylor
1,mike@gmail.com,Mike,Tyson
2,john@gmail.com,John,Doe


### Sorting based on index

In [54]:
# Show df2 in its current row order (amanda, mike, john) before sorting by index
print('before sorting')
df2

before sorting


Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
amanda@gmail.com,Amanda,Taylor
mike@gmail.com,Mike,Tyson
john@gmail.com,John,Doe


In [59]:
# Sorting rows by index label in ascending (alphabetical) order.
# sort_index returns a sorted copy, without changing the original DataFrame.
df2.sort_index(ascending=True)
# Set ascending=False for descending order sorting

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
amanda@gmail.com,Amanda,Taylor
john@gmail.com,John,Doe
mike@gmail.com,Mike,Tyson


In [60]:
# Sorting in place: inplace=True reorders df2_sorted itself instead of returning
# a new DataFrame. df2_sorted is a copy, so df2 is still left unchanged.
df2_sorted = df2.copy()
df2_sorted.sort_index(ascending=True, inplace=True)
df2_sorted

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
amanda@gmail.com,Amanda,Taylor
john@gmail.com,John,Doe
mike@gmail.com,Mike,Tyson
