In [2]:
import pandas as pd
import numpy as np

# **Different choices for indexing**
There are three main indexing options available in pandas: `.loc[]`, `.iloc[]`, and `[]`.

## `.loc[]`


*   It is primarily label based, but may also be used with a boolean array. For example, you can select a row by its label (`df.loc['row_label']`), or filter rows with a boolean array (`df.loc[boolean_array]`), in which case only the rows whose entry is `True` are selected.











In [None]:
# Sample DataFrame whose row labels happen to be the integers 0-7.
# .loc treats every key below as a *label*, not as a position.
data = {'A': [10, 20, 30, 40, 50, 60, 70, 80],
        'B': [15, 25, 35, 45, 55, 65, 75, 85]}

df = pd.DataFrame(data, index=[0, 1, 2, 3, 4, 5, 6, 7])

# 1. Access by a single label (the row labelled 5); the row comes back as a Series
print("Access by integer (row label 5):")
print(df.loc[5])

# 2. Access by a list of labels [4, 3, 0]; rows are returned in the requested order
print("\nAccess by a list of integers [4, 3, 0]:")
print(df.loc[[4, 3, 0]])

# 3. Access by a label slice 1:7. Unlike ordinary Python slices, a .loc slice
#    includes BOTH endpoints, so the rows labelled 1 through 7 are all returned.
print("\nAccess by a slice 1:7:")
print(df.loc[1:7])

# 4. Access by a boolean array (e.g., filter values where column 'A' > 40)
bool_array = df['A'] > 40
print("\nAccess by a boolean array (A > 40):")
print(df.loc[bool_array])

# 5. Access by a callable function
# .loc calls the function with the DataFrame and indexes with what it returns;
# here that is a boolean mask selecting the rows where column 'B' is greater than 60
print("\nAccess by a callable function (B > 60):")
print(df.loc[lambda x: x['B'] > 60])


Access by integer (row label 5):
A    60
B    65
Name: 5, dtype: int64

Access by a list of integers [4, 3, 0]:
    A   B
4  50  55
3  40  45
0  10  15

Access by a slice 1:7:
    A   B
1  20  25
2  30  35
3  40  45
4  50  55
5  60  65
6  70  75
7  80  85

Access by a boolean array (A > 40):
    A   B
4  50  55
5  60  65
6  70  75
7  80  85

Access by a callable function (B > 60):
    A   B
5  60  65
6  70  75
7  80  85


In [None]:
# Build the row mask in two steps and combine them element-wise with `&`:
# a row is flagged True only when A is greater than 40 AND B is greater than 55.
a_over_40 = df['A'].gt(40)
b_over_55 = df['B'].gt(55)
x = a_over_40 & b_over_55

# The combined mask is itself a pandas Series.
print(f"The type of x is {type(x)}")

# One True/False entry per row of 'df'.
print(x)

# Handing the mask to .loc keeps exactly the rows flagged True.
print(df.loc[x])


The type of x is <class 'pandas.core.series.Series'>
0    False
1    False
2    False
3    False
4    False
5     True
6     True
7     True
dtype: bool
    A   B
5  60  65
6  70  75
7  80  85


## `.iloc[]`


*   It is primarily integer position based (from 0 to length-1 of the axis), but may also be used with a boolean array.

In [None]:
# Define a callable function to filter rows based on a condition
def filter_func(df):
    """Return a boolean NumPy array marking the rows where column 'A' exceeds 30.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a column named 'A'.

    Returns
    -------
    numpy.ndarray
        One boolean per row of ``df``, in row order. The index is stripped so
        the result is a plain array rather than a Series.
    """
    above_threshold = df['A'] > 30
    return above_threshold.to_numpy()

In [34]:
# Create a sample DataFrame (no explicit index is passed, so pandas assigns labels 0-7)
data = {
    'A': [10, 20, 30, 40, 50, 60, 70, 80],
    'B': [5, 15, 25, 35, 45, 55, 65, 75],
    'C': [1, 2, 3, 4, 5, 6, 7, 8]
}
df = pd.DataFrame(data)

# Single integer position. Positions are 0-based, so position 5 is the *sixth* row
# (the printed label below says "5th" loosely; it means position 5).
print(f"Accessing the 5th row (index 5):\n{df.iloc[5]}")

# A list of positions: rows come back in the order requested (4, 3, 0)
print(f"Accessing rows at indices 4, 3, and 0:\n{df.iloc[[4, 3, 0]]}")

# A positional slice follows normal Python rules: positions 1 to 6 (7 is exclusive)
print(f"Accessing rows from index 1 to 6 (7 is exclusive):\n{df.iloc[1:7]}")

# Create a boolean array where values in column 'A' are greater than 25
bool_array = df['A'] > 25
# .values turns the boolean Series into a plain NumPy boolean array before it is handed to .iloc
print(f"To access value using boolean array:\n{df.iloc[bool_array.values]}")

# Using the function to get rows where column 'A' is greater than 30
# (filter_func already returns a plain boolean array)
print(f"Using function to access the values:\n{df.iloc[filter_func(df)]}")


# Row position 3 and column position 2, i.e. the 4th row and 3rd column (0-based positions)
print(f"Accessing the value of the value at row index 3 and column index 2: {df.iloc[3, 2]}")  # Returns the scalar at row position 3, column position 2


Accessing the 5th row (index 5):
A    60
B    55
C     6
Name: 5, dtype: int64
Accessing rows at indices 4, 3, and 0:
    A   B  C
4  50  45  5
3  40  35  4
0  10   5  1
Accessing rows from index 1 to 6 (7 is exclusive):
    A   B  C
1  20  15  2
2  30  25  3
3  40  35  4
4  50  45  5
5  60  55  6
6  70  65  7
To access value using boolean array:
    A   B  C
2  30  25  3
3  40  35  4
4  50  45  5
5  60  55  6
6  70  65  7
7  80  75  8
Using function to access the values:
    A   B  C
3  40  35  4
4  50  45  5
5  60  55  6
6  70  65  7
7  80  75  8
Accessing the value of the value at row index 3 and column index 2: 4


### Differentiate between `.loc[]` and `.iloc[]`

In [4]:
import pandas as pd

# Sample DataFrame with string row labels, so labels and positions are clearly different things
row_labels = ['row1', 'row2', 'row3']
data = dict(
    A=[10, 20, 30],
    B=[40, 50, 60],
    C=[70, 80, 90],
)
df = pd.DataFrame(data, index=row_labels)

# Label-based selection with .loc[]: a single cell, then a label slice
# (label slices include both endpoints, so 'row2' and 'B' are part of the result)
print(f"This is the value of df.loc['row1', 'A'] : {df.loc['row1', 'A']}")
print(f"This is the value of df.loc['row1':'row2', 'A':'B']:\n{df.loc['row1':'row2', 'A':'B']}")

# Position-based selection with .iloc[]: the same cell and the same block,
# addressed by 0-based positions (positional slices exclude the stop value)
print(f"This is the value of df.iloc[0,0]: {df.iloc[0, 0]}")
print(f"This is the value of df.iloc[0:2, 0:2]:\n{df.iloc[0:2, 0:2]}")

This is the value of df.loc['row1', 'A'] : 10
This is the value of df.loc['row1':'row2', 'A':'B']:
       A   B
row1  10  40
row2  20  50
This is the value of df.iloc[0,0]: 10
This is the value of df.iloc[0:2, 0:2]:
       A   B
row1  10  40
row2  20  50


##  `[]`


1.  The primary function of indexing is selecting out lower-dimensional slices.
2.  You can pass a list of columns to `[]` to select columns in that order.
3.  If a column is not contained in the DataFrame, an exception (`KeyError`) will be raised.
4.  Multiple columns can also be set in this manner.



In [16]:
# Eight consecutive daily timestamps starting on 2000-01-01; used as the row index.
dates = pd.date_range('1/1/2000', periods=8)

# Draw the sample values from a *seeded* generator so that Restart & Run All
# produces the same numbers on every run.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((8, 4)),
                  index=dates, columns=['A', 'B', 'C', 'D'])
df.head(6)

Unnamed: 0,A,B,C,D
2000-01-01,1.217277,1.842994,0.108062,0.947492
2000-01-02,1.836212,-0.453953,-1.130149,-0.799224
2000-01-03,-2.266578,-0.841693,2.063475,0.646885
2000-01-04,1.623422,-0.260949,-1.449851,0.176363
2000-01-05,0.87808,0.356,0.177719,-0.134578
2000-01-06,0.197345,1.799294,2.756316,-1.289205


In [18]:
# Plain [] with a column label pulls that column out as a Series.
s = df['A']
# One element taken from the DatetimeIndex that also serves as df's row index.
sixth_label = dates[5]

print(f"The type of s is {type(s)} and \nThe type of dates is {type(dates)}")
print(f"The type of one element of dates is {type(sixth_label)}")
# 's' carries the same index as df, so indexing it with dates[5] is a lookup
# by label; that label sits at position 5 of the Series.
print(f"The value of dates[5] is {sixth_label} and \nThe value of s[dates[5]] is {s[sixth_label]}")

The type of s is <class 'pandas.core.series.Series'> and 
The type of dates is <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
The type of one element of dates is <class 'pandas._libs.tslibs.timestamps.Timestamp'>
The value of dates[5] is 2000-01-06 00:00:00 and 
The value of s[dates[5]] is 0.1973446291815295


### Using multiple columns


1.   The code below assigns one list of columns to another, which is useful for applying a transform (in place) to a subset of the columns. Here it swaps the values of columns `A` and `B`.


In [21]:
# Select columns B and A (in that order) and assign them onto columns A and B,
# which swaps the values held by the two columns of `df`.
# NOTE: this mutates `df`, so running the cell a second time swaps them back.
df[['A','B']] = df[['B','A']]

## Differentiate between `.iloc[]`, `.loc[]` and `[]` using an analogy:

Let's imagine you're organizing books on different shelves in a library. Each book has a specific label (like the title), and each shelf has a specific position (like the row number).

1. `.loc[]` – Think of it as using labels:
* When using `.loc[]`, it's like saying, "I want the book with the title 'Data Science' from the 'Science' section." You are using names or labels to get specific rows and columns.
* *Analogy*: You're asking for the book by its title or category name, so you need to know the labels.
* *Example*: If you ask for `.loc['Science', 'Data Science']`, you're asking for the specific entry in the row labeled 'Science' and the column labeled 'Data Science'.
2. `.iloc[]` – Think of it as using positions:
* When using `.iloc[]`, it's like saying, "I want the book that's on shelf 3, position 5." You're using the numerical positions (like row and column numbers) to access the data.
* *Analogy*: You're counting the shelves and books, asking for the 3rd book from the left on the 2nd shelf. It's purely based on numbers.
* *Example*: If you use `.iloc[2, 4]`, you're asking for the 3rd row and 5th column by their numerical positions, regardless of their labels.
3. `[]` for indexing – Think of it as asking for the whole shelf or column:
* When you just use `[]`, it's like saying, "I want everything from the 'Data Science' section" without specifying a shelf number. You are retrieving an entire column or set of columns by label.
* *Analogy*: You’re asking for all the books in one section by its label, without focusing on individual positions.
* *Example*: `df['Data Science']` retrieves the entire column named 'Data Science' but doesn’t care about row positions or column numbers.

## Summary:

* `.loc[]` is like asking for specific rows and columns by labels (e.g., section names or book titles).
* `.iloc[]` is like asking for specific rows and columns by their numeric positions (e.g., shelf number or book position).
* `[]` is like asking for the entire section/column by its name (e.g., the whole 'Data Science' column).