In [1]:
import numpy as np
import pandas as pd 

# 1. Accessing Data by Columns
Columns in a DataFrame can be accessed directly by their names using either the dot notation or the bracket notation.

In [3]:
# Example dataset: customer purchases in an e-commerce store.
# Every keyword below becomes one column of the resulting DataFrame.
data = dict(
    CustomerID=[101, 102, 103],
    PurchaseAmount=[250, 180, 325],
    Country=["USA", "Canada", "Mexico"],
)
df = pd.DataFrame(data=data)
df  # bare last expression so the notebook renders the table

Unnamed: 0,CustomerID,PurchaseAmount,Country
0,101,250,USA
1,102,180,Canada
2,103,325,Mexico


In [None]:
# Bracket notation: works for any column label, including labels with spaces.
purchase_amounts = df["PurchaseAmount"]
print(purchase_amounts)

# Dot (attribute) notation returns the same Series, but it only works when the
# column name is a valid Python identifier (e.g. no spaces) and does not clash
# with an existing DataFrame attribute or method.
assert purchase_amounts.equals(df.PurchaseAmount)

# 2. Accessing Data by Rows with .loc[] and .iloc[]
Pandas provides .loc[] for label-based indexing and .iloc[] for integer-based indexing.

# A. .loc - Label-Based Indexing
.loc is used to access data based on labels (the index or column names) of the DataFrame.
 It allows access to both rows and columns by their labels, making it flexible for filtering based on known names.
 Slicing and boolean arrays can also be used with .loc.

In [5]:
# People table with explicit string row labels ("a", "b", "c") in place of
# the default integer index.
data = dict(
    Name=["Alice", "Bob", "Charlie"],
    Age=[24, 27, 22],
    City=["New York", "Los Angeles", "Chicago"],
)
df = pd.DataFrame(data, index=list("abc"))
df  # bare last expression so the notebook renders the table

Unnamed: 0,Name,Age,City
a,Alice,24,New York
b,Bob,27,Los Angeles
c,Charlie,22,Chicago


In [7]:
# Look up one row by its index label. The result is a Series whose own index
# holds the column names.
row_b = df.loc["b", :]
print(row_b)

Name            Bob
Age              27
City    Los Angeles
Name: b, dtype: object


In [None]:
# Pick out a single cell: row label first, then column label.
row_label, column_label = "b", "Age"
age_bob = df.loc[row_label, column_label]
print(age_bob)

In [None]:
# Select several rows and several columns at once by passing lists of labels.
wanted_rows = ["a", "c"]
wanted_columns = ["Name", "City"]
subset = df.loc[wanted_rows, wanted_columns]
print(subset)

# B. .iloc - Integer-Based Indexing
.iloc is used to access data by integer-based positions (row and column indices).
It requires integer indices instead of labels, making it especially useful when the DataFrame doesn’t have meaningful labels.
Similar to .loc, .iloc allows you to specify slices, ranges, and lists of positions.

In [9]:
# Fetch one row by position. Positions are zero-based, so 1 is the second row.
second_row = df.iloc[1, :]
print(second_row)

Name            Bob
Age              27
City    Los Angeles
Name: b, dtype: object


In [11]:
# Pick out a single cell by zero-based (row position, column position).
row_pos, col_pos = 1, 1
age_second_row = df.iloc[row_pos, col_pos]
print(age_second_row)

27


In [None]:
# Select several rows and several columns at once by passing lists of
# zero-based positions.
row_positions = [0, 2]
column_positions = [0, 2]
subset = df.iloc[row_positions, column_positions]
print(subset)

# 3. Accessing Data with .at[] and .iat[]
The .at[] and .iat[] methods allow fast access to a single scalar value in a DataFrame by label and integer position, respectively.

# a. .at - Fast Label-Based Scalar Access
.at is designed for fast access to a single scalar value (one cell) based on row and column labels.
It’s more efficient than .loc when accessing a single element because it’s optimized for scalar access.

# b. .iat - Fast Integer-Based Scalar Access
.iat is similar to .at, but it uses integer-based positions to quickly access a single scalar value. Like .iloc, it takes zero-based row and column positions rather than labels.

In [None]:
# Student scores, one column per subject. No index is passed, so rows are
# labelled with the default integers 0, 1, 2.
data = dict(
    StudentID=[301, 302, 303],
    Math=[88, 92, 79],
    Science=[91, 85, 88],
)
df = pd.DataFrame(data=data)

In [None]:
# Math score of the second student, addressed by labels: the row label (1)
# and the column label ("Math").
row_label, column_label = 1, "Math"
math_score_label = df.at[row_label, column_label]
print(math_score_label)

In [None]:
# Math score of the second student, addressed purely by zero-based position:
# row 1, column 1.
row_pos, col_pos = 1, 1
math_score_pos = df.iat[row_pos, col_pos]
print(math_score_pos)

# 4. Accessing Data with .get()
Like a dictionary's .get(), DataFrame.get(key, default) looks up the column named key and returns it; if no such column exists it returns default (None when omitted) instead of raising a KeyError.

Example:
Consider a DataFrame tracking expenses with optional categories.

In [None]:
# Expense records: one row per expense, with its category and amount.
data = dict(
    Category=["Food", "Travel", "Misc"],
    Amount=[150, 200, 75],
)
df = pd.DataFrame(data=data)

In [None]:
# DataFrame.get(key, default) looks up a *column* label, much like dict.get().
# When the column exists, it is returned as a Series:
amounts = df.get("Amount", "No data")
print(amounts)

# "Food" is a value stored in the "Category" column, not a column label, so
# this lookup misses and the default is returned instead of raising KeyError.
food_expenses = df.get("Food", "No data")
print(food_expenses)