### Pandas Revision 

In [3]:
import numpy as np
import pandas as pd

In [4]:
# Five number sequences, written one per row; each row becomes a column below.
num_data = np.array([
    [1, 2, 3, 4, 5],       # running number
    [2, 4, 6, 8, 10],      # even
    [1, 3, 5, 7, 9],       # odd
    [2, 3, 5, 7, 11],      # prime
    [1, 4, 9, 16, 25],     # square
])
# Transpose so that every sequence runs down a column instead of across a row.
num = num_data.transpose()
num_index = [f'num{i}' for i in range(1, 6)]
num_col = ['R-num', 'even', 'odd', 'prime', 'square']

# Label the rows num1..num5 and the columns with the sequence names.
df = pd.DataFrame(data=num, index=num_index, columns=num_col)
df

Unnamed: 0,R-num,even,odd,prime,square
num1,1,2,1,2,1
num2,2,4,3,3,4
num3,3,6,5,5,9
num4,4,8,7,7,16
num5,5,10,9,11,25


#### Accessing Column

In [5]:
# Single brackets with one column name return that column as a Series.
s_even = df['even']
# Chained element access such as df['even'][0] is discouraged: with string row labels the 0
# is treated as a position, a fallback that recent pandas versions deprecate.
# Use df.loc['num1', 'even'] (labels) or df.iloc[0, 1] (positions) instead.
# Dot notation (df.even) also returns the Series, but only for column names that are
# valid Python identifiers (no spaces or special characters, so not 'R-num').

# Double brackets (a list of column names) return a DataFrame, even for a single column.
d_even = df[['even', 'odd']]
# The result is an ordinary DataFrame: reach individual cells with .loc/.iloc
# rather than by chaining another [0]-style lookup onto it.

### .loc[] - Label Based Indexing
- Selects data based on the labels of your rows (index) and columns.
- The row and column names you see when you print the DataFrame.
- When slicing, both the start and stop labels are included.

In [6]:
# Sample records stored column-wise: each key is a column, each list holds one value per person.
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Age': [25, 30, 35, 22, 28],
    'City': ['New York', 'Paris', 'London', 'Berlin', 'Tokyo'],
    'Score': [85, 90, 78, 92, 88]
}

# Row labels id_A ... id_E form the index used by the label-based examples.
row_labels = [f'id_{letter}' for letter in 'ABCDE']
df = pd.DataFrame(data, index=row_labels)

# Caption, frame and a 30-character rule, each on its own line.
print("Our Sample DataFrame:", df, "-" * 30, sep="\n")

Our Sample DataFrame:
         Name  Age      City  Score
id_A    Alice   25  New York     85
id_B      Bob   30     Paris     90
id_C  Charlie   35    London     78
id_D    David   22    Berlin     92
id_E      Eve   28     Tokyo     88
------------------------------


In [None]:
# Label-based selection
# General syntax: df.loc[row_indexer, column_indexer]
# A single row label returns that row as a Series (indexed by the column names).
df.loc['id_B']                          # access a row by its index label
df.loc['id_B']['Name']                  # chained lookup of 'Name' within that row; prefer df.loc['id_B', 'Name']
# Only the last expression of a cell is displayed, so this cell shows 'Bob'.

'Bob'

In [None]:
# Select several rows by passing a list of labels; the result is a DataFrame.
df.loc[['id_A', 'id_B', 'id_E']]        # outer [] is the indexer, inner [] is the Python list of labels

# Select a slice of rows by label (both endpoints are included).
df.loc['id_B' : 'id_E']                 # rows id_B through id_E; a slice needs no extra list brackets

In [24]:
# Select a single cell by row label and column label (returns the scalar value).
df.loc['id_B', 'City']

# Select specific columns of specific rows: pass one list per axis (returns a DataFrame).
df.loc[['id_A', 'id_B'], ['Name', 'Score']]

# Select all rows (a bare ":" slice) for specific columns.
df.loc[:, ['Name', 'Age']]

Unnamed: 0,Name,Age
id_A,Alice,25
id_B,Bob,30
id_C,Charlie,35
id_D,David,22
id_E,Eve,28


In [23]:
# Boolean indexing: the comparison yields a True/False Series and .loc keeps the rows that are True.
df.loc[df['Score']>80]

# Combined condition: wrap each comparison in parentheses (& binds tighter than >) and use
# & / | instead of `and` / `or`. The second indexer restricts the columns that are returned.
df.loc[(df['Age']>28) & (df['Score']>85), ['Name', 'City']]

Unnamed: 0,Name,City
id_B,Bob,Paris


### .iloc[] - Access by Integer Position
- Selects data based on the integer positions of your rows and columns using .iloc[]
- Uses numerical indices (like 0, 1, 2…) rather than row and column names.
- When slicing, the start is included but the stop is excluded — like standard Python slicing.

In [29]:
# Redisplay the sample frame so row/column positions can be read off it for the .iloc examples.
df

Unnamed: 0,Name,Age,City,Score
id_A,Alice,25,New York,85
id_B,Bob,30,Paris,90
id_C,Charlie,35,London,78
id_D,David,22,Berlin,92
id_E,Eve,28,Tokyo,88


In [None]:
# Syntax: df.iloc[row_positions, column_positions]
df.iloc[0]                  # single position -> the first row, returned as a Series
df.iloc[0:3]                # slice -> DataFrame with positions 0, 1, 2 (the stop position 3 is excluded)
df.iloc[[0, 2], [0, 3]]     # lists of positions -> rows 0 and 2, columns 0 and 3 (Name, Score)

Unnamed: 0,Name,Score
id_A,Alice,85
id_C,Charlie,78


### Key Differences Summarized:
| Feature            | `.loc`                                         | `.iloc`                                      |
|-------------------|------------------------------------------------|----------------------------------------------|
| **Input Type**     | Labels (index/column names)                   | Integer positions (0-based)                  |
| **Slicing End**    | Inclusive (`df.loc['A':'C']` gets A, B, C)    | Exclusive (`df.iloc[0:3]` gets 0, 1, 2)      |
| **Boolean Index**  | Accepts a boolean Series or array (the usual choice) | Accepts a boolean array or list, but not a boolean Series |
| **Flexibility**    | Good when labels are meaningful/stable        | Good when positions are known/fixed          |


### When to use `df.` (methods, attributes, and dot-notation column access)

In [None]:
# Methods (functions you call, with parentheses)
df.head()
df.tail()

# Attributes (properties you access, no parentheses)
df.columns
df.index
df.shape

# Dot notation for single-column access. It only works when the column name is a valid
# Python identifier (no spaces or special characters) and does not clash with an existing
# DataFrame attribute or method. The column must exist on the current `df`, whose columns
# are Name, Age, City and Score; the 'even'/'odd' columns belonged to an earlier frame that
# `df` no longer refers to, so df.even would raise AttributeError here.
df.Name
df.Age

### Exercise

In [36]:
# Sample data
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    'Age': [24, 27, 22, 32, 29],
    'City': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'],
    'Score': [85, 90, 78, 88, 92]
}

# Create DataFrame with custom index
df = pd.DataFrame(data, index=['A', 'B', 'C', 'D', 'E'])

# Show the DataFrame
print("DataFrame for practice:")
print(df)


DataFrame for practice:
      Name  Age         City  Score
A    Alice   24     New York     85
B      Bob   27  Los Angeles     90
C  Charlie   22      Chicago     78
D    David   32      Houston     88
E      Eva   29      Phoenix     92


In [39]:
# 1. Get the Age of the person with index label 'C'
# .loc[row_label, column_label] with two scalar labels returns the single cell value.
age = df.loc['C', 'Age']
# NOTE(review): 'inde' in the message is a typo for 'index'; it is left unchanged here
# because correcting it also changes the printed output.
print(f'Age of person with inde label C: {age}')

Age of person with inde label C: 22


In [41]:
# 2. Select the first 3 rows of the DataFrame using .iloc
# The positional slice :3 covers positions 0, 1, 2 (stop excluded) and returns a DataFrame.
first_3_rows = df.iloc[:3]
# The space after \n in the message indents the first printed line by one character,
# which is why the column header looks shifted relative to the rows in the output.
print(f'First 3 rows of DataFrame: \n {first_3_rows}')

First 3 rows of DataFrame: 
       Name  Age         City  Score
A    Alice   24     New York     85
B      Bob   27  Los Angeles     90
C  Charlie   22      Chicago     78


In [None]:
# 3. Get the Name and Score of the person in the last row using .iloc
# Position -1 is the last row; column positions 0 and 3 are Name and Score.
# Selecting one row with a list of columns returns a Series.
last_row_data = df.iloc[-1, [0, 3]]             # alternative using column labels: df.iloc[-1][['Name', 'Score']]
print(f'Name and score of last row: \n {last_row_data}')

Name and score of last row: 
 Name     Eva
Score     92
Name: E, dtype: object


In [45]:
# 4. Select the City of people from label 'B' to 'D' (inclusive)
# Label slices in .loc include both endpoints, so rows B, C and D come back as a Series.
city = df.loc['B':'D', 'City']
print(f'City of people from B to D: \n {city}')

City of people from B to D: 
 B    Los Angeles
C        Chicago
D        Houston
Name: City, dtype: object


In [48]:
# 5. Retrieve the entire 2nd column using .iloc
# ":" keeps every row; column position 1 is the second column (Age), returned as a Series.
sec_col = df.iloc[:, 1]
print(f'second column: \n {sec_col}')

second column: 
 A    24
B    27
C    22
D    32
E    29
Name: Age, dtype: int64


In [None]:
# 6. Get the data of the person named 'David'
# Build the boolean mask first, then keep only the rows where it is True.
name_is_david = df['Name'] == 'David'
df.loc[name_is_david]

Unnamed: 0,Name,Age,City,Score
D,David,32,Houston,88
