### Instructor Demo: Indexing

This notebook demonstrates several ways to slice and select data from a DataFrame using the pandas `loc` (label-based) and `iloc` (integer-position-based) indexers.

In [None]:
# Import libraries and dependencies
import pandas as pd
from pathlib import Path

### Read CSV in as DataFrame

In [None]:
# Location of the source data, relative to the notebook's working directory
file_path = Path("people.csv")

# Load the CSV into a DataFrame and preview its first five rows
people_csv = pd.read_csv(filepath_or_buffer=file_path)
people_csv.head(5)

### View Summary Statistics (Default Numeric)

In [None]:
# View the summary statistics for the DataFrame. Called with no arguments,
# describe() reports only the numeric columns of a mixed-dtype DataFrame.
people_csv.describe()

### View Summary Statistics (All Columns)

In [None]:
# View the summary statistics for every column. include="all" adds the
# non-numeric columns; statistics that do not apply to a column show as NaN.
people_csv.describe(include="all")

### Index Selection Using iloc

In [None]:
# Select the first row of the DataFrame by integer position (iloc is zero-based)
people_csv.iloc[0]

In [None]:
# But this does not look like a dataframe...what type is it?

In [None]:
# Check the type of the object returned by a single-row iloc selection
type(people_csv.iloc[0])

In [None]:
# Ah, a pandas Series...selecting a single row returns it as a one-dimensional Series whose index holds the DataFrame's column names

In [None]:
# Second row of the DataFrame, i.e. integer position 1
people_csv.iloc[1]

In [None]:
# First 10 rows of the DataFrame (positions 0 through 9; the slice end is exclusive)
people_csv.iloc[:10]

In [None]:
# Select the last row of the DataFrame; negative positions count back from the end
people_csv.iloc[-1]

In [None]:
# First column of the DataFrame (all rows), previewing the first five values
people_csv.iloc[:, 0].head()

In [None]:
# Second column of the DataFrame (all rows), previewing the first five values
people_csv.iloc[:, 1].head()

In [None]:
# Last column of the DataFrame (all rows), previewing the first five values
people_csv.iloc[:, -1].head()

In [None]:
# First two columns of the DataFrame (positions 0 and 1), with all rows
people_csv.iloc[:, :2].head()

In [None]:
# Pick out the 1st, 5th, 8th, and 22nd rows of the 1st, 4th, and 6th columns
# by passing lists of integer positions for rows and columns
people_csv.iloc[[0, 4, 7, 21], [0, 3, 5]]

In [None]:
# First 5 rows of the 3rd, 4th, and 5th columns (column positions 2 through 4)
people_csv.iloc[:5, 2:5]

### Assignment Using iloc

![LOC_ILOC](./iloc_loc_differences_similarities.png) - Source: https://towardsdatascience.com/how-to-use-loc-and-iloc-for-selecting-data-in-pandas-bd09cb4c3d79

In [None]:
# Modify the 'first_name' column value of the first row.
# iloc takes integer positions, so columns.get_loc() is used to translate
# the 'first_name' column label into its integer position.
people_csv.iloc[0, people_csv.columns.get_loc("first_name")] = "Arya"
# Display the first row to confirm the new value
people_csv.iloc[0]

In [None]:
# Wait, what? You can use get_loc() to return the integer location of a requested label. This is very helpful for tables
# with many features (i.e., columns). It's not uncommon for ML models to have hundreds, if not thousands, of features.
# Random Forests are especially useful algorithms for handling datasets with many features.

# Try it yourself in Python console...what does people_csv.columns.get_loc("first_name") return?

### Index Selection Using loc

In [None]:
# Use the 'first_name' values as the row index. drop=False keeps 'first_name'
# as a regular column too, which a later cell relies on when it assigns to
# that column through loc.
people_csv = people_csv.set_index("first_name", drop=False)
people_csv.head()

In [None]:
# Sort the rows in ascending order of index label. sort_index() returns a
# new DataFrame, so the result is assigned back to people_csv.
people_csv = people_csv.sort_index()
people_csv.head()

In [None]:
# Select by index label with loc: returns the row(s) whose index is 'Evan'
people_csv.loc["Evan"]

In [None]:
# Slice the data to output a range of rows based on the index labels.
# Unlike iloc's positional slices, a loc label slice includes both endpoints.
people_csv.loc["Aleshia":"Svetlana"].head()

In [None]:
# Keep only the rows whose 'gender' column equals "M" by passing a boolean
# mask to loc, then preview the first five matches
people_csv.loc[people_csv["gender"].eq("M")].head()

### Assignment Using loc

In [None]:
# Show the row with the index 'Yun' before its 'first_name' value is modified
print("Original Data", "*" * 20, sep="\n")
people_csv.loc["Yun"]

In [None]:
# Assign a new 'first_name' column value to the row labeled 'Yun'.
# Only the column value changes; the index label itself remains 'Yun'.
people_csv.loc["Yun", "first_name"] = "Yuna"

In [None]:
# Show the same row again (still labeled 'Yun' in the index) after the assignment
print("Data After Modification", "*" * 20, sep="\n")
people_csv.loc["Yun"]