# Loc
The Pandas loc indexer is used with DataFrames to select rows by label/index, or to select rows with a boolean / conditional lookup.<br>
__Syntax__ : data.loc[<row selection>, <column selection>] <br>

In [None]:
import pandas as pd
import random
 
# read sample dataset (path is relative to the notebook's working directory)
data = pd.read_csv('../datasets/uk-500.csv')

# set a numeric id for use as an index for examples.
# not really needed to have a random id, but the code is useful as an example on how to do that
# (left disabled on purpose; it is the only use of the `random` import)
#data['id'] = [random.randint(0,1000) for x in range(data.shape[0])]
 
# show the first five rows as a quick check of the load
data.head(5)

In [None]:
# show the current row index; no index has been set yet, so this is whatever read_csv assigned
data.index

In [None]:
# reset the index back to the default integer index;
# drop=True discards the old index instead of inserting it as a regular column
data = data.reset_index(drop=True)

In [None]:
# use the last_name column as the row labels (index);
# set_index returns a new frame, which is bound back to `data`
data = data.set_index("last_name")
data.head()

In [None]:
# select by row label: last_name is the index, so a last name can be looked up directly
# this wouldn't have worked if the default (0, 1, ..) was still the index
data.loc['Andrade']

In [None]:
# multiple rows: pass a list of labels (note the double brackets)
data.loc[['Andrade','Veness']]

In [None]:
# rows by label and columns by name: first list selects rows, second list selects columns
data.loc[['Andrade','Veness'],['first_name','address','city']]

In [None]:
# Booleans: comparing a column with a value gives one True/False per row
data['first_name']=='Erasmo'

In [None]:
# Booleans work as a filter: only rows where the condition is True are kept,
# and the list after the comma picks the columns to show
data.loc[ data['first_name']=='Erasmo', ['first_name','company_name','email','phone1']]

In [None]:
# a single column label (a plain string, not a list) returns a Series
data.loc[ data['first_name']== 'Antonio', 'email']

In [None]:
# why does this look different? series vs dataframe:
# a list of column labels (['email']) returns a DataFrame, a bare label ('email') returns a Series
data.loc[ data['first_name']== 'Antonio', ['email']]

## Iloc
The iloc indexer is used for __integer-location based indexing / selection by position__.

__Syntax__ : data.iloc[<row selection>, <column selection>] <br>

 iloc selects rows and columns by number, in the order that they appear in the data frame.<br>

### Rows

In [None]:
# Single selections using iloc and DataFrame
# a single integer position returns that row as a Series
data.iloc[0] # first row of data frame (Aleshia Tomkiewicz)

In [None]:
# positions are zero-based, so 1 is the second row
data.iloc[1] # second row of data frame (Evan Zigomalas)

In [None]:
# negative positions count from the end
data.iloc[-1] # last row of data frame (Mi Richan)

### Columns

In [None]:
# first column of data frame (first_name); ':' selects all rows
data.iloc[:,0]

In [None]:
# second column of data frame
# NOTE: last_name was moved into the index by set_index("last_name") earlier,
# so it is no longer a column; position 1 is the column that follows first_name
data.iloc[:,1] 

In [None]:
# last column of data frame (web); -1 counts from the end
data.iloc[:,-1] 

### Multiple row and column selections using iloc and DataFrame

In [None]:
# first five rows of dataframe (positions 0-4; the slice end is exclusive)
data.iloc[0:5] 

In [None]:
# first two columns of data frame with all rows (positions 0 and 1; the slice end is exclusive)
data.iloc[:, 0:2] 

In [None]:
# lists of zero-based positions pick arbitrary rows and columns:
# 1st, 4th, 7th, 25th row + 1st 6th 7th columns.
data.iloc[[0,3,6,24], [0,5,6]] 

In [None]:
# first 5 rows and 6th, 7th, 8th columns of data frame
# (5:8 selects positions 5, 6 and 7; the slice end is exclusive).
# NOTE(review): this comment previously named the columns "county -> phone1";
# confirm the actual names now that last_name is the index and not a column.
data.iloc[0:5, 5:8]

In [None]:
# types: the kind of indexer decides whether iloc returns a Series or a DataFrame
indexers = (
    100,                          # single row position          -> Series
    [100],                        # list with one row position   -> DataFrame
    slice(2, 10),                 # row slice 2:10               -> DataFrame
    (slice(1, 2), 3),             # rows 1:2, single column 3    -> Series
    (slice(1, 2), [3]),           # rows 1:2, column list [3]    -> DataFrame
    (slice(1, 2), slice(3, 6)),   # rows 1:2, columns 3:6        -> DataFrame
)
# each selection is evaluated and printed in turn, in the order listed above
for indexer in indexers:
    print(type(data.iloc[indexer]))

# Isin

The isin() method checks whether each element of the DataFrame is one of the specified values. It returns a DataFrame with the same shape as the original, in which each value has been replaced with True if it was one of the specified values, and False otherwise.<br>

__Syntax__ : dataframe.isin(values)<br>
values - The values to look for in the DataFrame (a list-like, Series, DataFrame or dict).<br>

In [None]:
# example: isin() tests every cell of the frame against the given values
data = dict(name=["A", "B", "C"], age=[20, 63, 12])
df = pd.DataFrame(data)

# True where a cell equals 20 or 12, False everywhere else
wanted_values = [20, 12]
print(df.isin(wanted_values))

In [None]:
# Build a small employee table to filter
data = pd.DataFrame(
    dict(
        Name=['John', 'Sam', 'Luna', 'Harry'],
        Age=[25, 45, 23, 32],
        Department=['Sales', 'Engineering', 'Engineering', 'Human Resource'],
    )
)

# Departments to look for
departments_to_filter = ['Engineering', 'Sales', 'Finance']

# Called on the whole frame, isin() tests every cell of every column
result = data.isin(departments_to_filter)
print(result)

In [None]:
# just filtering the department: isin() on a single column
# returns a boolean Series with one entry per row
result = data['Department'].isin(departments_to_filter)
 
print(result)

In [None]:
# take the filter (boolean Series, one value per row) to get dataframe with matched rows
# this works well if the filter is one column (not multiple columns)
data[ result ]

In [None]:
# inclusive bounds of the age range to keep
start_age=20
end_age=30
# Using isin() method to filter employees on age;
# range() excludes its stop value, hence end_age+1 so that end_age itself is included
age_filter = data['Age'].isin(range(start_age, end_age+1))
# Using the filter to retrieve the data
data[ age_filter ]

In [None]:
# dictionary that has key-value pairs with variable and filter:
# each key is a column name, each value the list of values to match in that column
dict_data_to_filter = {'Name': ['Sam', 'Harry'], 'Department': ['Engineering']}
 
# with a dict, each listed column is tested against its own list of values
result = data.isin(dict_data_to_filter)
 
print(result)

In [None]:
# note how this doesn't work the same as filtering with a boolean Series:
# a boolean DataFrame masks cell by cell, so every row is still in the output
# (even if all its values are False) and the masked cells show up as NaN
data[result]

### Get rows that satisfy both conditions (Name: Sam or Harry, Department: Engineering)

In [None]:
# easy way (sort of manual): build one boolean mask per column, then AND them
name_matches = data['Name'].isin(dict_data_to_filter['Name'])
department_matches = data['Department'].isin(dict_data_to_filter['Department'])

# a row is kept only when both masks are True for it
result = data[name_matches & department_matches]
result

In [None]:
# fancy (more generic) way
# https://stackoverflow.com/questions/67119428/filter-dataframe-using-dictionary-with-multiple-values

In [None]:
# list of conditions: one (column name, allowed values) pair per dictionary entry
list(dict_data_to_filter.items())

In [None]:
# numpy is otherwise only imported in a later cell; import it here so this
# cell does not fail with a NameError when the notebook is run top to bottom
import numpy as np

# one boolean mask per (column name, allowed values) pair in the filter dictionary
masks = [data[k].isin(v) for k, v in dict_data_to_filter.items()]

# AND all masks together: a row is kept only if it satisfies every condition
result = data[np.logical_and.reduce(masks)]
result

In [None]:
# example logical or using just numpy arrays
# https://stackoverflow.com/a/20528566/2901002
import numpy as np

# three independent boolean vectors of equal length
x, y, z = (
    np.array(flags)
    for flags in (
        [True, True, False, False],
        [True, False, True, False],
        [False, False, False, False],
    )
)

# stack them as the rows of a single 2-D array
xyz = np.array([x, y, z])
xyz

In [None]:
# do this one 'top to bottom' (reduce works along axis 0 by default):
# each position is True if any of the 3 arrays is True there, False otherwise
# (here only the last position is False)
np.logical_or.reduce(xyz)