# Return rows by index lables with `.loc[ ]`

### Import pandas

In [4]:
# Import pandas
import pandas as pd
from pathlib import Path
path = Path('C:/Users/alouden01/Documents/python-pandas-for-data-manipulation/starter_files/skill_11_extracting_values/cereal.csv')


### Read CSV
In this set of videos we'll use the [80 cereals dataset](https://www.kaggle.com/crawford/80-cereals) from Kaggle which is also avaiable in the course GitHub repo here: [80 cereals dataset](https://github.com/cbtn-data-science-ml/python-pandas-for-data-manipulation/tree/main/datasets). I've icluded the Kaggle link so you can learn more about the dataset, but they're the same, so feel free to use either resourse.

We'll also use the name as the index by using the `index_col` parameter inside of the `.read_csv()` method.

In [6]:
cereal = pd.read_csv(path)
cereal.head()

Unnamed: 0,name,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
0,100% Bran,N,C,70,4,1,130,10.0,5.0,6,280,25,3,1.0,0.33,68.402973
1,100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8,135,0,3,1.0,1.0,33.983679
2,All-Bran,K,C,70,4,1,260,9.0,7.0,5,320,25,3,1.0,0.33,59.425505
3,All-Bran with Extra Fiber,K,C,50,4,0,140,14.0,8.0,0,330,25,3,1.0,0.5,93.704912
4,Almond Delight,R,C,110,2,2,200,1.0,14.0,8,-1,25,3,1.0,0.75,34.384843


### Optimize dataset with sorting
It's  a best practice to sort the index to help pandas be more efficient when looking for values.

In [12]:
# cereal.set_index('name',inplace=True)
cereal.info()

<class 'pandas.core.frame.DataFrame'>
Index: 77 entries, 100% Bran to Wheaties Honey Gold
Data columns (total 15 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   mfr       77 non-null     object 
 1   type      77 non-null     object 
 2   calories  77 non-null     int64  
 3   protein   77 non-null     int64  
 4   fat       77 non-null     int64  
 5   sodium    77 non-null     int64  
 6   fiber     77 non-null     float64
 7   carbo     77 non-null     float64
 8   sugars    77 non-null     int64  
 9   potass    77 non-null     int64  
 10  vitamins  77 non-null     int64  
 11  shelf     77 non-null     int64  
 12  weight    77 non-null     float64
 13  cups      77 non-null     float64
 14  rating    77 non-null     float64
dtypes: float64(5), int64(8), object(2)
memory usage: 9.6+ KB


### Access rows by index lables with `.loc[ ]`

In [13]:
cereal.sort_index(inplace=True)
cereal.info()

<class 'pandas.core.frame.DataFrame'>
Index: 77 entries, 100% Bran to Wheaties Honey Gold
Data columns (total 15 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   mfr       77 non-null     object 
 1   type      77 non-null     object 
 2   calories  77 non-null     int64  
 3   protein   77 non-null     int64  
 4   fat       77 non-null     int64  
 5   sodium    77 non-null     int64  
 6   fiber     77 non-null     float64
 7   carbo     77 non-null     float64
 8   sugars    77 non-null     int64  
 9   potass    77 non-null     int64  
 10  vitamins  77 non-null     int64  
 11  shelf     77 non-null     int64  
 12  weight    77 non-null     float64
 13  cups      77 non-null     float64
 14  rating    77 non-null     float64
dtypes: float64(5), int64(8), object(2)
memory usage: 9.6+ KB


In [17]:
cereal.loc['Almond Delight']

mfr                 R
type                C
calories          110
protein             2
fat                 2
sodium            200
fiber             1.0
carbo            14.0
sugars              8
potass             -1
vitamins           25
shelf               3
weight            1.0
cups             0.75
rating      34.384843
Name: Almond Delight, dtype: object

In [23]:
# if you misspell a lable, you'll get a KeyError since it doesn't exist
cereal['All-Bran':'Almond Delight']

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
All-Bran,K,C,70,4,1,260,9.0,7.0,5,320,25,3,1.0,0.33,59.425505
All-Bran with Extra Fiber,K,C,50,4,0,140,14.0,8.0,0,330,25,3,1.0,0.5,93.704912
Almond Delight,R,C,110,2,2,200,1.0,14.0,8,-1,25,3,1.0,0.75,34.384843


### Slicing with `.loc[ ]`

In [24]:
# label slicing is inclusive 
cereal.loc[['All-Bran','Triples'],['fiber','carbo','sugars']]


Unnamed: 0_level_0,fiber,carbo,sugars
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
All-Bran,9.0,7.0,5
Triples,0.0,21.0,3


In [31]:
net_mask = cereal['net_carbs'] = cereal['carbo'] - cereal['fiber']  
cereal.sort_values('net_carbs')

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating,net_carbs
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
All-Bran with Extra Fiber,K,C,50,4,0,140,14.0,8.0,0,330,25,3,1.00,0.50,93.704912,-6.0
100% Bran,N,C,70,4,1,130,10.0,5.0,6,280,25,3,1.00,0.33,68.402973,-5.0
Quaker Oatmeal,Q,H,100,5,2,0,2.7,-1.0,-1,110,0,1,1.00,0.67,50.828392,-3.7
All-Bran,K,C,70,4,1,260,9.0,7.0,5,320,25,3,1.00,0.33,59.425505,-2.0
Post Nat. Raisin Bran,P,C,120,3,1,200,6.0,11.0,14,260,25,3,1.33,0.67,37.840594,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Triples,G,C,110,2,1,250,0.0,21.0,3,60,25,3,1.00,0.75,39.106174,21.0
Total Corn Flakes,G,C,110,2,1,200,0.0,21.0,3,35,100,3,1.00,1.00,38.839746,21.0
Corn Chex,R,C,110,2,0,280,0.0,22.0,3,25,25,1,1.00,1.00,41.445019,22.0
Rice Krispies,K,C,110,2,0,290,0.0,22.0,3,35,25,1,1.00,1.00,40.560159,22.0


In [None]:
# you can return every other ceral using steps of 2


In [None]:
# you can get the same results by omiting the start:end labels


### Returning multiple indexes

In [None]:
# order matters


### Return specific columns for an index label

In [None]:
# you can also return mulpitle rows with specific columns


### You can also leverage slicing!

In [None]:
# what is this?
