# Tutorial 2.1

Date: 2019

[Data Address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv)

---

## Import libraries

In [97]:
# Numerical and tabular libraries used by the notebook.
import numpy as np
import pandas as pd

# NOTE(review): this silences every Python warning for the whole session,
# including any raised by pandas; consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')

# Show the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

## Read the dataset

In [98]:
# Location of the tab-separated Chipotle orders file.
url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'

# Fields are separated by tabs rather than commas.
chipo = pd.read_csv(url, delimiter='\t')

In [99]:
# Dimensions as (rows, columns), then the first and the last three rows.
chipo.shape
chipo.head(n=3)
chipo.tail(n=3)

(4622, 5)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39


Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
4619,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...",$11.25
4620,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...",$8.75
4621,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...",$8.75


## Clean

### Revise the item_price

In [100]:
# Convert item_price from text such as '$2.39 ' into a float column.
# Peek at the values before conversion.
chipo.item_price.loc[[0, 9]]
chipo.item_price[0]

# Strip surrounding whitespace and the leading '$' explicitly instead of
# slicing off fixed positions: value[1:-1] silently drops the last digit
# whenever a price has no trailing space. Casting to str first keeps this
# cell re-runnable once the column already holds floats.
chipo['item_price'] = (
    chipo['item_price']
    .astype(str)
    .str.strip()
    .str.lstrip('$')
    .astype(float)
)

# Check the revised data.
chipo.head(3)

0    $2.39 
9    $9.25 
Name: item_price, dtype: object

'$2.39 '

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39


### Delete the duplicates

In [101]:
# Rows count as duplicates when they share both item_name and quantity.
# Inspect a small slice of the raw data and which of its rows are flagged.
chipo.iloc[7:10]
chipo.iloc[7:10].duplicated(subset=['item_name', 'quantity'])

chipo_filtered = chipo.drop_duplicates(subset=['item_name', 'quantity'])

# The same positions after de-duplication.
chipo_filtered.iloc[7:10]

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",11.75
8,4,1,Steak Soft Tacos,"[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...",9.25
9,5,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...",9.25


7    False
8    False
9     True
dtype: bool

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",11.75
8,4,1,Steak Soft Tacos,"[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...",9.25
10,5,1,Chips and Guacamole,,4.45


### Select Data

In [102]:
# Keep only the rows whose quantity equals 1.
chipo_filtered.head()

chipo_one_prod = chipo_filtered[chipo_filtered['quantity'].eq(1)]

chipo_one_prod.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98


Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",10.98


### Count the number of distinct item_name values with item_price > 10

In [103]:
# Names of the items priced above 10 ...
chipo_one_prod.loc[chipo_one_prod['item_price'].gt(10), 'item_name']

# ... and how many distinct names that is.
chipo_one_prod.loc[chipo_one_prod['item_price'].gt(10), 'item_name'].nunique()

5                Chicken Bowl
7               Steak Burrito
39              Barbacoa Bowl
57             Veggie Burrito
62                Veggie Bowl
168     Barbacoa Crispy Tacos
186         Veggie Salad Bowl
250             Chicken Salad
606          Steak Salad Bowl
738         Veggie Soft Tacos
1132      Carnitas Salad Bowl
1229      Barbacoa Salad Bowl
Name: item_name, dtype: object

12

## Sort

### Sort the price

In [111]:
# Work on just the name and price columns.
price_per_item = chipo_one_prod[['item_name', 'item_price']]
price_per_item.head(3)

# Sort from most to least expensive. The sorted frame is rebound to the same
# name instead of using sort_values(inplace=True): sorting in place on a frame
# obtained by selection triggers pandas' SettingWithCopyWarning.
price_per_item = price_per_item.sort_values('item_price', ascending=False)
price_per_item.head(3)

Unnamed: 0,item_name,item_price
0,Chips and Fresh Tomato Salsa,2.39
1,Izze,3.39
2,Nantucket Nectar,3.39


Unnamed: 0,item_name,item_price
606,Steak Salad Bowl,11.89
1229,Barbacoa Salad Bowl,11.89
1132,Carnitas Salad Bowl,11.89


### Chicken Bowl

In [114]:
# Chicken Bowl rows with quantity 1, most expensive first. The sort is chained
# onto the selection rather than applied with inplace=True, which would sort a
# boolean-mask selection in place and trigger SettingWithCopyWarning.
chicken_bowl = (
    chipo[(chipo.item_name == 'Chicken Bowl') & (chipo.quantity == 1)]
    .sort_values('item_price', ascending=False)
)

chicken_bowl[['item_name', 'choice_description', 'item_price']].head(3)

Unnamed: 0,item_name,choice_description,item_price
1073,Chicken Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...",11.25
1401,Chicken Bowl,"[Roasted Chili Corn Salsa, [Rice, Black Beans,...",11.25
1299,Chicken Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",11.25


### Sort by item_name

In [122]:
# Order rows by item_name, breaking ties by item_price.
chipo.sort_values(by=['item_name', 'item_price']).head(n=3)

# Alternatives:
#   chipo.sort_values('item_name', inplace=True)   # sorts chipo itself
#   chipo['item_name'].sort_values()               # sorts only the name column

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
298,129,1,6 Pack Soft Drink,[Sprite],6.49
341,148,1,6 Pack Soft Drink,[Diet Coke],6.49
357,154,1,6 Pack Soft Drink,[Coke],6.49


## The quantity of the most expensive item ordered

In [126]:
# The two highest-priced rows; the quantity column of the first row is the answer.
chipo.sort_values(by='item_price', ascending=False).head(n=2)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
3598,1443,15,Chips and Fresh Tomato Salsa,,44.25
3480,1398,3,Carnitas Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",35.25


## How many times was a Veggie Salad Bowl ordered?

In [131]:
# Every row whose item is a Veggie Salad Bowl.
chipo_salad = chipo[chipo['item_name'].eq('Veggie Salad Bowl')]
chipo_salad.head(n=3)

# Number of matching rows.
len(chipo_salad)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
186,83,1,Veggie Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...",11.25
295,128,1,Veggie Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...",11.25
455,195,1,Veggie Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...",11.25


18

## How many times did people order more than one Canned Soda?

In [133]:
# Canned Soda rows where the quantity is greater than one.
chipo_canned_soda = chipo[
    chipo['item_name'].eq('Canned Soda') & chipo['quantity'].gt(1)
]

chipo_canned_soda.head(n=3)

# Number of matching rows.
len(chipo_canned_soda)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
18,9,2,Canned Soda,[Sprite],2.18
51,23,2,Canned Soda,[Mountain Dew],2.18
162,73,2,Canned Soda,[Diet Coke],2.18


20