# Ex1 - Filtering and Sorting Data

This time we are going to pull data directly from the internet.
Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.

### Step 1. Import the necessary libraries

In [1]:
import numpy as np
import pandas as pd

### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). 

### Step 3. Assign it to a variable called chipo.

In [2]:
chipo = pd.read_csv("https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv", sep="\t")

### Step 4. How many products cost more than $10.00?

In [3]:
chipo

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98
...,...,...,...,...,...
4617,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Sour ...",$11.75
4618,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...",$11.75
4619,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...",$11.25
4620,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...",$8.75


In [4]:
# the item_price column list the price
chipo.item_price.dtype

dtype('O')

In [5]:
# the item_price column is stored as Object. So if we use > to compare the prices we won't get the 
# desired result. so we first remove the $ sign from the strings and then convert the column to float64 type

chipo = chipo.assign(item_price = lambda df_: df_.item_price.astype("string").str.replace("$", "").astype("float64"))

# the first astype() is used to make sure that we have access to the .str accessor as Object could've been 
# other python objects too

In [6]:
# we are just being fancy here. we could've used
# chipo.item_price = chipo.item_price.apply(lambda x: float(x[1:-1]))
# but the first method we used is preffered since it uses the pandas optimized and vectorized string methods

In [7]:
chipo.item_price.dtype

dtype('float64')

In [8]:
# now onto the main task
chipo[chipo.item_price > 10].item_price.count()

1130

### Step 5. What is the price of each item? 
###### print a data frame with only two columns item_name and item_price

In [9]:
chipo.loc[:, ["item_name", "item_price"]]

Unnamed: 0,item_name,item_price
0,Chips and Fresh Tomato Salsa,2.39
1,Izze,3.39
2,Nantucket Nectar,3.39
3,Chips and Tomatillo-Green Chili Salsa,2.39
4,Chicken Bowl,16.98
...,...,...
4617,Steak Burrito,11.75
4618,Steak Burrito,11.75
4619,Chicken Salad Bowl,11.25
4620,Chicken Salad Bowl,8.75


### Step 6. Sort by the name of the item

In [10]:
chipo.loc[:, ["item_name", "item_price"]].sort_values(by="item_name")

Unnamed: 0,item_name,item_price
3389,6 Pack Soft Drink,12.98
341,6 Pack Soft Drink,6.49
1849,6 Pack Soft Drink,6.49
1860,6 Pack Soft Drink,6.49
2713,6 Pack Soft Drink,6.49
...,...,...
2384,Veggie Soft Tacos,8.75
781,Veggie Soft Tacos,8.75
2851,Veggie Soft Tacos,8.49
1699,Veggie Soft Tacos,11.25


### Step 7. What was the quantity of the most expensive item ordered?

In [11]:
most_expensive_item = chipo.loc[chipo.item_price.argmax(), "item_name"]
# or, chipo.sort_values(by="item_price", ascending=False).head(1).item_name

In [12]:
most_expensive_item

'Chips and Fresh Tomato Salsa'

In [13]:
quantity_of_most_expensive_item_ordered = chipo.loc[chipo.item_price.argmax(), "quantity"]

In [14]:
quantity_of_most_expensive_item_ordered

15

### Step 8. How many times was a Veggie Salad Bowl ordered?

In [15]:
query_string = "Veggie Salad Bowl"

In [16]:
chipo[chipo.item_name == query_string].order_id.count()

18

### Step 9. How many times did someone order more than one Canned Soda?

In [17]:
# as False is counted as 0 and True is counted as 1
(chipo[(chipo.item_name == "Canned Soda") & (chipo.quantity > 1)]).count()

order_id              20
quantity              20
item_name             20
choice_description    20
item_price            20
dtype: int64