<h1><center> Data Analysis: Introduction to Pandas </center></h1>

<h2> Part II. Filtering and Sorting Data </h2>

<i>Example 1. Import the necessary libraries</i>

In [1]:
import pandas as pd

<i>Example 2. Import the dataset from the local file </i>

In [2]:
order_list = pd.read_csv('./order_list.csv', sep = '\t')
order_list.head()

Unnamed: 0,id,quantity,food,description,unit_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


<i>Example 3. Convert the <i>"unit_price"</i> column to float type</i>

In [3]:
#Convert unit price to float (No $)
prices = []
for value in order_list.unit_price:
    value_f = float(value[1: -1])
    prices.append(value_f)
order_list.unit_price = prices
order_list.head()

Unnamed: 0,id,quantity,food,description,unit_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98


<b>Exercise 1. Eliminate the rows with duplicate value in the dataset</b>

In [4]:
order_list2 = order_list.drop_duplicates()

In [5]:
print('order_list.shape: ', order_list.shape)
print('order_list2.shape: ', order_list2.shape)


order_list.shape:  (4622, 5)
order_list2.shape:  (4563, 5)


<b>Exercise 2. Display the orders which costs more than $20?</b>

In [6]:
order_list2.head(3)

Unnamed: 0,id,quantity,food,description,unit_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39


In [7]:
order_list2['total_price'] = order_list2['quantity'] * order_list2['unit_price']
order_list2.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  order_list2['total_price'] = order_list2['quantity'] * order_list2['unit_price']


Unnamed: 0,id,quantity,food,description,unit_price,total_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39,2.39
1,1,1,Izze,[Clementine],3.39,3.39
2,1,1,Nantucket Nectar,[Apple],3.39,3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98,33.96


In [8]:
order_price = (order_list2[['id', 'total_price']].groupby('id', as_index=False).agg(order_price = ('total_price', 'sum')))
more_than_20 = order_price[order_price['order_price'] > 20]
more_than_20.head()

Unnamed: 0,id,order_price
1,2,33.96
3,4,21.0
17,18,24.9
19,20,36.2
20,21,23.36


<b>Exercise 3. How many orders that costs more than $20?</b>

In [9]:
more_than_20['id'].unique()
more_than_20.shape[0]


616

<b>Exercise 4. Sort the list by the name of the food in alphabetical order.</b>

In [17]:
order_list2.sort_values('food', ascending=True)

Unnamed: 0,id,quantity,food,description,unit_price,total_price
4527,1800,1,6 Pack Soft Drink,[Diet Coke],6.49,6.49
2374,945,1,6 Pack Soft Drink,[Sprite],6.49,6.49
2860,1136,1,6 Pack Soft Drink,[Diet Coke],6.49,6.49
1755,708,1,6 Pack Soft Drink,[Coke],6.49,6.49
1279,520,1,6 Pack Soft Drink,[Sprite],6.49,6.49
...,...,...,...,...,...,...
738,304,1,Veggie Soft Tacos,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",11.25,11.25
1395,567,1,Veggie Soft Tacos,"[Fresh Tomato Salsa (Mild), [Pinto Beans, Rice...",8.49,8.49
1699,688,1,Veggie Soft Tacos,"[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...",11.25,11.25
2851,1132,1,Veggie Soft Tacos,"[Roasted Chili Corn Salsa (Medium), [Black Bea...",8.49,8.49


<b>Exercise 5a. Display the list by the total quantity of each order sorted in descending order.

In [18]:
total_quantity = (order_list2[['id', 'quantity']].groupby('id', as_index=False).aggregate(total_quantity = ('quantity', 'sum')).sort_values('total_quantity', ascending=False))

total_quantity.head()

Unnamed: 0,id,total_quantity
1442,1443,35
925,926,21
1785,1786,20
758,759,18
1659,1660,18


<b>Exercise 5b. Which order has the highest price?

In [19]:
highest_price = (order_list2[['id', 'total_price']].groupby('id', as_index=False).agg(order_price = ('total_price', 'sum')).sort_values('order_price', ascending=False))
highest_price.head()

Unnamed: 0,id,order_price
1442,1443,1074.24
510,511,315.29
1558,1559,246.0
1659,1660,218.3
1785,1786,197.7


<b>Exercise 6. Display the list of the orders of "Tacos" (All types of Tacos) and sort by the price in ascending order.</b>

In [20]:
tacos = order_list2[order_list2['food'].str.contains('Tacos')]

order_price_tacos = (tacos[['id', 'total_price']].groupby('id', as_index=False).agg(order_price = ('total_price', 'sum')).sort_values('order_price', ascending=True))

order_price_tacos.head()

Unnamed: 0,id,order_price
153,867,8.49
210,1211,8.49
209,1210,8.49
204,1156,8.49
198,1132,8.49


<b>Exercise 7. How many orders was made for "Salad"?</b>

In [31]:
salad = order_list2[order_list2['food'].str.contains('Salad')]

salad['id'].unique()

array([  20,   60,   83,   94,  109,  111,  123,  128,  137,  195,  207,
        220,  221,  234,  240,  243,  250,  253,  261,  276,  286,  289,
        309,  311,  314,  337,  357,  369,  394,  439,  458,  459,  468,
        478,  488,  501,  534,  536,  541,  552,  575,  576,  577,  592,
        598,  602,  612,  621,  628,  637,  644,  649,  654,  686,  691,
        701,  709,  731,  734,  738,  756,  760,  766,  777,  791,  798,
        818,  850,  869,  896,  913,  926,  953,  960,  962,  969,  970,
       1015, 1032, 1035, 1038, 1042, 1066, 1083, 1088, 1094, 1114, 1136,
       1138, 1145, 1152, 1168, 1176, 1181, 1189, 1192, 1193, 1206, 1212,
       1222, 1235, 1243, 1244, 1263, 1265, 1269, 1283, 1289, 1305, 1321,
       1327, 1329, 1343, 1346, 1349, 1357, 1368, 1383, 1395, 1403, 1406,
       1409, 1426, 1435, 1450, 1455, 1461, 1474, 1477, 1483, 1490, 1491,
       1499, 1500, 1517, 1521, 1522, 1592, 1596, 1598, 1601, 1606, 1615,
       1627, 1633, 1635, 1636, 1646, 1677, 1679, 16

<b>Exercise 8. How many "Salad" were sold?</b>

In [None]:
quantity_salad = salad['quantity'].sum()
quantity_salad

np.int64(209)

<b>Exercise 9. Display the list of orders of "Soft Tacos" with the minimum of 2 Soft Tacos?</b>

In [34]:
soft_tacos = order_list2[order_list2['food'].str.contains('Soft Tacos')]

quantity_soft_tacos = (soft_tacos[['id', 'quantity']].groupby('id', as_index=False).agg(total_quantity=('quantity', 'sum')))

quantity_soft_tacos[quantity_soft_tacos['total_quantity']>=2].head()

Unnamed: 0,id,total_quantity
2,18,2
21,145,2
22,149,2
23,153,2
27,185,2


<b>Exercise 10. How many orders of more than two Steak Burrito?</b>

In [36]:
steak_burrito = order_list2[order_list2['food'].str.contains('Steak Burrito')]

quantity_steak_burrito = (steak_burrito[['id', 'quantity']].groupby('id', as_index=False).agg(total_quantity = ('quantity', 'sum')))

more_than_2 = quantity_steak_burrito[quantity_steak_burrito['total_quantity']>2]

print(more_than_2['id'].unique())
more_than_2

[ 511  691 1431 1443]


Unnamed: 0,id,total_quantity
103,511,3
134,691,4
269,1431,3
271,1443,3


>Exercise 11. Find the mean of the number of Water ordered each time?</b>

In [37]:
water = order_list2[order_list2['food'].str.contains('Water')]

quantity_water = (water[['id', 'quantity']].groupby('id', as_index=False).agg(total_quantity=('quantity', 'sum')))

print(quantity_water['total_quantity'].mean())

1.3246753246753247
