# Part 1 - Getting and Knowing your Data

This time we are going to pull data directly from the internet.
Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). 

### Step 3. Assign it to a variable called chipo.

In [5]:
url = "https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv"
chipo = pd.read_csv(url, delimiter='\t')

### Step 4. See the first 10 entries

In [7]:
chipo.head(10)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",$10.98
6,3,1,Side of Chips,,$1.69
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",$11.75
8,4,1,Steak Soft Tacos,"[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...",$9.25
9,5,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...",$9.25


### Step 5. What is the number of observations in the dataset?

In [9]:
# Solution 1
chipo.shape


(4622, 5)

In [10]:
# Solution 2
#4622 rows 5 columns

chipo

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98
...,...,...,...,...,...
4617,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Sour ...",$11.75
4618,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...",$11.75
4619,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...",$11.25
4620,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...",$8.75


### Step 6. What is the number of columns in the dataset?

In [11]:
chipo.columns

Index(['order_id', 'quantity', 'item_name', 'choice_description',
       'item_price'],
      dtype='object')

In [13]:
chipo.shape[1] #0 son rows y 1columnas

5

### Step 7. Print the name of all the columns.

In [14]:
chipo.columns

Index(['order_id', 'quantity', 'item_name', 'choice_description',
       'item_price'],
      dtype='object')

### Step 8. How is the dataset indexed?

In [None]:
#indexado automáticamente con números del 0 hasta dónde lleguen las filas


### Step 9. Which was the most-ordered item? 

In [21]:
cantidad = chipo.groupby('item_name')['quantity'].sum()
most_ordered = cantidad.idxmax()

print(f"Most ordered:{most_ordered}")
print(f"Quanity: {cantidad.max()}")

Most ordered:Chicken Bowl
Quanity: 761


### Step 10. For the most-ordered item, how many items were ordered?

In [None]:
#761 items

### Step 11. What was the most ordered item in the choice_description column?

In [22]:
cantidad_choice = chipo.groupby('choice_description')['quantity'].sum()
most_ordered = cantidad_choice.idxmax()

print(f"Most ordered of choice_descriprion:{most_ordered}")
print(f"Quanity: {cantidad_choice.max()}")

Most ordered of choice_descriprion:[Diet Coke]
Quanity: 159


### Step 12. How many items were orderd in total?

In [23]:
total_items = chipo["quantity"].sum()
print(f"The total items ordered are: {total_items}")

The total items ordered are: 4972


### Step 13. Turn the item price into a float

In [26]:
chipo['item_price'] = chipo['item_price'].str.replace('$', '').astype(float)

#### Step 13.a. Check the item price type

In [33]:
chipo["item_price"].dtype

dtype('float64')

#### Step 13.b. Create a lambda function and change the type of item price

In [48]:
chipo["item_price"] = chipo["item_price"].apply(lambda x: str(x))
chipo

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98
...,...,...,...,...,...
4617,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Sour ...",11.75
4618,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...",11.75
4619,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...",11.25
4620,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...",8.75


#### Step 13.c. Check the item price type

In [49]:
chipo["item_price"].dtype

dtype('O')

In [53]:
chipo["item_price"] = chipo["item_price"].apply(lambda x: float(x))


### Step 14. How much was the revenue for the period in the dataset?

In [52]:
chipo['revenue'] = chipo['quantity'] * chipo['item_price']
tot_revenue = chipo['revenue'].sum()

print(f"Total revenue for the period: {tot_revenue}")

Total revenue for the period: 39237.02


### Step 15. How many orders were made in the period?

In [54]:

num_orders = chipo['order_id'].nunique()
print("Number of orders made in the period:", num_orders)

Number of orders made in the period: 1834


### Step 16. What is the average revenue amount per order?

In [55]:
# Solution 1
avg_rev_order = tot_revenue / num_orders
print(avg_rev_order)


21.39423118865867


In [56]:
# Solution 2
order_revenue = chipo.groupby('order_id')['revenue'].sum()
avg_rev_order2 = order_revenue.mean()
print(avg_rev_order2)



21.39423118865867


### Step 17. How many different items are sold?

In [57]:
unique_items = chipo["item_name"].nunique()
print(unique_items)

50


# Part 2 - Filtering and Sorting Data

This time we are going to pull data directly from the internet.
Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.

### Step 1. Import the necessary libraries

In [58]:
import pandas as pd
import numpy as np

### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). 

### Step 3. Assign it to a variable called chipo.

In [61]:
url = "https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv"
chipo = pd.read_csv(url, delimiter='\t')

### Step 4. How many products cost more than $10.00?

In [65]:
chipo['item_price'] = chipo['item_price'].astype(float)
expensive = chipo[chipo['item_price'] > 10.00] 
num_expensive = len(expensive)
print(num_expensive)

1130


### Step 5. What is the price of each item? 
###### print a data frame with only two columns item_name and item_price

In [66]:
chipo.loc[:, ['item_name', 'item_price']]

Unnamed: 0,item_name,item_price
0,Chips and Fresh Tomato Salsa,2.39
1,Izze,3.39
2,Nantucket Nectar,3.39
3,Chips and Tomatillo-Green Chili Salsa,2.39
4,Chicken Bowl,16.98
...,...,...
4617,Steak Burrito,11.75
4618,Steak Burrito,11.75
4619,Chicken Salad Bowl,11.25
4620,Chicken Salad Bowl,8.75


In [67]:
item_prices = chipo.loc[:, ['item_name', 'item_price']]
item_prices = item_prices.drop_duplicates()
print(item_prices)

                                  item_name  item_price
0              Chips and Fresh Tomato Salsa        2.39
1                                      Izze        3.39
2                          Nantucket Nectar        3.39
3     Chips and Tomatillo-Green Chili Salsa        2.39
4                              Chicken Bowl       16.98
...                                     ...         ...
4237                    Chips and Guacamole        8.50
4354                       Steak Soft Tacos       18.50
4489                    Chips and Guacamole       17.80
4509                                  Chips        1.99
4510                          Barbacoa Bowl       11.49

[209 rows x 2 columns]


### Step 6. Sort by the name of the item

In [68]:
item_prices_sorted = item_prices.sort_values(by='item_name')
print(item_prices_sorted)

              item_name  item_price
3389  6 Pack Soft Drink       12.98
298   6 Pack Soft Drink        6.49
4510      Barbacoa Bowl       11.49
3195      Barbacoa Bowl        8.69
95        Barbacoa Bowl        9.25
...                 ...         ...
186   Veggie Salad Bowl       11.25
3889  Veggie Soft Tacos       16.98
738   Veggie Soft Tacos       11.25
781   Veggie Soft Tacos        8.75
1395  Veggie Soft Tacos        8.49

[209 rows x 2 columns]


### Step 7. What was the quantity of the most expensive item ordered?

In [70]:
max_pric = chipo["item_price"].max()
most_expensive = chipo[chipo["item_price"] == max_pric]
quant_most_exp = most_expensive["quantity"].sum()
print(quant_most_exp)

15


### Step 8. How many times was a Veggie Salad Bowl ordered?

In [71]:
veggie_orders = chipo[chipo['item_name'] == 'Veggie Salad Bowl']
veggie_quant = veggie_orders["quantity"].sum()
print(veggie_quant)

18


### Step 9. How many times did someone order more than one Canned Soda?

In [72]:
more_soda = chipo[(chipo['item_name'] == 'Canned Soda') & (chipo['quantity'] > 1)]
more_soda_orders = len(more_soda)
print(more_soda_orders)

20
