# Dataset: Chipotle
Dataset link: https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv

## Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np

## Loading the data

In [2]:
url = "https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv"
chip = pd.read_csv(url, sep = '\t')

## Top 10 enteries

In [3]:
chip.head(10)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",$10.98
6,3,1,Side of Chips,,$1.69
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",$11.75
8,4,1,Steak Soft Tacos,"[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...",$9.25
9,5,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...",$9.25


### 1. What is the number of observations in the dataset?

In [4]:
result = chip.shape[0]
print("Total number of observations: ", result)

Total number of observations:  4622


### 2. What is the number of columns in the dataset?

In [5]:
cols = chip.shape[1]
print("Total number of columns: ", cols)

Total number of columns:  5


### 3. Print the name of all the columns.

In [6]:
col_names = chip.columns
print(f"Column names:\n{col_names}")

Column names:
Index(['order_id', 'quantity', 'item_name', 'choice_description',
       'item_price'],
      dtype='object')


### 4. How is the dataset indexed?

In [7]:
chip.index

RangeIndex(start=0, stop=4622, step=1)

### 5. Which was the most-ordered item?

In [8]:
# Group by item names
c1 = chip.groupby('item_name').sum()
# Sort the items in descending order
c1 = c1.sort_values(['quantity'], ascending = False)
# Pick the first item of the table
result = c1.head(1)
result

Unnamed: 0_level_0,order_id,quantity
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Chicken Bowl,713926,761


In [9]:
c = chip.groupby('item_name').sum().sort_values(['quantity'], ascending = False)
print(f"Most ordered item: \n{c.head(1)}")

Most ordered item: 
              order_id  quantity
item_name                       
Chicken Bowl    713926       761


### 6. For the most-ordered item, how many items were ordered?

In [10]:
c = chip.groupby('item_name').sum().sort_values(['quantity'], ascending = False)
print(f"Most ordered item: \n{c.head(1)}")
result = c.head(1)['quantity'][0]
print(f"\nFor most-ordered item, {result} items were ordered.")

Most ordered item: 
              order_id  quantity
item_name                       
Chicken Bowl    713926       761

For most-ordered item, 761 items were ordered.


### 7. What was the most ordered item in the choice_description column?

In [11]:
c = chip.groupby('choice_description').sum().sort_values(['quantity'], ascending = False)
print(f"Most ordered item in choice description: \n{c.head(1)}")

Most ordered item in choice description: 
                    order_id  quantity
choice_description                    
[Diet Coke]           123455       159


### 8. How many items were orderd in total?

In [12]:
tot_items = chip.quantity.sum()
print(f"{tot_items} items were ordered in total")

4972 items were ordered in total


### 9. Turn the item price into a float

In [13]:
chip['item_price'].dtype

dtype('O')

In [14]:
chip['item_price'] = chip['item_price'].apply(lambda x: float(x[1:-1]))

In [15]:
chip['item_price'].dtype

dtype('float64')

### 10. How much was the revenue for the period in the dataset?

In [16]:
revenue = (chip['quantity'] * chip['item_price']).sum()
print("Revenue was $"+ str(np.round(revenue, 2)))

Revenue was $39237.02


### 11. How many orders were made in the period?

In [17]:
orders = chip['order_id'].value_counts().count()
print(f"{orders} orders were made in the period")

1834 orders were made in the period


### 12. What is the average revenue amount per order?

In [18]:
chip['revenue'] = chip['quantity'] * chip['item_price']

In [19]:
chip.head(5)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price,revenue
0,1,1,Chips and Fresh Tomato Salsa,,2.39,2.39
1,1,1,Izze,[Clementine],3.39,3.39
2,1,1,Nantucket Nectar,[Apple],3.39,3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98,33.96


In [25]:
order_grouped = chip.groupby(by=['order_id']).sum()
order_grouped.head()

Unnamed: 0_level_0,quantity,item_price,revenue
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,4,11.56,11.56
2,2,16.98,33.96
3,2,12.67,12.67
4,2,21.0,21.0
5,2,13.7,13.7


In [27]:
res = order_grouped['revenue'].mean()
print(f"Average revenue amount per order is ${str(np.round(res, 2))}")

Average revenue amount per order is $21.39


### 13. How many different items are sold?

In [31]:
count = chip['item_name'].value_counts().count()
print(f"{count} different items are sold")

50 different items are sold


In [32]:
len(chip['item_name'].value_counts())

50