# Data Description 

chiptotle food dataset describe the food orders by restaurants.

# Attribute Information :

1). order_id as unique values for the user (numerical value)

2). quantity of product the user buys (numerical values)

3). item_name of the product name (String values)

4). choice_description of the product ingredients used (String values)

5). item_price of the product price

In [0]:
import pandas as pd
import numpy as np

In [0]:
url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'

In [0]:
# read the file
df = pd.read_csv(url, sep='\t')

In [4]:
df.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


In [5]:
# bottom rows of the file
df.tail() 

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
4617,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Sour ...",$11.75
4618,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...",$11.75
4619,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...",$11.25
4620,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...",$8.75
4621,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...",$8.75


In [6]:
# data description
df.info()

# for allignment purpose'
print(' ')
print('*'*100)

# total rows
df.shape[0]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4622 entries, 0 to 4621
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   order_id            4622 non-null   int64 
 1   quantity            4622 non-null   int64 
 2   item_name           4622 non-null   object
 3   choice_description  3376 non-null   object
 4   item_price          4622 non-null   object
dtypes: int64(2), object(3)
memory usage: 180.7+ KB
 
****************************************************************************************************


4622

In [7]:
# name of all the columns
df.columns

Index(['order_id', 'quantity', 'item_name', 'choice_description',
       'item_price'],
      dtype='object')

In [8]:
# dataset indexed
df.index

RangeIndex(start=0, stop=4622, step=1)

In [9]:
# ordered quantity sum'
quan = df.groupby('item_name')
quan = quan.sum()
quan = quan.sort_values(by=['quantity'],ascending=False)
quan.head(10)

Unnamed: 0_level_0,order_id,quantity
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Chicken Bowl,713926,761
Chicken Burrito,497303,591
Chips and Guacamole,449959,506
Steak Burrito,328437,386
Canned Soft Drink,304753,351
Chips,208004,230
Steak Bowl,193752,221
Bottled Water,175944,211
Chips and Fresh Tomato Salsa,100419,130
Canned Soda,76396,126


In [10]:
# choice description sum'
cdcr = df.groupby('choice_description').sum()
cdcr = cdcr.sort_values(by=['quantity'],ascending=False)
cdcr.head(10)

Unnamed: 0_level_0,order_id,quantity
choice_description,Unnamed: 1_level_1,Unnamed: 2_level_1
[Diet Coke],123455,159
[Coke],122752,143
[Sprite],80426,89
"[Fresh Tomato Salsa, [Rice, Black Beans, Cheese, Sour Cream, Lettuce]]",43088,49
"[Fresh Tomato Salsa, [Rice, Black Beans, Cheese, Sour Cream]]",36041,42
"[Fresh Tomato Salsa, [Rice, Black Beans, Cheese, Sour Cream, Guacamole, Lettuce]]",37550,40
[Lemonade],31892,36
"[Fresh Tomato Salsa (Mild), [Pinto Beans, Rice, Cheese, Sour Cream]]",24432,36
[Coca Cola],19282,32
"[Fresh Tomato Salsa, [Rice, Cheese, Sour Cream, Lettuce]]",29614,30


In [0]:
df['item_price'] = df['item_price'].apply(lambda x: float(x[1:-1]))

In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4622 entries, 0 to 4621
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   order_id            4622 non-null   int64  
 1   quantity            4622 non-null   int64  
 2   item_name           4622 non-null   object 
 3   choice_description  3376 non-null   object 
 4   item_price          4622 non-null   float64
dtypes: float64(1), int64(2), object(2)
memory usage: 180.7+ KB


In [13]:
# revenue for the dataset
revenue = (df['quantity'] * df['item_price']).sum()
print('Revenue is $' + str(np.round(revenue,2)))

Revenue is $39237.02


In [14]:
# delete the duplicate in item_name and quantity to check the unique item price list
unique_prd = df.drop_duplicates(['item_name','quantity'])

# product with unique value 1
one_prd = unique_prd[unique_prd['quantity'] == 1]

# choose only item_name and item_price from dataset
price_per_prd = one_prd[['item_name','item_price']]

# sort the values most expensic=ve to less expensive
price_per_prd = price_per_prd.sort_values(by='item_price',ascending=False, 
                                          ignore_index=True)
price_per_prd.head(10)

Unnamed: 0,item_name,item_price
0,Steak Salad Bowl,11.89
1,Barbacoa Salad Bowl,11.89
2,Carnitas Salad Bowl,11.89
3,Steak Burrito,11.75
4,Barbacoa Crispy Tacos,11.75
5,Barbacoa Bowl,11.75
6,Veggie Soft Tacos,11.25
7,Veggie Salad Bowl,11.25
8,Veggie Bowl,11.25
9,Veggie Burrito,11.25


In [15]:
# quantity  of most expensive order
df.sort_values(by='item_price',ascending=False,ignore_index=True).head(1)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1443,15,Chips and Fresh Tomato Salsa,,44.25


In [16]:
# How many times particular product was ordered
df[df.item_name == 'Steak Salad Bowl'].sort_values(by='item_price',ascending=False,ignore_index=True)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,123,2,Steak Salad Bowl,"[Tomatillo Red Chili Salsa, [Black Beans, Chee...",23.78
1,253,2,Steak Salad Bowl,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",23.78
2,969,1,Steak Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...",11.89
3,1403,1,Steak Salad Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",11.89
4,1343,1,Steak Salad Bowl,"[Fresh Tomato Salsa, [Cheese, Guacamole, Lettu...",11.89
5,1244,1,Steak Salad Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",11.89
6,1176,1,Steak Salad Bowl,"[Fresh Tomato Salsa, [Black Beans, Cheese, Gua...",11.89
7,1114,1,Steak Salad Bowl,"[Tomatillo Red Chili Salsa, [Rice, Black Beans...",11.89
8,1088,1,Steak Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Black...",11.89
9,250,1,Steak Salad Bowl,"[Fresh Tomato Salsa, [Pinto Beans, Cheese, Gua...",11.89
