# Data exploration: chipotle

In [7]:
import pandas as pd
import numpy as np

### Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv)

In [8]:
# Access the data from URL (using read_csv function)
url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'
chipo= pd.read_csv(url, sep = '\t')

chipo.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


In [9]:
# Nº rows
print(chipo.shape[0])

# Nº columns
print(chipo.shape[1])

4622
5


### Name of columns

In [10]:
list(chipo.columns.values)

['order_id', 'quantity', 'item_name', 'choice_description', 'item_price']

### Index of data

In [11]:
chipo.index

RangeIndex(start=0, stop=4622, step=1)

### Most ordered item

In [12]:
# Counts of unique values (NAs dropped by default)
name_counts = chipo['item_name'].value_counts()

name_counts.head(1)

Chicken Bowl    726
Name: item_name, dtype: int64

### Number of items were ordered

In [13]:
unique_items = chipo['item_name'].unique()

# print(unique_items)

# Data type is 'numpy.ndarray'
print(type(unique_items))

# Number of elements in array
unique_items.size

<class 'numpy.ndarray'>


50

### Most ordered item in the choice_description column

In [14]:
description_counts = chipo['choice_description'].value_counts()

description_counts.head(1)

[Diet Coke]    134
Name: choice_description, dtype: int64

### Items ordered in total

In [15]:
# Option 1
chipo['quantity'].sum()
# Option 2
chipo.quantity.sum()

4972

### Turn the item price into a float

In [16]:
# Complex lambda function
dollarizer = lambda x: float(x[1:-1])
chipo.item_price = chipo.item_price.apply(dollarizer)

### Revenue for the period in the dataset

In [17]:
(chipo['quantity'] * chipo['item_price']).sum()

39237.02

### Number of orders made in the period

In [18]:
chipo['order_id'].value_counts().count()

1834

### Average amount per order

In [19]:
chipo['revenue'] = chipo['quantity'] * chipo['item_price']
order_grouped = chipo.groupby(by=['order_id']).sum()
order_grouped.mean()['revenue']

21.394231188658654

### Different items sold

In [20]:
unique_items.size

50