Importamos las librerías Pandas y Numpy

In [1]:
import pandas as pd
import numpy as np

### Step 2. Tomaremos el fichero en [esta URL](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv) y lo leeremos como un dataframe.

Pista: [read_csv](https://numpy.org/doc/stable/user/absolute_beginners.html#importing-and-exporting-a-csv)

In [2]:
url = "https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv"

In [4]:
chipotle_df = pd.read_csv(url, sep = '\t')

chipotle_df.head(10)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",$10.98
6,3,1,Side of Chips,,$1.69
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",$11.75
8,4,1,Steak Soft Tacos,"[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...",$9.25
9,5,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...",$9.25


In [5]:
chipotle_df.describe()

Unnamed: 0,order_id,quantity
count,4622.0,4622.0
mean,927.254868,1.075725
std,528.890796,0.410186
min,1.0,1.0
25%,477.25,1.0
50%,926.0,1.0
75%,1393.0,1.0
max,1834.0,15.0


### Step 3. Veamos los tipos de datos. ¿Podríamos obtener el producto de mayor precio?

In [6]:
chipotle_df.dtypes

order_id               int64
quantity               int64
item_name             object
choice_description    object
item_price            object
dtype: object

In [7]:
# Vamos a ver que producto es el que tiene mayor precio

chipotle_df['item_price'] = chipotle_df['item_price'].str.replace('[$]', '', regex=True).astype(float)


In [8]:
chipotle_df['item_price'].max()

44.25

In [9]:
chipotle_df[(chipotle_df['item_price']== 44.25)]

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
3598,1443,15,Chips and Fresh Tomato Salsa,,44.25


### Step 4. ¿Qué productos cuestan más de $10?

In [10]:
chipotle_df[['order_id','item_name','item_price','choice_description']] [chipotle_df['item_price'] > 10]

Unnamed: 0,order_id,item_name,item_price,choice_description
4,2,Chicken Bowl,16.98,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans..."
5,3,Chicken Bowl,10.98,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou..."
7,4,Steak Burrito,11.75,"[Tomatillo Red Chili Salsa, [Fajita Vegetables..."
13,7,Chicken Bowl,11.25,"[Fresh Tomato Salsa, [Fajita Vegetables, Rice,..."
23,12,Chicken Burrito,10.98,"[[Tomatillo-Green Chili Salsa (Medium), Tomati..."
...,...,...,...,...
4610,1830,Steak Burrito,11.75,"[Fresh Tomato Salsa, [Rice, Sour Cream, Cheese..."
4611,1830,Veggie Burrito,11.25,"[Tomatillo Green Chili Salsa, [Rice, Fajita Ve..."
4617,1833,Steak Burrito,11.75,"[Fresh Tomato Salsa, [Rice, Black Beans, Sour ..."
4618,1833,Steak Burrito,11.75,"[Fresh Tomato Salsa, [Rice, Sour Cream, Cheese..."


In [11]:
chipotle_df['precio_unidad'] = chipotle_df['item_price'] / chipotle_df['quantity']

In [12]:
condicion = chipotle_df['precio_unidad'] > 10

chipotle_df[condicion]

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price,precio_unidad
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",10.98,10.98
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",11.75,11.75
13,7,1,Chicken Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...",11.25,11.25
23,12,1,Chicken Burrito,"[[Tomatillo-Green Chili Salsa (Medium), Tomati...",10.98,10.98
39,19,1,Barbacoa Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",11.75,11.75
...,...,...,...,...,...,...
4610,1830,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...",11.75,11.75
4611,1830,1,Veggie Burrito,"[Tomatillo Green Chili Salsa, [Rice, Fajita Ve...",11.25,11.25
4617,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Sour ...",11.75,11.75
4618,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...",11.75,11.75


### Step 4.1: ¿Y cuántos pedidos se han hecho con un producto de más de 10$? ¿Es lo mismo?

In [13]:
condicion = (chipotle_df['precio_undiad']>10)

chipotle_df[condicion]['order_id'].nunique()

KeyError: 'precio_undiad'

In [18]:
result_df = chipotle_df.groupby('item_name').agg({
    'item_price': 'mean',
    'precio_unidad': 'mean',
    'quantity': 'max',
    'order_id': 'count'
}).reset_index()

result_df

Unnamed: 0,item_name,item_price,precio_unidad,quantity,order_id
0,6 Pack Soft Drink,6.610185,6.49,2,54
1,Barbacoa Bowl,10.187273,10.187273,1,66
2,Barbacoa Burrito,9.832418,9.832418,1,91
3,Barbacoa Crispy Tacos,10.928182,10.087273,2,11
4,Barbacoa Salad Bowl,10.64,10.64,1,10
5,Barbacoa Soft Tacos,10.0184,10.0184,1,25
6,Bottled Water,1.867654,1.431667,10,162
7,Bowl,14.8,7.4,3,2
8,Burrito,7.4,7.4,1,6
9,Canned Soda,1.320577,1.09,4,104


### Step 4.2: ¿Y cuántos pedidos se han hecho de más de 10$? ¿Es lo mismo?

In [16]:
filtrado = chipotle_df.groupby(by='order_id').sum('item_price')


len(filtrado[filtrado['item_price']>10].value_counts())



481

### Step 4.3: ¿Y en cuántos pedidos se ha pagado más de 10$ por un mismo producto? ¿Es lo mismo?

### Step 5. ¿Qué precio tiene cada producto en distintos pedidos? ¿Hay productos con varios precios?

In [None]:
chipotle_df.groupby(by='item_name')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001FF9ABAC090>

### Step 6. Ordena el dataframe en base al nombre de producto (item name)

In [None]:
chipotle_df['item_name'].sort_values

<bound method Series.sort_values of 0                Chips and Fresh Tomato Salsa
1                                        Izze
2                            Nantucket Nectar
3       Chips and Tomatillo-Green Chili Salsa
4                                Chicken Bowl
                        ...                  
4617                            Steak Burrito
4618                            Steak Burrito
4619                       Chicken Salad Bowl
4620                       Chicken Salad Bowl
4621                       Chicken Salad Bowl
Name: item_name, Length: 4622, dtype: object>

### Step 7. ¿Cuantas veces se ha pedido los productos más caros?

### Step 8. Veamos para el caso de Veggie Salad Bowl. Extrae esa información.