# Proyecto: Análisis de datos de ventas en un restaurante


Algunas funciones aplicadas en esta práctica son:

`.info()` <- Extrae la información de la estructura del dataframe

`.describe()` <- Extrae la información estadística del dataframe

`.value_counts()` <- Cuenta cuántos registros corresponden a cada valor distinto de una columna

`.groupby()` <- Agrupa las filas del dataframe según los valores de una o más columnas

`.apply()` <- Aplica funciones definidas por el usuario a un dataframe agrupado

`.loc[]` <- Extrae subconjuntos de información

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

In [2]:
# Render every float with a thousands separator and three decimals.
pd.set_option('display.float_format', '{:,.3f}'.format)

In [3]:
# Load the 'tips' example dataset (restaurant bills and tips) through seaborn
# and display it; each row is one bill.
df = sns.load_dataset('tips')
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.990,1.010,Female,No,Sun,Dinner,2
1,10.340,1.660,Male,No,Sun,Dinner,3
2,21.010,3.500,Male,No,Sun,Dinner,3
3,23.680,3.310,Male,No,Sun,Dinner,2
4,24.590,3.610,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.030,5.920,Male,No,Sat,Dinner,3
240,27.180,2.000,Female,Yes,Sat,Dinner,2
241,22.670,2.000,Male,Yes,Sat,Dinner,2
242,17.820,1.750,Male,No,Sat,Dinner,2


In [4]:
# Print the structure of the DataFrame: index range, column names,
# non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) of the
# numeric columns.
df.describe()

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,19.786,2.998,2.57
std,8.902,1.384,0.951
min,3.07,1.0,1.0
25%,13.348,2.0,2.0
50%,17.795,2.9,2.0
75%,24.127,3.562,3.0
max,50.81,10.0,6.0


In [7]:
# Extract the third quartile (75th percentile) row from the summary.
# Passing the label inside a list keeps the result as a one-row DataFrame.
df.describe().loc[ ['75%'] ]

Unnamed: 0,total_bill,tip,size
75%,24.127,3.562,3.0


In [9]:
# Number of distinct values present in the 'day' column.
df['day'].nunique()

4

In [12]:
# Count how many records (bills) fall on each day.
df['day'].value_counts()

Sat     87
Sun     76
Thur    62
Fri     19
Name: day, dtype: int64

In [13]:
# Share of records per day, expressed as a percentage of all visits.
# normalize=True divides each count by the total, so the counts are
# computed only once instead of twice.
df['day'].value_counts(normalize=True) * 100

Sat    35.656
Sun    31.148
Thur   25.410
Fri     7.787
Name: day, dtype: float64

In [14]:
# Add the tip as a fraction of the total bill. Despite the column name the
# value is a ratio, not a percentage: 0.15 means 15 % (it is not multiplied
# by 100).
df['prct_tip'] = df['tip']/df['total_bill']
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,prct_tip
0,16.990,1.010,Female,No,Sun,Dinner,2,0.059
1,10.340,1.660,Male,No,Sun,Dinner,3,0.161
2,21.010,3.500,Male,No,Sun,Dinner,3,0.167
3,23.680,3.310,Male,No,Sun,Dinner,2,0.140
4,24.590,3.610,Female,No,Sun,Dinner,4,0.147
...,...,...,...,...,...,...,...,...
239,29.030,5.920,Male,No,Sat,Dinner,3,0.204
240,27.180,2.000,Female,Yes,Sat,Dinner,2,0.074
241,22.670,2.000,Male,Yes,Sat,Dinner,2,0.088
242,17.820,1.750,Male,No,Sat,Dinner,2,0.098


In [15]:
# Mean of every numeric column, grouped by sex.
# numeric_only=True is stated explicitly because the frame also holds
# categorical columns (smoker, day, time); pandas >= 2.0 raises a TypeError
# for them instead of silently dropping them.
df.groupby(['sex']).mean(numeric_only=True)

Unnamed: 0_level_0,total_bill,tip,size,prct_tip
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,20.744,3.09,2.631,0.158
Female,18.057,2.833,2.46,0.166


In [16]:
# Summary statistics of 'prct_tip' computed separately for each sex.
# Selecting with a list of columns keeps the result as a DataFrame.
df.groupby(['sex'])[ ['prct_tip'] ].describe()

Unnamed: 0_level_0,prct_tip,prct_tip,prct_tip,prct_tip,prct_tip,prct_tip,prct_tip,prct_tip
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Male,157.0,0.158,0.065,0.036,0.121,0.153,0.186,0.71
Female,87.0,0.166,0.054,0.056,0.14,0.156,0.194,0.417


In [17]:
# Summary statistics of 'total_bill' and 'prct_tip' computed separately for
# each sex; the result has one block of statistics per selected column.
df.groupby(['sex'])[ ['total_bill','prct_tip'] ].describe()

Unnamed: 0_level_0,total_bill,total_bill,total_bill,total_bill,total_bill,total_bill,total_bill,total_bill,prct_tip,prct_tip,prct_tip,prct_tip,prct_tip,prct_tip,prct_tip,prct_tip
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
Male,157.0,20.744,9.246,7.25,14.0,18.35,24.71,50.81,157.0,0.158,0.065,0.036,0.121,0.153,0.186,0.71
Female,87.0,18.057,8.009,3.07,12.75,16.4,21.52,44.3,87.0,0.166,0.054,0.056,0.14,0.156,0.194,0.417


In [18]:
# Average a set of amounts in euros and convert the result to dollars.
def mean_eur2dollars(x, rate=1.12):
    """Return the mean of ``x`` converted from euros to dollars.

    Parameters
    ----------
    x : sequence, Series or DataFrame of numbers
        Amounts expressed in euros. For a DataFrame the mean is taken
        column by column.
    rate : float, optional
        Dollars per euro. Defaults to 1.12, the rate originally hard-coded.

    Returns
    -------
    A scalar for one-dimensional input, or one value per column for a
    DataFrame.
    """
    # axis=0 is explicit so that a DataFrame is always averaged per column,
    # independent of the default axis of the installed pandas version.
    return np.mean(x, axis=0) * rate

In [19]:
# Apply the user-defined function to each (sex, time) group of the two
# selected columns.
# NOTE(review): the currency factor is applied to 'prct_tip' as well, which
# is a tip/bill ratio rather than an amount of money -- confirm whether only
# 'total_bill' should be converted.
df.groupby(['sex','time'])[ ['total_bill','prct_tip'] ].apply(mean_eur2dollars)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,prct_tip
sex,time,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Lunch,20.214,0.186
Male,Dinner,24.037,0.174
Female,Lunch,18.3,0.182
Female,Dinner,21.519,0.19


In [20]:
# Apply a NumPy function to each (sex, time) group.
# axis=0 is forwarded to np.std so the standard deviation is always taken
# per column, whatever the default axis of the installed pandas version.
# np.std uses ddof=0 (population standard deviation) by default.
S = df.groupby(['sex','time'])[ ['total_bill','prct_tip'] ].apply(np.std, axis=0)
S

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,prct_tip
sex,time,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Lunch,7.832,0.045
Male,Dinner,9.423,0.069
Female,Lunch,7.393,0.034
Female,Dinner,8.123,0.063


## Extracción de información con .loc[]

In [21]:
# Select every row whose first index level ('sex') is 'Female', keeping all
# columns.
S.loc[['Female'],:]

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,prct_tip
sex,time,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,Lunch,7.393,0.034
Female,Dinner,8.123,0.063


In [22]:
# Select the ('Female', 'Dinner') rows and all columns. The selectors for the
# two row-index levels are wrapped in one tuple so that .loc receives exactly
# one row indexer and one column indexer, with no ambiguity between axes.
S.loc[(['Female'], ['Dinner']), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,prct_tip
sex,time,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,Dinner,8.123,0.063


In [23]:
# Same row selection, returning the 'prct_tip' column as a Series. The column
# label is passed as the second .loc indexer instead of chaining a second
# [...] lookup.
S.loc[(['Female'], ['Dinner']), 'prct_tip']

sex     time  
Female  Dinner   0.063
Name: prct_tip, dtype: float64

In [24]:
# Same row selection, keeping 'prct_tip' as a one-column DataFrame: a list of
# column labels as the second .loc indexer preserves the DataFrame shape.
S.loc[(['Female'], ['Dinner']), ['prct_tip']]

Unnamed: 0_level_0,Unnamed: 1_level_0,prct_tip
sex,time,Unnamed: 2_level_1
Female,Dinner,0.063
