In [1]:
import pandas as pd
import numpy as np

In [2]:
df_ventas = pd.read_excel('data_ventas.xlsx')

# Datos faltantes

In [3]:
df = pd.DataFrame({'A':[1,2,np.nan],
                   'B':[5,np.nan,np.nan],
                   'C':[1,2,3]})

In [4]:
df.isnull()

Unnamed: 0,A,B,C
0,False,False,False
1,False,True,False
2,True,True,False


In [5]:
df.isnull().sum()

A    1
B    2
C    0
dtype: int64

In [6]:
df.dropna()

Unnamed: 0,A,B,C
0,1.0,5.0,1


In [7]:
df.dropna(axis=1)

Unnamed: 0,C
0,1
1,2
2,3


In [8]:
df.fillna(value='RELLENADO')

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,RELLENADO,2
2,RELLENADO,RELLENADO,3


In [9]:
df['A'].fillna(value=df['A'].mean())

0    1.0
1    2.0
2    1.5
Name: A, dtype: float64

# Accesor de textos en series

In [10]:
#para cambiar valor de una columna
df_ventas['Tipo'].replace('Empresa','Compañía')

0       Compañía
1       Compañía
2       Compañía
3        Persona
4        Persona
          ...   
4395     Persona
4396    Compañía
4397     Persona
4398     Persona
4399     Persona
Name: Tipo, Length: 4400, dtype: object

In [11]:
#para cambiar valor de una parte de los valores de una columna
df_ventas['Tipo'].str.replace('Per','per')

0       Empresa
1       Empresa
2       Empresa
3       persona
4       persona
         ...   
4395    persona
4396    Empresa
4397    persona
4398    persona
4399    persona
Name: Tipo, Length: 4400, dtype: object

In [12]:
df_ventas['VendedorFactura'] = df_ventas['Vendedor']+' - '+df_ventas['NroFactura'].astype(str)

In [13]:
df_ventas['VendedorFactura'].str.split('-')

0             [Laura ,  1]
1             [Maria ,  2]
2              [Juan ,  3]
3           [Alfonso ,  4]
4              [Juan ,  5]
               ...        
4395       [Pedro ,  4396]
4396       [Carla ,  4397]
4397        [Juan ,  4398]
4398    [Penelope ,  4399]
4399         [Luz ,  4400]
Name: VendedorFactura, Length: 4400, dtype: object

In [14]:
df_ventas['VendedorFactura'].str.split('-',expand=True)

Unnamed: 0,0,1
0,Laura,1
1,Maria,2
2,Juan,3
3,Alfonso,4
4,Juan,5
...,...,...
4395,Pedro,4396
4396,Carla,4397
4397,Juan,4398
4398,Penelope,4399


# Agrupar por (groupby)

In [15]:
df_ventas.groupby('Vendedor')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000193FFE42470>

In [16]:
df_ventas.groupby('Vendedor').sum()

  df_ventas.groupby('Vendedor').sum()


Unnamed: 0_level_0,NroFactura,MontoSinImp,Impuestos,Costo,MontoFactura
Vendedor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alfonso,965215,206924.54,54789.84,37481,261714.38
Carla,1011153,218643.65,57793.78,39628,276437.43
John,931448,198057.33,52260.21,37668,250317.54
Juan,913380,196684.91,51817.63,35069,248502.54
Laura,932976,200555.55,53174.89,36742,253730.44
Luis,1018682,230244.41,60934.83,41643,291179.24
Luz,981986,210187.7,55646.43,38967,265834.13
Maria,1025513,223199.85,59095.21,41023,282295.06
Pedro,941874,207051.85,54737.7,38705,261789.55
Penelope,959973,188477.15,49724.66,37402,238201.81


In [17]:
df_ventas.groupby('Vendedor').max()

Unnamed: 0_level_0,NroFactura,FechaFactura,Cliente,MontoSinImp,Impuestos,Costo,MontoFactura,Tipo,VendedorFactura
Vendedor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Alfonso,4385,2016-07-26,"Zeng, Omar",853.62,226.91,120,1080.53,Persona,Alfonso - 994
Carla,4397,2016-07-28,"Zeng, Omar",895.0,228.69,120,1089.0,Persona,Carla - 998
John,4392,2016-07-28,"Zeng, Omar",896.0,226.4,120,1084.16,Persona,John - 999
Juan,4398,2016-07-28,"Zeng, Omar",864.0,227.67,120,1084.16,Persona,Juan - 993
Laura,4365,2016-07-22,"Zeng, Omar",860.31,228.69,120,1089.0,Persona,Laura - 985
Luis,4395,2016-07-28,"Zeng, Omar",880.0,228.69,120,1089.0,Persona,Luis - 990
Luz,4400,2016-07-29,"Zeng, Omar",860.31,228.69,120,1089.0,Persona,Luz - 996
Maria,4389,2016-07-27,"Zeng, Omar",853.62,226.91,120,1080.53,Persona,Maria - 972
Pedro,4396,2016-07-28,"Zeng, Omar",871.0,228.43,120,1087.79,Persona,Pedro - 995
Penelope,4399,2016-07-28,"Zeng, Omar",859.36,228.43,120,1087.79,Persona,Penelope - 986


In [18]:
df_ventas.groupby('Vendedor').max()['MontoFactura']

Vendedor
Alfonso     1080.53
Carla       1089.00
John        1084.16
Juan        1084.16
Laura       1089.00
Luis        1089.00
Luz         1089.00
Maria       1080.53
Pedro       1087.79
Penelope    1087.79
Name: MontoFactura, dtype: float64

In [19]:
df_ventas.groupby(['Vendedor','Tipo']).mean()['MontoFactura']

  df_ventas.groupby(['Vendedor','Tipo']).mean()['MontoFactura']


Vendedor  Tipo   
Alfonso   Empresa    599.115342
          Persona    590.202893
Carla     Empresa    620.892582
          Persona    603.081107
John      Empresa    563.354557
          Persona    590.870037
Juan      Empresa    592.772183
          Persona    624.824677
Laura     Empresa    622.058902
          Persona    601.293210
Luis      Empresa    616.646250
          Persona    617.048257
Luz       Empresa    590.471027
          Persona    599.988467
Maria     Empresa    599.501813
          Persona    599.258581
Pedro     Empresa    595.186298
          Persona    590.271379
Penelope  Empresa    582.629578
          Persona    550.526459
Name: MontoFactura, dtype: float64

In [20]:
df_ventas.groupby(['Vendedor','Tipo']).mean()[['MontoFactura','Costo']]

  df_ventas.groupby(['Vendedor','Tipo']).mean()[['MontoFactura','Costo']]


Unnamed: 0_level_0,Unnamed: 1_level_0,MontoFactura,Costo
Vendedor,Tipo,Unnamed: 2_level_1,Unnamed: 3_level_1
Alfonso,Empresa,599.115342,84.161491
Alfonso,Persona,590.202893,85.467857
Carla,Empresa,620.892582,86.538462
Carla,Persona,603.081107,88.110701
John,Empresa,563.354557,85.873418
John,Persona,590.870037,88.278388
Juan,Empresa,592.772183,88.697183
Juan,Persona,624.824677,85.452471
Laura,Empresa,622.058902,88.427746
Laura,Persona,601.29321,88.246914


In [21]:
pd.options.display.float_format = '{:,.2f}'.format
#metodo 1
df_ventas.groupby(['Vendedor','Tipo']).agg({'MontoFactura':'sum','Costo':'mean'})

Unnamed: 0_level_0,Unnamed: 1_level_0,MontoFactura,Costo
Vendedor,Tipo,Unnamed: 2_level_1,Unnamed: 3_level_1
Alfonso,Empresa,96457.57,84.16
Alfonso,Persona,165256.81,85.47
Carla,Empresa,113002.45,86.54
Carla,Persona,163434.98,88.11
John,Empresa,89010.02,85.87
John,Persona,161307.52,88.28
Juan,Empresa,84173.65,88.7
Juan,Persona,164328.89,85.45
Laura,Empresa,107616.19,88.43
Laura,Persona,146114.25,88.25


In [22]:
#metodo 2 recomendado cuando queremos hacer 2 calculos diferentes de la misma columna
df_ventas.groupby(['Vendedor','Tipo'])['MontoFactura'].agg(['sum','mean'])

Unnamed: 0_level_0,Unnamed: 1_level_0,sum,mean
Vendedor,Tipo,Unnamed: 2_level_1,Unnamed: 3_level_1
Alfonso,Empresa,96457.57,599.12
Alfonso,Persona,165256.81,590.2
Carla,Empresa,113002.45,620.89
Carla,Persona,163434.98,603.08
John,Empresa,89010.02,563.35
John,Persona,161307.52,590.87
Juan,Empresa,84173.65,592.77
Juan,Persona,164328.89,624.82
Laura,Empresa,107616.19,622.06
Laura,Persona,146114.25,601.29


## Resumiendo datos
Recuento de valores, valores unicos, cantidad de valores unicos

In [23]:
df_ventas['Tipo'].value_counts()

Persona    2702
Empresa    1698
Name: Tipo, dtype: int64

In [24]:
df_ventas['Vendedor'].value_counts()

Luis        472
Maria       471
Carla       453
Luz         446
Pedro       442
Alfonso     441
John        431
Penelope    423
Laura       416
Juan        405
Name: Vendedor, dtype: int64

In [25]:
df_ventas['Tipo'].nunique()

2

In [26]:
df_ventas['Tipo'].unique()

array(['Empresa', 'Persona'], dtype=object)

In [27]:
df_ventas['Vendedor'].nunique()

10

In [28]:
df_ventas['Vendedor'].unique()

array(['Laura', 'Maria', 'Juan', 'Alfonso', 'John', 'Luz', 'Penelope',
       'Carla', 'Pedro', 'Luis'], dtype=object)

# Tablas Pivote

In [29]:
df_ventas.pivot_table(values='MontoFactura',
                      index=['Vendedor'],
                      columns=['Tipo']) #automaticamente saca el promedio..

Tipo,Empresa,Persona
Vendedor,Unnamed: 1_level_1,Unnamed: 2_level_1
Alfonso,599.12,590.2
Carla,620.89,603.08
John,563.35,590.87
Juan,592.77,624.82
Laura,622.06,601.29
Luis,616.65,617.05
Luz,590.47,599.99
Maria,599.5,599.26
Pedro,595.19,590.27
Penelope,582.63,550.53


In [30]:
df_ventas.pivot_table(values='MontoFactura',
                      index=['Vendedor'],
                      columns=['Tipo'],
                      aggfunc=np.sum) #modificamos el calculo

Tipo,Empresa,Persona
Vendedor,Unnamed: 1_level_1,Unnamed: 2_level_1
Alfonso,96457.57,165256.81
Carla,113002.45,163434.98
John,89010.02,161307.52
Juan,84173.65,164328.89
Laura,107616.19,146114.25
Luis,103596.57,187582.67
Luz,109237.14,156596.99
Maria,109109.33,173185.73
Pedro,107728.72,154060.83
Penelope,96716.51,141485.3


In [31]:
df_ventas.pivot_table(values='Costo',
                     index=['Tipo'],
                     columns=['Vendedor'],
                     aggfunc=np.sum) #modificamos el calculo

Vendedor,Alfonso,Carla,John,Juan,Laura,Luis,Luz,Maria,Pedro,Penelope
Tipo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Empresa,13550,15750,13568,12595,15298,15001,16178,15874,15860,14645
Persona,23931,23878,24100,22474,21444,26642,22789,25149,22845,22757


In [32]:
df_ventas.pivot_table(values='Costo',
                     index=['Tipo'],
                     columns=['Vendedor'],
                     aggfunc=[np.max,np.mean,np.min]).transpose()

Unnamed: 0_level_0,Tipo,Empresa,Persona
Unnamed: 0_level_1,Vendedor,Unnamed: 2_level_1,Unnamed: 3_level_1
amax,Alfonso,120.0,120.0
amax,Carla,120.0,120.0
amax,John,120.0,120.0
amax,Juan,120.0,120.0
amax,Laura,120.0,120.0
amax,Luis,120.0,120.0
amax,Luz,119.0,120.0
amax,Maria,120.0,120.0
amax,Pedro,120.0,120.0
amax,Penelope,120.0,120.0


# Tablas Melt

In [33]:
ventas_year = {'sucursal':['A','B','C'],
               '2014':[150,200,175],
               '2015':[25,75,100],
               '2016':[55,95,145]}

In [34]:
df_melt = pd.DataFrame(ventas_year)

In [35]:
df_melt

Unnamed: 0,sucursal,2014,2015,2016
0,A,150,25,55
1,B,200,75,95
2,C,175,100,145


In [36]:
pd.melt(df_melt, id_vars =['sucursal']) 

Unnamed: 0,sucursal,variable,value
0,A,2014,150
1,B,2014,200
2,C,2014,175
3,A,2015,25
4,B,2015,75
5,C,2015,100
6,A,2016,55
7,B,2016,95
8,C,2016,145


In [37]:
pd.melt(df_melt, id_vars =['sucursal'], value_vars=['2014']) 

Unnamed: 0,sucursal,variable,value
0,A,2014,150
1,B,2014,200
2,C,2014,175


# Ejercicio

**Cargue el dataset Ventas.xlsx a un dataframe llamado ventas y resuelva los siguientes puntos:**

- Revise cuales columnas tiene valores nulos

- Corrija los valores nulos con los valores anteriores

- Revise cuantos vendedores distintos hay

- Revise cuantos id vendedores diferentes hay

- Revise cual es el nombre de cada vendedor en el campo vendedores

- Revise cuantos registros hay por Vendedor (value_count)

- Revise cuantos registros hay por Id Vendedor (value_count)

- Corrija los vendedores que tienen el nombre incorrecto

- Revise cuantas sucursales distintas hay

- Haga una tabla que pueda ver el monto facturado total por sucursal ordenado de mayor a menor (sort_values)

- Haga una tabla que pueda ver el monto facturado promedio por vendedor

- Haga una tabla que pueda ver el monto facturado promedio, el monto facturado total por vendedor y sucursal

- Haga una tabla que tenga en el indice el sucursal, en la columna la vendedor y como valor el montofacturado total

In [38]:
ventas = pd.read_excel('Ventas.xlsx')

In [39]:
ventas.isnull().sum()

Id Vendedor      0
Vendedor         0
NroFactura      37
Sucursal         0
Costo            0
MontoFactura     0
dtype: int64

In [40]:
ventas = ventas.fillna(method='ffill')
ventas.isnull().sum()

Id Vendedor     0
Vendedor        0
NroFactura      0
Sucursal        0
Costo           0
MontoFactura    0
dtype: int64

In [41]:
ventas['Vendedor'].nunique()

12

In [42]:
ventas['Vendedor'].unique()

array(['Alvaro Enrique', 'Lucas Almonte', 'Juan Valdez', 'Pedro Perez',
       'Jon Wilson', 'Leidy Almanzar', 'John Wilson', 'Laura Diaz',
       'Liam Canelo', 'Luis Alberto', 'Mariana Almonte', 'Laury Diaz'],
      dtype=object)

In [43]:
ventas['Id Vendedor'].nunique()

10

In [44]:
ventas['Vendedor'].value_counts()

Mariana Almonte    472
Lucas Almonte      471
Liam Canelo        453
Leidy Almanzar     446
Luis Alberto       442
Pedro Perez        441
John Wilson        423
Laura Diaz         418
Alvaro Enrique     416
Juan Valdez        405
Jon Wilson           8
Laury Diaz           5
Name: Vendedor, dtype: int64

In [45]:
ventas['Id Vendedor'].value_counts()

3     472
4     471
6     453
7     446
10    442
2     441
8     431
5     423
9     416
1     405
Name: Id Vendedor, dtype: int64

In [46]:
ventas['Vendedor'] = ventas['Vendedor'].replace('Laury Diaz', 'Laura Diaz')
ventas['Vendedor'] = ventas['Vendedor'].replace('Jon Wilson', 'John Wilson')

In [47]:
ventas['Vendedor'].nunique()

10

In [48]:
ventas['Sucursal'].nunique()

5

In [49]:
ventas.groupby('Sucursal').sum()['MontoFactura'].sort_values(ascending=False)

  ventas.groupby('Sucursal').sum()['MontoFactura'].sort_values(ascending=False)


Sucursal
Sosua           553,654.41
San Pedro       534,588.05
Santiago        530,060.71
San Juan        523,050.28
Santo Domingo   488,648.67
Name: MontoFactura, dtype: float64

In [50]:
ventas.groupby('Vendedor').mean()['MontoFactura'].sort_values(ascending=False)

  ventas.groupby('Vendedor').mean()['MontoFactura'].sort_values(ascending=False)


Vendedor
Mariana Almonte   616.91
Juan Valdez       613.59
Liam Canelo       610.24
Alvaro Enrique    609.93
Lucas Almonte     599.35
Leidy Almanzar    596.04
Pedro Perez       593.46
Luis Alberto      592.28
John Wilson       580.78
Laura Diaz        563.12
Name: MontoFactura, dtype: float64

In [51]:
ventas.groupby(['Vendedor','Sucursal'])['MontoFactura'].agg(['mean','sum'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,sum
Vendedor,Sucursal,Unnamed: 2_level_1,Unnamed: 3_level_1
Alvaro Enrique,San Juan,552.26,47494.59
Alvaro Enrique,San Pedro,625.9,55079.02
Alvaro Enrique,Santiago,612.8,52088.08
Alvaro Enrique,Santo Domingo,592.21,45600.06
Alvaro Enrique,Sosua,668.36,53468.69
John Wilson,San Juan,595.86,47669.16
John Wilson,San Pedro,508.97,46316.38
John Wilson,Santiago,592.0,53279.93
John Wilson,Santo Domingo,631.92,53081.49
John Wilson,Sosua,581.05,49970.58


In [52]:
ventas.pivot_table(index = 'Sucursal', columns = 'Vendedor', values = 'MontoFactura', aggfunc = 'sum')

Vendedor,Alvaro Enrique,John Wilson,Juan Valdez,Laura Diaz,Leidy Almanzar,Liam Canelo,Lucas Almonte,Luis Alberto,Mariana Almonte,Pedro Perez
Sucursal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
San Juan,47494.59,47669.16,60243.48,44640.53,51497.6,58857.1,47705.46,49400.67,67874.95,47666.74
San Pedro,55079.02,46316.38,50124.25,52407.52,58219.15,49339.51,55646.69,50828.47,63614.54,53012.52
Santiago,52088.08,53279.93,44725.23,42623.46,49077.6,59571.93,66450.42,51458.88,57292.29,53492.89
Santo Domingo,45600.06,53081.49,38504.62,49867.73,49539.82,38924.49,52615.64,57692.8,45660.56,57161.46
Sosua,53468.69,49970.58,54904.96,48662.57,57499.96,69744.4,59876.85,52408.73,56736.9,50380.77


# Fin