# PANDAS: CONCEPTOS CLAVE

In [1]:
import numpy as np
import pandas as pd

### SERIES

##### Creacion 

In [2]:
# A Series can be built from a list, a NumPy array or a dictionary.
# When no index is given, pandas assigns the default integer labels 0..n-1.
serie = pd.Series([1.5, 1.6, 1.75, 1.8])
print(serie, '-' * 100, sep='\n')

serie = pd.Series(np.arange(1, 11))
print(serie, '-' * 100, sep='\n')

# Seeded generator (same idea as np.random.seed), so the drawn values are reproducible.
rng = np.random.RandomState(42)
ser = pd.Series(rng.randint(0, 10, 4))
print(ser)



0    1.50
1    1.60
2    1.75
3    1.80
dtype: float64
----------------------------------------------------------------------------------------------------
0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int32
----------------------------------------------------------------------------------------------------
0    6
1    3
2    7
3    4
dtype: int32


In [39]:
# Explicit index labels are supplied through the `index` argument.
serie = pd.Series(np.arange(4), index=['Jane', 'Joe', 'Susan', 'Mike'])
print(serie, '-' * 100, sep='\n')

serie = pd.Series(np.arange(1, 5), index=[2, 5, 3, 7])
print(serie, '-' * 100, sep='\n')

# A scalar is repeated for every label in the index.
serie = pd.Series(5, index=[100, 200, 300])
print(serie, '-' * 100, sep='\n')

# From a dictionary: keys become the index labels, values become the data.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}
serie = pd.Series(population_dict)
print(serie, '-' * 100, sep='\n')

# Passing `index` together with a dictionary keeps only the requested labels (and their values).
serie = pd.Series(population_dict, index=['California', 'Florida'])
print(serie)

Jane     0
Joe      1
Susan    2
Mike     3
dtype: int32
----------------------------------------------------------------------------------------------------
2    1
5    2
3    3
7    4
dtype: int32
----------------------------------------------------------------------------------------------------
100    5
200    5
300    5
dtype: int64
----------------------------------------------------------------------------------------------------
California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64
----------------------------------------------------------------------------------------------------
California    38332521
Florida       19552860
dtype: int64


##### Acceso 

In [77]:
serie = pd.Series([1.5, 1.6, 1.75, 1.8])
print(serie, '-' * 100, sep='\n')

# The values, as a NumPy array.
print(serie.values, '-' * 100, sep='\n')

# The index object itself (shows how the labels were generated), then the labels as a plain list.
print(serie.index)
print(serie.index.tolist())
print('-' * 100)

# serie[label] -> value: the index label plays the role of a dictionary key.
print(serie[1])
# A slice refers to positions 0..n-1 (not to labels) and excludes the stop position.
print(serie[1:4])


0    1.50
1    1.60
2    1.75
3    1.80
dtype: float64
----------------------------------------------------------------------------------------------------
[1.5  1.6  1.75 1.8 ]
----------------------------------------------------------------------------------------------------
RangeIndex(start=0, stop=4, step=1)
----------------------------------------------------------------------------------------------------
RangeIndex(start=0, stop=4, step=1)
[0, 1, 2, 3]
----------------------------------------------------------------------------------------------------
1.6
1    1.60
2    1.75
3    1.80
dtype: float64


In [87]:
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=list('abcd'))
print(data, '-' * 100, sep='\n')

# keys() gives the same labels as .index (key == index label).
print(data.keys(), '-' * 100, sep='\n')
print(data.index, '-' * 100, sep='\n')

# The values alone, as a NumPy array.
print(data.values, '-' * 100, sep='\n')

# (label, value) pairs, as with a dictionary's items().
print(list(data.items()), '-' * 100, sep='\n')


a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64
----------------------------------------------------------------------------------------------------
Index(['a', 'b', 'c', 'd'], dtype='object')
----------------------------------------------------------------------------------------------------
Index(['a', 'b', 'c', 'd'], dtype='object')
----------------------------------------------------------------------------------------------------
[0.25 0.5  0.75 1.  ]
----------------------------------------------------------------------------------------------------
[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]
----------------------------------------------------------------------------------------------------


In [36]:
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}
serie = pd.Series(population_dict)
print(serie, '-' * 100, sep='\n')

# Label lookup: the value stored under 'California'.
print(serie['California'], '-' * 100, sep='\n')
# Slicing with string labels also includes the end label.
print(serie['California':'New York'])

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64
----------------------------------------------------------------------------------------------------
38332521
----------------------------------------------------------------------------------------------------
California    38332521
Texas         26448193
New York      19651127
dtype: int64


In [57]:
data = pd.Series([1, np.nan, 'hello', None])
print(data, '-' * 100, sep='\n')

# Keep only the entries whose value equals 1.
print(data[data == 1], '-' * 100, sep='\n')

# Keep only the entries whose value is not 1.
print(data[data != 1])


0        1
1      NaN
2    hello
3     None
dtype: object
----------------------------------------------------------------------------------------------------
0    1
dtype: object
----------------------------------------------------------------------------------------------------
1      NaN
2    hello
3     None
dtype: object


##### Edicion 

In [91]:
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=list('abcd'))
print(data, '-' * 100, sep='\n')

# Assigning to a label that does not exist yet appends a new entry.
data['e'] = 1.25
print(data, '-' * 100, sep='\n')

# Assigning to an existing label overwrites its value.
data['e'] = 0
print(data)

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64
----------------------------------------------------------------------------------------------------
a    0.25
b    0.50
c    0.75
d    1.00
e    1.25
dtype: float64
----------------------------------------------------------------------------------------------------
a    0.25
b    0.50
c    0.75
d    1.00
e    0.00
dtype: float64


##### Seleccion de datos

In [97]:
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=list('abcd'))
print(data, '-' * 100, sep='\n')

# Element-wise boolean mask: True where the value lies strictly between 0.3 and 0.8.
print((data > 0.3) & (data < 0.8), '-' * 100, sep='\n')

# Indexing with that mask keeps only the entries marked True.
print(data[(data > 0.3) & (data < 0.8)])

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64
----------------------------------------------------------------------------------------------------
a    False
b     True
c     True
d    False
dtype: bool
----------------------------------------------------------------------------------------------------
b    0.50
c    0.75
dtype: float64


### DATA FRAME

##### Creacion 

In [66]:
area_dict = {
    'Texas': 695662,
    'California': 423967,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
    'Carolina Sur': 445698,
}
area = pd.Series(area_dict)

population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}
population = pd.Series(population_dict)

# Dictionary of Series: one column per key. A label present in only one Series gets NaN in the other column.
Data_frame = pd.DataFrame({'population': population, 'area': area})
print(Data_frame, '-' * 100, sep='\n')

# A DataFrame can also be built from a single Series.
print(pd.DataFrame(data=population, columns=['Population']), '-' * 100, sep='\n')

# From a list of dictionaries (one dictionary per row).
data = [dict(a=i, b=2 * i) for i in range(3)]
print(data)
print(pd.DataFrame(data), '-' * 100, sep='\n')

# List of dictionaries with different keys: the missing entries become NaN.
print(pd.DataFrame([{'nombre': "Juan", 'edad': 29}, {'edad': 26, 'altura': 1.75}]), '-' * 100, sep='\n')

              population    area
California    38332521.0  423967
Carolina Sur         NaN  445698
Florida       19552860.0  170312
Illinois      12882135.0  149995
New York      19651127.0  141297
Texas         26448193.0  695662
----------------------------------------------------------------------------------------------------
            Population
California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
----------------------------------------------------------------------------------------------------
[{'a': 0, 'b': 0}, {'a': 1, 'b': 2}, {'a': 2, 'b': 4}]
   a  b
0  0  0
1  1  2
2  2  4
----------------------------------------------------------------------------------------------------
  nombre  edad  altura
0   Juan    29     NaN
1    NaN    26    1.75
----------------------------------------------------------------------------------------------------


In [3]:
# From a NumPy matrix, with explicit row and column labels.
np.random.seed(100)
x = pd.DataFrame(
    np.random.rand(3, 2),
    index=['a', 'b', 'c'],
    columns=['Columna 1', 'Columna 2'],
)
print(x, '-' * 100, sep='\n')

# A structured dtype sets, per column, its name and how numbers are stored (int/float, byte width).
A = np.zeros(3, dtype=[('A', 'i8'), ('B', 'f8'), ('C', 'f8')])
print(A)
print(pd.DataFrame(A), '-' * 100, sep='\n')

# Random integers. `rng` is not created in this cell: it must already exist in the kernel.
df = pd.DataFrame(rng.randint(0, 10, (3, 4)), columns=list('ABCD'))
print(df)


   Columna 1  Columna 2
a   0.543405   0.278369
b   0.424518   0.844776
c   0.004719   0.121569
----------------------------------------------------------------------------------------------------
[(0, 0., 0.) (0, 0., 0.) (0, 0., 0.)]
   A    B    C
0  0  0.0  0.0
1  0  0.0  0.0
2  0  0.0  0.0
----------------------------------------------------------------------------------------------------
   A  B  C  D
0  6  9  2  6
1  7  4  3  7
2  7  2  5  4


##### Acceso

In [52]:
# Row labels, then column labels, of the frame.
print(Data_frame.index, '-' * 100, sep='\n')
print(Data_frame.columns, '-' * 100, sep='\n')

# A single column selected by name.
print(Data_frame['area'], '-' * 100, sep='\n')

# .loc with a row label: every column of the 'California' row.
print(Data_frame.loc['California'], '-' * 100, sep='\n')

# .loc with (row label, column label): the single value at that position.
print(Data_frame.loc['California', 'area'], '-' * 100, sep='\n')

# The values as a plain array, without column or index labels.
print(Data_frame.values)

Index(['California', 'Carolina Sur', 'Florida', 'Illinois', 'New York',
       'Texas'],
      dtype='object')
----------------------------------------------------------------------------------------------------
Index(['population', 'area'], dtype='object')
----------------------------------------------------------------------------------------------------
California      423967
Carolina Sur    445698
Florida         170312
Illinois        149995
New York        141297
Texas           695662
Name: area, dtype: int64
----------------------------------------------------------------------------------------------------
population    38332521.0
area            423967.0
Name: California, dtype: float64
----------------------------------------------------------------------------------------------------
423967
----------------------------------------------------------------------------------------------------
[[38332521.   423967.]
 [      nan   445698.]
 [19552860.   170312.]
 [12882135.   14

##### Seleccion de datos

In [109]:
area = pd.Series({'California': 423967, 'Texas': 695662,
                  'New York': 141297, 'Florida': 170312,
                  'Illinois': 149995})
pop = pd.Series({'California': 38332521, 'Texas': 26448193,
                 'New York': 19651127, 'Florida': 19552860,
                 'Illinois': 12882135})

data = pd.DataFrame({'area': area, 'population': pop})
print(data)
print('-'*100)

# A column can be reached as an attribute (data.area) or by key (data['area']).
# Compare contents with .equals() rather than `is`: `is` tests object identity,
# which only holds if pandas hands back the very same Series object for both
# accesses, and that is an internal detail, not a guarantee.
print(data.area.equals(data['area']))
print('-'*100)

# Assigning to a new key adds a derived column to the DataFrame.
data['density'] = data['population'] / data['area']
print(data)
print('-'*100)

# Transpose: rows become columns and vice versa.
print(data.T)
print('-'*100)


              area  population
California  423967    38332521
Texas       695662    26448193
New York    141297    19651127
Florida     170312    19552860
Illinois    149995    12882135
----------------------------------------------------------------------------------------------------
True
----------------------------------------------------------------------------------------------------
              area  population     density
California  423967    38332521   90.413926
Texas       695662    26448193   38.018740
New York    141297    19651127  139.076746
Florida     170312    19552860  114.806121
Illinois    149995    12882135   85.883763
----------------------------------------------------------------------------------------------------
              California         Texas      New York       Florida  \
area        4.239670e+05  6.956620e+05  1.412970e+05  1.703120e+05   
population  3.833252e+07  2.644819e+07  1.965113e+07  1.955286e+07   
density     9.041393e+01  3.801874e+01

In [110]:
# iloc: selection by integer position, written as [rows, columns].
# Here: rows 0 and 2 (every second row before position 4), columns at positions 1 and 2.
print(data.iloc[:4:2, 1:3], '-' * 100, sep='\n')
# loc: selection by label. Rows from 'Illinois' back to 'Texas' (step -1),
# columns from 'population' through 'density'.
print(data.loc['Illinois':'Texas':-1, 'population':'density'])

            population     density
California    38332521   90.413926
New York      19651127  139.076746
----------------------------------------------------------------------------------------------------
          population     density
Illinois    12882135   85.883763
Florida     19552860  114.806121
New York    19651127  139.076746
Texas       26448193   38.018740


### INDICES

El objeto Index de pandas es un objeto mas, igual que lo son las Series y los DataFrames.

In [74]:
# `ejemplo` holds a standalone set of index labels.
ejemplo = pd.Index([2, 3, 5, 7, 11])
print(ejemplo, '-' * 100, sep='\n')

# The label at position 1 (the second one), then every second label from start to end.
print(ejemplo[1])
print(ejemplo[::2])
print('-' * 100)

# size: number of elements, shape: structure, ndim: number of dimensions, dtype: element type.
print(ejemplo.size, ejemplo.shape, ejemplo.ndim, ejemplo.dtype)
# An Index cannot be modified by item assignment, e.g. ejemplo[1] = 12.

Index([2, 3, 5, 7, 11], dtype='int64')
----------------------------------------------------------------------------------------------------
3
Index([2, 5, 11], dtype='int64')
----------------------------------------------------------------------------------------------------
5 (5,) 1 int64


##### Relacion entre indices

In [75]:
indA = pd.Index([1, 3, 5, 7, 9])
indB = pd.Index([2, 3, 5, 7, 11])

# Set-style operations between two indexes, printed one per line:
#   intersection         -> labels present in both
#   union                -> labels present in either
#   symmetric_difference -> labels present in exactly one of them
print(
    indA.intersection(indB),
    indA.union(indB),
    indA.symmetric_difference(indB),
    sep='\n',
)

Index([3, 5, 7], dtype='int64')
Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')
Index([1, 2, 9, 11], dtype='int64')


Diferencias entre loc y iloc

> loc gets rows (and/or columns) with particular labels.
>
> iloc gets rows (and/or columns) at integer locations.

In [5]:
# Source: https://stackoverflow.com/questions/31593201/how-are-iloc-and-loc-different

# The integer labels are deliberately out of positional order so loc and iloc give different answers.
s = pd.Series(['a', 'b', 'c', 'd', 'e', 'f'], index=[49, 48, 47, 0, 1, 2])

# loc: the value whose index LABEL is 0.
print(s.loc[0], '-' * 100, sep='\n')

# iloc: the value at index POSITION 0.
print(s.iloc[0], '-' * 100, sep='\n')

# loc slice: rows with labels from 0 to 1, end label included.
print(s.loc[0:1], '-' * 100, sep='\n')

# iloc slice: rows at positions from 0 up to 1, end position excluded.
print(s.iloc[0:1])


d
----------------------------------------------------------------------------------------------------
a
----------------------------------------------------------------------------------------------------
0    d
1    e
dtype: object
----------------------------------------------------------------------------------------------------
49    a
dtype: object


### ALINEACION DE INDICES

In [20]:
area = pd.Series(
    {'Alaska': 1723337, 'Texas': 695662, 'California': 423967},
    name='area',
)
population = pd.Series(
    {'California': 38332521, 'Texas': 26448193, 'New York': 19651127},
    name='population',
)

# The division is aligned by label: a label missing from either Series yields NaN.
relacion = population / area
print(relacion, '-' * 100, sep='\n')

# The same alignment happens when both Series become columns of one DataFrame.
dataf = pd.DataFrame({'area': area, 'populacion': population})
print(dataf, '-' * 100, sep='\n')

# Derived column; the bare name on the last line makes the notebook display the frame.
dataf['relacion_area'] = dataf['populacion'] / dataf['area']
dataf

Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64
----------------------------------------------------------------------------------------------------
                 area  populacion
Alaska      1723337.0         NaN
California   423967.0  38332521.0
New York          NaN  19651127.0
Texas        695662.0  26448193.0
----------------------------------------------------------------------------------------------------


Unnamed: 0,area,populacion,relacion_area
Alaska,1723337.0,,
California,423967.0,38332521.0,90.413926
New York,,19651127.0,
Texas,695662.0,26448193.0,38.01874


### OPERACIONES

| Python Operator | Pandas Method(s)                      |
|-----------------|---------------------------------------|
| ``+``           | ``add()``                             |
| ``-``           | ``sub()``, ``subtract()``             |
| ``*``           | ``mul()``, ``multiply()``             |
| ``/``           | ``truediv()``, ``div()``, ``divide()``|
| ``//``          | ``floordiv()``                        |
| ``%``           | ``mod()``                             |
| ``**``          | ``pow()``                             |


Operaciones en filas

In [26]:
# Seeded generator so the 3x4 matrix of integers below 10 is reproducible.
semilla = np.random.RandomState(17)
A = semilla.randint(10, size=(3, 4))
print(A, '-' * 100, sep='\n')

# By default the operation is applied row-wise: the first row is subtracted from every row.
print(A - A[0])

[[1 6 6 9]
 [0 6 4 7]
 [4 7 1 1]]
----------------------------------------------------------------------------------------------------
[[ 0  0  0  0]
 [-1  0 -2 -2]
 [ 3  1 -5 -8]]


In [30]:
datafra = pd.DataFrame(A, columns=['Q', 'R', 'S', 'T'])
print(datafra, '-' * 100, sep='\n')

# iloc is needed to pick the first row by its natural (positional) index.
# The bare expression on the last line makes the notebook display the result.
datafra - datafra.iloc[0]

   Q  R  S  T
0  1  6  6  9
1  0  6  4  7
2  4  7  1  1
----------------------------------------------------------------------------------------------------


Unnamed: 0,Q,R,S,T
0,0,0,0,0
1,-1,0,-2,-2
2,3,1,-5,-8


Operaciones en columnas

In [37]:
datafra = pd.DataFrame(A, columns=['Q', 'R', 'S', 'T'])
print(datafra, '-' * 100, sep='\n')

# Subtract the values of column 'R' from every column.
# axis=0: the operation uses a column but proceeds row by row,
# removing each element of 'R' from the matching row.
datafra = datafra.subtract(datafra['R'], axis=0)
datafra

   Q  R  S  T
0  1  6  6  9
1  0  6  4  7
2  4  7  1  1
----------------------------------------------------------------------------------------------------


Unnamed: 0,Q,R,S,T
0,-5,0,0,3
1,-6,0,-2,1
2,-3,0,-6,-6


In [41]:
# To subtract over several columns only, take a partial row: every second column of row 0.
ejemplo = datafra.iloc[0, ::2]
print(ejemplo, '-' * 100, sep='\n')

# Bare expression so the notebook displays the result of the subtraction.
datafra - ejemplo

Q   -5
S    0
Name: 0, dtype: int32
----------------------------------------------------------------------------------------------------


Unnamed: 0,Q,R,S,T
0,0.0,,0.0,
1,-1.0,,-2.0,
2,2.0,,-6.0,


### DATOS AUSENTES

|Function Name      |   NaN-safe Version  | Description                                   |
|-------------------|---------------------|-----------------------------------------------|
| ``np.sum``        | ``np.nansum``       | Compute sum of elements                       |
| ``np.prod``       | ``np.nanprod``      | Compute product of elements                   |
| ``np.mean``       | ``np.nanmean``      | Compute mean of elements                      |
| ``np.std``        | ``np.nanstd``       | Compute standard deviation                    |
| ``np.var``        | ``np.nanvar``       | Compute variance                              |
| ``np.min``        | ``np.nanmin``       | Find minimum value                            |
| ``np.max``        | ``np.nanmax``       | Find maximum value                            |
| ``np.argmin``     | ``np.nanargmin``    | Find index of minimum value                   |
| ``np.argmax``     | ``np.nanargmax``    | Find index of maximum value                   |
| ``np.median``     | ``np.nanmedian``    | Compute median of elements                    |
| ``np.percentile`` | ``np.nanpercentile``| Compute rank-based statistics of elements     |
| ``np.any``        | N/A                 | Evaluate whether any elements are true        |
| ``np.all``        | N/A                 | Evaluate whether all elements are true        |

In [48]:
# With a None among the values, the array gets the generic `object` dtype (see the printed dtype).
vals1 = np.array([1, None, 3, 4])
print(vals1.dtype, '-' * 100, sep='\n')

# Using np.nan instead keeps the array numeric: its dtype is float64.
vals2 = np.array([1, np.nan, 3, 4])
print(vals2.dtype, '-' * 100, sep='\n')

# Any basic aggregation (first column of the table above) over data containing NaN returns NaN.
print(vals2.sum(), vals2.min(), vals2.max())
print('-' * 100)

# The NaN-safe versions ignore the missing entries.
(np.nansum(vals2), np.nanmin(vals2), np.nanmax(vals2))

object
----------------------------------------------------------------------------------------------------
float64
----------------------------------------------------------------------------------------------------
nan nan nan
----------------------------------------------------------------------------------------------------


(8.0, 1.0, 4.0)

#### Detectando valores nulos


In [54]:
data = pd.Series([1, np.nan, 'hello', None])
print(data, '-' * 100, sep='\n')

# isnull(): True where the value is null.
print(data.isnull(), '-' * 100, sep='\n')

# Using that mask as a filter keeps only the null entries.
print(data[data.isnull()], '-' * 100, sep='\n')

# notnull(): True where a value exists and is not null.
print(data.notnull(), '-' * 100, sep='\n')

# Using that mask as a filter keeps only the non-null entries.
print(data[data.notnull()], '-' * 100, sep='\n')

0        1
1      NaN
2    hello
3     None
dtype: object
----------------------------------------------------------------------------------------------------
0    False
1     True
2    False
3     True
dtype: bool
----------------------------------------------------------------------------------------------------
1     NaN
3    None
dtype: object
----------------------------------------------------------------------------------------------------
0     True
1    False
2     True
3    False
dtype: bool
----------------------------------------------------------------------------------------------------
0        1
2    hello
dtype: object
----------------------------------------------------------------------------------------------------


#### Eliminando valores nulos


In [62]:
data = pd.Series([1, np.nan, 'hello', None])
print(data, '-' * 100, sep='\n')

# dropna() returns a new Series without the null values; the result is stored in `guardamos`.
guardamos = data.dropna()
print(data)  # the original Series is left unchanged
print(guardamos, '-' * 100, sep='\n')

# To modify the original Series itself, pass inplace=True.
data.dropna(inplace=True)
print(data)


0        1
1      NaN
2    hello
3     None
dtype: object
----------------------------------------------------------------------------------------------------
0        1
1      NaN
2    hello
3     None
dtype: object
0        1
2    hello
dtype: object
----------------------------------------------------------------------------------------------------
0        1
2    hello
dtype: object


En un DataFrame no se pueden eliminar valores individuales: se borran filas o columnas completas.

In [70]:
df = pd.DataFrame([
    [1, np.nan, 2],
    [2, 3, 5],
    [np.nan, 4, 6],
])
print(df, '-' * 100, sep='\n')

# Default (same as axis=0): keep only the rows without missing data.
print(df.dropna(), '-' * 100, sep='\n')

# axis=1: keep only the columns without missing data.
print(df.dropna(axis=1), '-' * 100, sep='\n')

# Add a column made up entirely of NaN.
df[3] = np.nan
print(df, '-' * 100, sep='\n')

# how='all': drop only the columns whose values are ALL missing.
print(df.dropna(axis='columns', how='all'), '-' * 100, sep='\n')

# how='any': drop every column containing at least one missing value (a single NaN removes the whole column).
print(df.dropna(axis='columns', how='any'), '-' * 100, sep='\n')

     0    1  2
0  1.0  NaN  2
1  2.0  3.0  5
2  NaN  4.0  6
----------------------------------------------------------------------------------------------------
     0    1  2
1  2.0  3.0  5
----------------------------------------------------------------------------------------------------
   2
0  2
1  5
2  6
----------------------------------------------------------------------------------------------------
     0    1  2   3
0  1.0  NaN  2 NaN
1  2.0  3.0  5 NaN
2  NaN  4.0  6 NaN
----------------------------------------------------------------------------------------------------
     0    1  2
0  1.0  NaN  2
1  2.0  3.0  5
2  NaN  4.0  6
----------------------------------------------------------------------------------------------------
   2
0  2
1  5
2  6
----------------------------------------------------------------------------------------------------


In [77]:
print(df)
print('-'*100)

# Keep only the rows that have at least three non-null values.
# 'index' is the documented name for axis 0.
print(df.dropna(axis='index', thresh=3))
print('-'*100)

# Keep only the rows where at least 75% of the columns are non-null.
# `thresh` is documented as an integer count, so the fractional requirement is
# rounded up to a whole number of columns instead of being passed as a float.
print(df.dropna(axis='index', thresh=int(np.ceil(len(df.columns) * 0.75))))
# TODO: look into the `subset` parameter of dropna.
#MIRAR LO DE SUBSET

     0    1  2   3
0  1.0  NaN  2 NaN
1  2.0  3.0  5 NaN
2  NaN  4.0  6 NaN
----------------------------------------------------------------------------------------------------
     0    1  2   3
1  2.0  3.0  5 NaN
----------------------------------------------------------------------------------------------------
     0    1  2   3
1  2.0  3.0  5 NaN


#### Llenando valores nulos


In [85]:
data = pd.Series([1, np.nan, 2, None, 3], index=['a', 'b', 'c', 'd', 'e'])
print(data, '-' * 100, sep='\n')

# Replace every null value with 0.
print(data.fillna(0), '-' * 100, sep='\n')

# Forward fill: walking the Series in order, a NaN takes the previous value.
print(data.ffill(), '-' * 100, sep='\n')

# Backward fill: a NaN takes the next value.
print(data.bfill(), '-' * 100, sep='\n')

a    1.0
b    NaN
c    2.0
d    NaN
e    3.0
dtype: float64
----------------------------------------------------------------------------------------------------
a    1.0
b    0.0
c    2.0
d    0.0
e    3.0
dtype: float64
----------------------------------------------------------------------------------------------------
a    1.0
b    1.0
c    2.0
d    2.0
e    3.0
dtype: float64
----------------------------------------------------------------------------------------------------
a    1.0
b    2.0
c    2.0
d    3.0
e    3.0
dtype: float64
----------------------------------------------------------------------------------------------------


In [89]:
df = pd.DataFrame([
    [1, np.nan, 2],
    [2, 3, 5],
    [np.nan, 4, 6],
])
print(df, '-' * 100, sep='\n')

# Every NaN in the frame is replaced with the mean of column 2.
print(df.fillna(df[2].mean()), '-' * 100, sep='\n')

# Backward fill along axis=1: a NaN takes the next value in its own row.
# The bare expression on the last line makes the notebook display the result.
df.bfill(axis=1)

     0    1  2
0  1.0  NaN  2
1  2.0  3.0  5
2  NaN  4.0  6
----------------------------------------------------------------------------------------------------
          0         1  2
0  1.000000  4.333333  2
1  2.000000  3.000000  5
2  4.333333  4.000000  6
----------------------------------------------------------------------------------------------------


Unnamed: 0,0,1,2
0,1.0,2.0,2.0
1,2.0,3.0,5.0
2,4.0,4.0,6.0
