# Actualización de campos 

Importemos pandas.

In [1]:
import pandas as pd

## Método update()

Creemos los datasets que usaremos.

In [2]:
%%writefile dataset/data_1.csv
clientId,name,bonus,costs
10,Omar Y. Fletcher,0,9999
11,Buffy W. Vincent,1,
12,Mira N. Franklin,2,
13,Ferris Q. Le,3,4

Overwriting dataset/data_1.csv


In [3]:
%%writefile dataset/data_2.csv
clientId,location,bonus,costs
12,"P.O. Box 445, 323 Cursus Rd.",100,12
13,Ap #791-3809 Eu Street,,13
14,6715 Diam. Rd.,200,

Overwriting dataset/data_2.csv


Actualicemos el dataframe 1 con los valores distintos de NaN del dataframe 2 (solo donde coincidan el índice y la columna).

In [4]:
# Load both datasets indexed by clientId so update() aligns rows by client.
df_1 = pd.read_csv("dataset/data_1.csv", index_col="clientId")
df_2 = pd.read_csv("dataset/data_2.csv", index_col="clientId")

print(
    df_1,
    "",
    df_2,
    sep="\n",
)

#
# Update df_1 in place with the non-NaN values of df_2. Only cells whose
# index label and column already exist in df_1 are touched:
#
# * client 12: bonus and costs are both overwritten (100 and 12).
# * client 13: only costs is overwritten (13); bonus keeps its value
#   because it is NaN in df_2.
# * clients 10 and 11 do not appear in df_2, so they stay unchanged.
# * client 14 and the location column exist only in df_2, so they are
#   not added to df_1.
#
df_1.update(df_2)

print("", df_1, sep="\n")

                      name  bonus   costs
clientId                                 
10        Omar Y. Fletcher      0  9999.0
11        Buffy W. Vincent      1     NaN
12        Mira N. Franklin      2     NaN
13            Ferris Q. Le      3     4.0

                              location  bonus  costs
clientId                                            
12        P.O. Box 445, 323 Cursus Rd.  100.0   12.0
13              Ap #791-3809 Eu Street    NaN   13.0
14                      6715 Diam. Rd.  200.0    NaN

                      name  bonus   costs
clientId                                 
10        Omar Y. Fletcher      0  9999.0
11        Buffy W. Vincent      1     NaN
12        Mira N. Franklin    100    12.0
13            Ferris Q. Le      3    13.0


Ahora hagamos lo contrario: actualicemos el dataframe 2 con los valores distintos de NaN del dataframe 1.

In [None]:
# Read both files again and use clientId as the row index of each frame.
df_1 = pd.read_csv("dataset/data_1.csv").set_index("clientId")
df_2 = pd.read_csv("dataset/data_2.csv").set_index("clientId")

print(df_1, "", df_2, sep="\n")

# Reverse direction: df_2 is modified in place with the non-NaN values
# of df_1 wherever both the index label and the column match.
df_2.update(df_1)

print("", df_2, sep="\n")

                      name  bonus   costs
clientId                                 
10        Omar Y. Fletcher      0  9999.0
11        Buffy W. Vincent      1     NaN
12        Mira N. Franklin      2     NaN
13            Ferris Q. Le      3     4.0

                              location  bonus  costs
clientId                                            
12        P.O. Box 445, 323 Cursus Rd.  100.0   12.0
13              Ap #791-3809 Eu Street    NaN   13.0
14                      6715 Diam. Rd.  200.0    NaN

                              location  bonus  costs
clientId                                            
12        P.O. Box 445, 323 Cursus Rd.    2.0   12.0
13              Ap #791-3809 Eu Street    3.0    4.0
14                      6715 Diam. Rd.  200.0    NaN


## Método combine() sobre columnas

Creemos los datasets.

In [6]:
%%writefile dataset/data_1.csv
A,B
0,4
0,4

Overwriting dataset/data_1.csv


In [7]:
%%writefile dataset/data_2.csv
A,B
1,3
1,3

Overwriting dataset/data_2.csv


Este método combina dos dataframes columna por columna: la función recibe la columna de cada dataframe (como Series) y devuelve la columna que quedará en el resultado.

In [8]:
df_1 = pd.read_csv("dataset/data_1.csv")
df_2 = pd.read_csv("dataset/data_2.csv")

print(df_1, "", df_2, "", sep="\n")


def take_smaller(s1: pd.Series, s2: pd.Series) -> pd.Series:
    """Return whichever of the two columns has the smaller sum.

    DataFrame.combine() calls this once per column, passing the column of
    the calling frame as ``s1`` and the column of ``other`` as ``s2``.
    When both sums are equal, ``s2`` is returned.
    """
    return s1 if s1.sum() < s2.sum() else s2


# Build the result column by column, keeping the column with the smaller sum.
df_1 = df_1.combine(
    other=df_2,
    func=take_smaller,
)

print(df_1)

   A  B
0  0  4
1  0  4

   A  B
0  1  3
1  1  3

   A  B
0  0  3
1  0  3


## Método combine() sobre elementos


Creemos los datasets.

In [9]:
%%writefile dataset/data_1.csv
A,B
3,6
4,5

Overwriting dataset/data_1.csv


In [10]:
%%writefile dataset/data_2.csv
A,B
1,2
9,7

Overwriting dataset/data_2.csv


In [11]:
import numpy as np

# Read the two frames that will be compared element by element.
df_1 = pd.read_csv("dataset/data_1.csv")
df_2 = pd.read_csv("dataset/data_2.csv")

for item in (df_1, "", df_2, ""):
    print(item)

# np.minimum is applied to each pair of matching columns and yields,
# position by position, the smaller of the two values.
df_1 = df_1.combine(df_2, np.minimum)

print(df_1)

   A  B
0  3  6
1  4  5

   A  B
0  1  2
1  9  7

   A  B
0  1  2
1  4  5


## Método combine_first() 


Rellena los valores nulos de un dataframe con los valores correspondientes (mismo índice y columna) de otro dataframe; los valores no nulos del primero se conservan.


In [12]:
# df_1 has nulls in its first row; df_2 is fully populated.
df_1 = pd.DataFrame({"A": [None, 0], "B": [None, 4]})
df_2 = pd.DataFrame({"A": [1, 1], "B": [3, 3]})

# combine_first() keeps df_1's values and fills only its null cells
# with the value found at the same position in df_2.
result = df_1.combine_first(df_2)

for frame in (df_1, "", df_2, "", result):
    print(frame)

     A    B
0  NaN  NaN
1  0.0  4.0

   A  B
0  1  3
1  1  3

     A    B
0  1.0  3.0
1  0.0  4.0
