In [90]:
import numpy as np
import pandas as pd
from pandas import Series
from pandas import DataFrame

# Float Series whose labels are deliberately not in alphabetical order.
s1 = Series(data=[4.5, 7.2, -5.3, 3.6], index=["d", "b", "a", "c"])
s1

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

# Reindexado

In [91]:
# Passing an existing Series together with `index=` does not relabel its
# values: none of the requested labels exist in s1, so every entry comes
# out as NaN (see the output below).

s2 = Series(s1, index=["l", "m", "n", "o"])
s2

l   NaN
m   NaN
n   NaN
o   NaN
dtype: float64

In [92]:
# Assigning to `.index` relabels s1 in place; the values keep their order.
s1.index = list("lmno")
s1

l    4.5
m    7.2
n   -5.3
o    3.6
dtype: float64

In [93]:
# Reindex to the labels m..q. "p" and "q" are absent from s1, and
# method="ffill" fills them with the last preceding value (3.6, from "o").
# "l" is not requested, so it does not appear in the result.
s1.reindex(["m","n","o","p","q"], method="ffill")

m    7.2
n   -5.3
o    3.6
p    3.6
q    3.6
dtype: float64

# Cómo alinear los datos

In [94]:
# Two Series with partially overlapping labels; "+" aligns on labels,
# not on position, and the result is sorted by label.
s1 = Series(data=[4.5, 7.2, -5.3, 3.6], index=["d", "b", "a", "c"])
s2 = Series(data=[0, 7, 1, 2, 3], index=["d", "b", "e", "c", "f"])
s1 + s2


a     NaN
b    14.2
c     5.6
d     4.5
e     NaN
f     NaN
dtype: float64

In [95]:
# Method form of s1 + s2: labels are aligned, and a label present in only
# one operand ("a", "e", "f") yields NaN.
s1.add(s2)

a     NaN
b    14.2
c     5.6
d     4.5
e     NaN
f     NaN
dtype: float64

In [96]:
# fill_value=0 stands in for a label that is missing from one operand, so
# "a", "e" and "f" keep the value from the Series that does contain them.
s1.add(s2, fill_value=0)

a    -5.3
b    14.2
c     5.6
d     4.5
e     1.0
f     3.0
dtype: float64

In [97]:
# Two frames of different shape: df1 is 3x4 (columns a-d), df2 is 4x5 (a-e).
# "+" aligns on row and column labels; a cell present in only one frame
# becomes NaN.
df1 = DataFrame(np.arange(12.0).reshape(3, 4), columns=["a", "b", "c", "d"])
df2 = DataFrame(np.arange(20.0).reshape(4, 5), columns=["a", "b", "c", "d", "e"])

df1 + df2

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,
1,9.0,11.0,13.0,15.0,
2,18.0,20.0,22.0,24.0,
3,,,,,


In [98]:
# The same method pattern is shown here for mul; the author lists add, sub,
# div, mul, pow and floordiv as the available operations.
# With fill_value=1 a cell missing from one frame counts as 1, so the other
# frame's value passes through unchanged (column "e" and row 3 below).
# mul and rmul print the same table here.
display(df1.mul(df2, fill_value=1))
display(df1.rmul(df2, fill_value=1))

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,4.0,9.0,4.0
1,20.0,30.0,42.0,56.0,9.0
2,80.0,99.0,120.0,143.0,14.0
3,15.0,16.0,17.0,18.0,19.0


Unnamed: 0,a,b,c,d,e
0,0.0,1.0,4.0,9.0,4.0
1,20.0,30.0,42.0,56.0,9.0
2,80.0,99.0,120.0,143.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [99]:
# sub computes df1 - df2; rsub is the reversed form, df2 - df1.
# With fill_value=0 a cell missing from one frame counts as 0.
display(df1.sub(df2, fill_value=0))
display(df1.rsub(df2, fill_value=0))

Unnamed: 0,a,b,c,d,e
0,0.0,0.0,0.0,0.0,-4.0
1,-1.0,-1.0,-1.0,-1.0,-9.0
2,-2.0,-2.0,-2.0,-2.0,-14.0
3,-15.0,-16.0,-17.0,-18.0,-19.0


Unnamed: 0,a,b,c,d,e
0,0.0,0.0,0.0,0.0,4.0
1,1.0,1.0,1.0,1.0,9.0
2,2.0,2.0,2.0,2.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [100]:
# div(1) computes df1 / 1 (values unchanged); rdiv(1) computes 1 / df1,
# which gives inf in the cell where df1 holds 0.0.
display(df1.div(1))
display(df1.rdiv(1))

Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,5.0,6.0,7.0
2,8.0,9.0,10.0,11.0


Unnamed: 0,a,b,c,d
0,inf,1.0,0.5,0.333333
1,0.25,0.2,0.166667,0.142857
2,0.125,0.111111,0.1,0.090909


In [101]:
# reindex also accepts fill_value: column "e" does not exist in df1, so it
# is created and filled with 1000.
df1.reindex(columns=df2.columns, fill_value=1000)

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,1000
1,4.0,5.0,6.0,7.0,1000
2,8.0,9.0,10.0,11.0,1000


# Operaciones entre DataFrames/Series

In [102]:
# Fixtures for the DataFrame/Series examples.
df1 = DataFrame(np.arange(12.0).reshape(3, 4), columns=["a", "b", "c", "d"])
df2 = DataFrame(
    np.arange(12.0).reshape(4, 3),
    index=["Sinaloa", "Tamaulipas", "Jalisco", "Nayarit"],
    columns=["b", "d", "e"],
)
s1 = df2.iloc[0]  # first row of df2 as a Series labelled by column name
s2 = Series(range(3), index=["b", "e", "f"])

# NumPy broadcasting: the first row is subtracted from every row of the array.
arr = np.arange(12.0).reshape(3, 4)
print(arr)
print(arr - arr[0])

[[ 0.  1.  2.  3.]
 [ 4.  5.  6.  7.]
 [ 8.  9. 10. 11.]]
[[0. 0. 0. 0.]
 [4. 4. 4. 4.]
 [8. 8. 8. 8.]]


In [103]:
# Subtracting a Series from a DataFrame matches the Series labels (b, d, e)
# against the columns and repeats the subtraction on every row.
display(df2)
print(s1)
df2-s1

Unnamed: 0,b,d,e
Sinaloa,0.0,1.0,2.0
Tamaulipas,3.0,4.0,5.0
Jalisco,6.0,7.0,8.0
Nayarit,9.0,10.0,11.0


b    0.0
d    1.0
e    2.0
Name: Sinaloa, dtype: float64


Unnamed: 0,b,d,e
Sinaloa,0.0,0.0,0.0
Tamaulipas,3.0,3.0,3.0
Jalisco,6.0,6.0,6.0
Nayarit,9.0,9.0,9.0


In [104]:
# Column "d" of df2 as a Series, indexed by df2's row labels.
s3 = df2["d"]

In [105]:
# axis="index" matches s3 against the row labels, so each row has its own
# "d" value subtracted from all of its columns.
display(df2)
print(s3)
df2.sub(s3, axis="index")


Unnamed: 0,b,d,e
Sinaloa,0.0,1.0,2.0
Tamaulipas,3.0,4.0,5.0
Jalisco,6.0,7.0,8.0
Nayarit,9.0,10.0,11.0


Sinaloa        1.0
Tamaulipas     4.0
Jalisco        7.0
Nayarit       10.0
Name: d, dtype: float64


Unnamed: 0,b,d,e
Sinaloa,-1.0,0.0,1.0
Tamaulipas,-1.0,0.0,1.0
Jalisco,-1.0,0.0,1.0
Nayarit,-1.0,0.0,1.0


# Funciones y mapeo

In [107]:
# rsub(1) computes 1 - df2 element-wise.
df3 = df2.rsub(1)
display(df3)

Unnamed: 0,b,d,e
Sinaloa,1.0,0.0,-1.0
Tamaulipas,-2.0,-3.0,-4.0
Jalisco,-5.0,-6.0,-7.0
Nayarit,-8.0,-9.0,-10.0


In [108]:
# A NumPy ufunc can be applied directly to a DataFrame; the result keeps
# the row and column labels.
np.abs(df3)

Unnamed: 0,b,d,e
Sinaloa,1.0,0.0,1.0
Tamaulipas,2.0,3.0,4.0
Jalisco,5.0,6.0,7.0
Nayarit,8.0,9.0,10.0


In [110]:
def f(x):
    """Return the range of ``x``: its maximum minus its minimum."""
    highest, lowest = x.max(), x.min()
    return highest - lowest

# axis="columns" hands each row to f, giving one value per row label.
df3.apply(f, axis="columns")

Sinaloa       2.0
Tamaulipas    2.0
Jalisco       2.0
Nayarit       2.0
dtype: float64

In [111]:
def f2(x):
    """Summarise ``x`` as a Series of its minimum, maximum and median.

    The entries are labelled "min", "max" and "mediana", in that order.
    """
    stats = [x.min(), x.max(), x.median()]
    return Series(stats, index=["min", "max", "mediana"])

# With the default axis, f2 receives each column; because it returns a
# Series, the result is a DataFrame with one row per statistic.
df3.apply(f2)

Unnamed: 0,b,d,e
min,-8.0,-9.0,-10.0
max,1.0,0.0,-1.0
mediana,-3.5,-4.5,-5.5
