# Pandas

In [1]:
import pandas as pd

In [21]:
import numpy as np

In [18]:
# Create a Series from a plain list, labelling the entries 0..n-1 explicitly.
# (Author's note: a dictionary counts as a single element.)
valores = [6, 8, 34, 94, 78]
indice = range(0, len(valores))

serie1 = pd.Series(valores, index=indice)
serie1

0     6
1     8
2    34
3    94
4    78
dtype: int64

In [19]:
# Second Series, built on the same index as serie1 so that both align
# element by element in later operations.
# The raw values are bound only to valores2; serie2 is bound exactly once,
# to the finished Series.
valores2 = [28, 45, 852, 3, 16]
serie2 = pd.Series(data=valores2, index=indice)
serie2

0     28
1     45
2    852
3      3
4     16
dtype: int64

In [20]:
# Element-wise power: each value of serie1 is raised to the value of serie2
# that carries the same index label.
# WARNING: both Series are int64 and results such as 6**28 do not fit in
# 64 bits, so the values displayed below have silently overflowed (note the
# negative number and the zeros) and are not the true powers. Converting an
# operand first, e.g. serie1.astype(float), avoids the wrap-around.
serie1**serie2

0   -1823562080465190912
1                      0
2                      0
3                 830584
4    3945326505906733056
dtype: int64

Las operaciones se realizan elemento a elemento, por lo que no podemos combinar distintos tipos de datos.

### numpy

In [22]:
# 4x4 matrix of samples drawn from a normal distribution with mean -10 and
# standard deviation 10. No seed is set in this cell, so the values differ
# on every run.
array = np.random.normal(loc=-10, scale=10, size=(4, 4))
array

array([[-19.74768333,  -2.15800517, -10.27719409, -24.2787015 ],
       [ -0.12760092,  -6.88647458, -19.30588516, -19.13187694],
       [-14.61458958, -26.40369262, -25.54350369, -20.77586995],
       [-29.85355293,  -7.83084462,   1.98060184,  -2.62357645]])

### pandas

Un DataFrame es un conjunto de Series (una por columna) que comparten el mismo índice.

In [24]:
# Fully explicit construction: the data rows, the row labels and the column
# labels are all spelled out.
pd.DataFrame(
    data=[[2, 3], [4, 5]],
    index=["a", "b"],
    columns=["W", "X"],
)

Unnamed: 0,W,X
a,2,3
b,4,5


In [26]:
# When no index or columns are given, rows and columns both get default
# integer labels starting at 0.
pd.DataFrame(data=array)

Unnamed: 0,0,1,2,3
0,-19.747683,-2.158005,-10.277194,-24.278701
1,-0.127601,-6.886475,-19.305885,-19.131877
2,-14.61459,-26.403693,-25.543504,-20.77587
3,-29.853553,-7.830845,1.980602,-2.623576


In [56]:
# Label the rows 1..n instead of the default 0..n-1 and give the columns
# the names W, X, Y and Z.
df = pd.DataFrame(
    data=array,
    index=range(1, len(array) + 1),
    columns=["W", "X", "Y", "Z"],
)
df

Unnamed: 0,W,X,Y,Z
1,-19.747683,-2.158005,-10.277194,-24.278701
2,-0.127601,-6.886475,-19.305885,-19.131877
3,-14.61459,-26.403693,-25.543504,-20.77587
4,-29.853553,-7.830845,1.980602,-2.623576


In [31]:
# Single value by label: row 2, column "X" (row label first, then column).
# One .loc[row, column] lookup replaces the chained form df["X"][2], which
# indexes twice, is ambiguous between labels and positions on an integer
# index, and is unsafe if ever used for assignment.
df.loc[2, "X"]

np.float64(-6.886474583708912)

In [37]:
# A column can also be accessed as an attribute of the DataFrame.
df.X

1    -2.158005
2    -6.886475
3   -26.403693
4    -7.830845
Name: X, dtype: float64

In [43]:
# A slice inside [] selects rows (here the first two), not columns.
df[:2]

Unnamed: 0,W,X,Y,Z
1,-19.747683,-2.158005,-10.277194,-24.278701
2,-0.127601,-6.886475,-19.305885,-19.131877


In [45]:
# Several columns at once: pass a list of the column names of interest
# (hence the second pair of brackets).
df[["W", "Y"]]

Unnamed: 0,W,Y
1,-19.747683,-10.277194
2,-0.127601,-19.305885
3,-14.61459,-25.543504
4,-29.853553,1.980602


### operaciones dentro del dataframe

In [46]:
# Element-wise sum of two columns; the result is a new Series and df is
# left unchanged.
df["X"] + df["Z"]

1   -26.436707
2   -26.018352
3   -47.179563
4   -10.454421
dtype: float64

In [57]:
# DataFrames are mutable: assigning to a label that does not exist yet
# adds a new column to df.
df["suma_X_Z"] = df["X"] + df["Z"]
df

Unnamed: 0,W,X,Y,Z,suma_X_Z
1,-19.747683,-2.158005,-10.277194,-24.278701,-26.436707
2,-0.127601,-6.886475,-19.305885,-19.131877,-26.018352
3,-14.61459,-26.403693,-25.543504,-20.77587,-47.179563
4,-29.853553,-7.830845,1.980602,-2.623576,-10.454421


In [52]:
# Columns are removed with the drop() method, which needs to know the axis:
# axis=1 / "columns" targets columns, axis=0 / "index" targets rows.
# drop() returns a new DataFrame; df itself is not modified.
df.drop(labels="Y", axis="columns")

Unnamed: 0,W,X,Z,suma_X_Z
1,-19.747683,-2.158005,-24.278701,-26.436707
2,-0.127601,-6.886475,-19.131877,-26.018352
3,-14.61459,-26.403693,-20.77587,-47.179563
4,-29.853553,-7.830845,-2.623576,-10.454421


In [58]:
# Make the removal permanent by rebinding df to the result of drop().
# This has the same visible effect as inplace=True, and errors="ignore"
# keeps the cell re-runnable: once "Y" is gone, running it again is a no-op
# instead of raising KeyError.
df = df.drop("Y", axis=1, errors="ignore")

In [54]:
# Show df again to confirm that column "Y" is gone.
df

Unnamed: 0,W,X,Z,suma_X_Z
1,-19.747683,-2.158005,-24.278701,-26.436707
2,-0.127601,-6.886475,-19.131877,-26.018352
3,-14.61459,-26.403693,-20.77587,-47.179563
4,-29.853553,-7.830845,-2.623576,-10.454421


#### Para localizar filas

In [59]:
# .loc selects by index label: this returns the row labelled 3 as a Series.
df.loc[3]

W          -14.614590
X          -26.403693
Z          -20.775870
suma_X_Z   -47.179563
Name: 3, dtype: float64

In [61]:
# .iloc selects by integer position (0-based): position 2 is the third row,
# whose label is 3.
df.iloc[2]

W          -14.614590
X          -26.403693
Z          -20.775870
suma_X_Z   -47.179563
Name: 3, dtype: float64

In [62]:
# A list of positions returns several rows as a DataFrame
# (here the first and third rows).
df.iloc[[0,2]]

Unnamed: 0,W,X,Z,suma_X_Z
1,-19.747683,-2.158005,-24.278701,-26.436707
3,-14.61459,-26.403693,-20.77587,-47.179563


In [63]:
# Display the whole DataFrame again for reference.
df

Unnamed: 0,W,X,Z,suma_X_Z
1,-19.747683,-2.158005,-24.278701,-26.436707
2,-0.127601,-6.886475,-19.131877,-26.018352
3,-14.61459,-26.403693,-20.77587,-47.179563
4,-29.853553,-7.830845,-2.623576,-10.454421


In [65]:
# Single value by label: .loc takes the row label first, then the column label.
df.loc[3, "X"]

np.float64(-26.403692621848414)

In [68]:
# Lists of row labels and of column labels select a sub-DataFrame.
df.loc[[2, 4], ["X", "suma_X_Z"]]

Unnamed: 0,X,suma_X_Z
2,-6.886475,-26.018352
4,-7.830845,-10.454421
