# La clase DataFrame

In [2]:
# Build the 4x4 array of random integers in [-10, 10) that will back the DataFrame
import pandas as pd
import numpy as np

# Seed NumPy's global generator so a Restart & Run All reproduces the same values
SEED = 42
np.random.seed(SEED)

array = np.random.randint(-10,10, size= (4,4))
print(array)

[[-5 -3 -2 -5]
 [-8 -5 -5 -4]
 [ 6  5  1  9]
 [ 4 -2 -5  7]]


In [73]:
# Wrap the array in a DataFrame, labelling rows A-D and columns W-Z
df = pd.DataFrame(data=array, index=list('ABCD'), columns=list('WXYZ'))
print(df)

   W  X  Y  Z
A -5 -3 -2 -5
B -8 -5 -5 -4
C  6  5  1  9
D  4 -2 -5  7


In [10]:
type(df) # df is an instance of the pandas DataFrame class

pandas.core.frame.DataFrame

## Trabajando con DataFrames

Podemos consultar una columna mediante su nombre

In [12]:
# Bracket indexing with a column label returns that column
df['X']

A    -3
B    -5
C     5
D    -2
Name: X, dtype: int32

In [14]:
type(df['X']) # a column selected this way is a pandas Series

pandas.core.series.Series

Añadir una columna

In [75]:
# New column TOTAL: row-wise sum of the four original columns
df['TOTAL'] = sum(df[col] for col in ['W', 'X', 'Y', 'Z'])
df

Unnamed: 0,W,X,Y,Z,TOTAL
A,-5,-3,-2,-5,-15
B,-8,-5,-5,-4,-22
C,6,5,1,9,21
D,4,-2,-5,7,4


Borrar una columna

In [77]:
# drop() returns a new frame without TOTAL; df itself is left unchanged
df.drop(columns='TOTAL')


Unnamed: 0,W,X,Y,Z
A,-5,-3,-2,-5
B,-8,-5,-5,-4
C,6,5,1,9
D,4,-2,-5,7


In [79]:
# The previous drop() was not in place, so df still has the TOTAL column
df

Unnamed: 0,W,X,Y,Z,TOTAL
A,-5,-3,-2,-5,-15
B,-8,-5,-5,-4,-22
C,6,5,1,9,21
D,4,-2,-5,7,4


In [81]:
# inplace=True makes the removal permanent by mutating df directly
df.drop(columns='TOTAL', inplace=True)
df

Unnamed: 0,W,X,Y,Z
A,-5,-3,-2,-5
B,-8,-5,-5,-4
C,6,5,1,9
D,4,-2,-5,7


Borrar una fila


In [85]:
df.drop(index='D') # pass inplace=True to remove the row from df for good

Unnamed: 0,W,X,Y,Z
A,-5,-3,-2,-5
B,-8,-5,-5,-4
C,6,5,1,9


## Seleccionar filas

In [89]:
# Label-based row selection: .loc takes the index label
df.loc['C']

W    6
X    5
Y    1
Z    9
Name: C, dtype: int32

In [93]:
# Position-based row selection: .iloc takes the integer position (2 is the third row, 'C')
df.iloc[2]

W    6
X    5
Y    1
Z    9
Name: C, dtype: int32

## Seleccionar una subtabla (subset)

In [95]:
# Single value: row 'C', column 'Z'
df.loc['C', 'Z']

9

In [97]:
# Sub-table: rows A and B, columns W and Y
df.loc[['A', 'B'],['W', 'Y']]

Unnamed: 0,W,Y
A,-5,-2
B,-8,-5


## Selección condicional

In [99]:
# Show the frame that the conditional-selection examples below operate on
df

Unnamed: 0,W,X,Y,Z
A,-5,-3,-2,-5
B,-8,-5,-5,-4
C,6,5,1,9
D,4,-2,-5,7


In [103]:
df > 0 # element-wise test: True where the value is positive, False elsewhere

Unnamed: 0,W,X,Y,Z
A,False,False,False,False
B,False,False,False,False
C,True,True,True,True
D,True,False,False,True


In [109]:
# Keep only the values that satisfy the condition
df[df>0] # entries that fail the condition come back as NaN

Unnamed: 0,W,X,Y,Z
A,,,,
B,,,,
C,6.0,5.0,1.0,9.0
D,4.0,,,7.0


In [111]:
# Rows where column X is positive
df[df['X'] > 0]

Unnamed: 0,W,X,Y,Z
C,6,5,1,9


In [115]:
# Rows where X > 0, restricted to columns Y and Z, selected in a single .loc call
# instead of chained indexing. To combine several conditions use & and |, wrapping
# each condition in parentheses; the Python keywords `and` / `or` raise ValueError
# when applied to a Series.
df.loc[df['X'] > 0, ['Y', 'Z']]

Unnamed: 0,Y,Z
C,1,9


## Modificar los índices

In [119]:
# Build a fresh DataFrame of uniform random floats in [-10, 10)
array = np.random.uniform(low=-10, high=10, size=(4, 4))
df = pd.DataFrame(data=array, index=list('ABCD'), columns=list('WXYZ'))

In [121]:
# Inspect the newly created frame of random floats
df

Unnamed: 0,W,X,Y,Z
A,-7.212774,7.313248,-5.13116,2.890111
B,0.102957,0.159991,5.026325,-2.023941
C,3.477626,-9.850022,8.415997,-6.502085
D,8.793928,-1.107442,3.746837,-4.322999


In [151]:
# Add a new column named CODIGOS, one value per row
df['CODIGOS'] = ['AA', 'BB', 'CC', 'DD']
df

Unnamed: 0_level_0,W,X,Y,Z,CODIGOS
CODIGOS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AA,-7.212774,7.313248,-5.13116,2.890111,AA
BB,0.102957,0.159991,5.026325,-2.023941,BB
CC,3.477626,-9.850022,8.415997,-6.502085,CC
DD,8.793928,-1.107442,3.746837,-4.322999,DD


In [153]:
# Use the CODIGOS column as the row index; without inplace=True this returns
# a new frame and leaves df untouched
df.set_index('CODIGOS')

Unnamed: 0_level_0,W,X,Y,Z
CODIGOS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AA,-7.212774,7.313248,-5.13116,2.890111
BB,0.102957,0.159991,5.026325,-2.023941
CC,3.477626,-9.850022,8.415997,-6.502085
DD,8.793928,-1.107442,3.746837,-4.322999


In [155]:
# df itself is unchanged by the previous set_index call
df

Unnamed: 0_level_0,W,X,Y,Z,CODIGOS
CODIGOS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AA,-7.212774,7.313248,-5.13116,2.890111,AA
BB,0.102957,0.159991,5.026325,-2.023941,BB
CC,3.477626,-9.850022,8.415997,-6.502085,CC
DD,8.793928,-1.107442,3.746837,-4.322999,DD


In [157]:
df.set_index('CODIGOS', inplace = True) # para que quede definitivo usamos el inplace
df

Unnamed: 0_level_0,W,X,Y,Z
CODIGOS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AA,-7.212774,7.313248,-5.13116,2.890111
BB,0.102957,0.159991,5.026325,-2.023941
CC,3.477626,-9.850022,8.415997,-6.502085
DD,8.793928,-1.107442,3.746837,-4.322999


In [159]:
# Look up a row through the new index labels
df.loc['AA']

W   -7.212774
X    7.313248
Y   -5.131160
Z    2.890111
Name: AA, dtype: float64

## Reset de índices (índices por defecto)

In [161]:
# Restore the default 0..n-1 integer index. drop=True discards the old CODIGOS
# index instead of turning it back into a column; inplace=True modifies df directly
df.reset_index(drop= True, inplace= True)
df

Unnamed: 0,W,X,Y,Z
0,-7.212774,7.313248,-5.13116,2.890111
1,0.102957,0.159991,5.026325,-2.023941
2,3.477626,-9.850022,8.415997,-6.502085
3,8.793928,-1.107442,3.746837,-4.322999
