# Operaciones

Los `DataFrames` ofrecen varias operaciones básicas; el listado completo se puede encontrar en la [documentación](https://pandas.pydata.org/docs/user_guide/basics.html).

## Rango de fechas

In [2]:
import pandas as pd
import numpy as np
import time

# Daily date range, used below as the row index of a DataFrame
index = pd.date_range(start="2022-07-15", periods=20, freq="D")

index

DatetimeIndex(['2022-07-15', '2022-07-16', '2022-07-17', '2022-07-18',
               '2022-07-19', '2022-07-20', '2022-07-21', '2022-07-22',
               '2022-07-23', '2022-07-24', '2022-07-25', '2022-07-26',
               '2022-07-27', '2022-07-28', '2022-07-29', '2022-07-30',
               '2022-07-31', '2022-08-01', '2022-08-02', '2022-08-03'],
              dtype='datetime64[ns]', freq='D')

## Consultas rápidas

In [3]:
# Fill a DataFrame with standard-normal random values, using the date range as row index.
# A seeded generator keeps the values identical on every "Restart & Run All".
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((20, 4)), index=index, columns=["A", "B", "C", "D"])

df

Unnamed: 0,A,B,C,D
2022-07-15,0.216686,0.51928,-0.233816,-0.09108
2022-07-16,-0.350368,0.511798,0.942827,0.322414
2022-07-17,-0.773544,0.542909,-0.123508,-0.741945
2022-07-18,-0.044861,0.305886,1.288083,-1.040461
2022-07-19,-0.266718,0.449158,0.20439,0.594695
2022-07-20,0.090266,0.415604,0.265228,-0.060308
2022-07-21,1.319314,-1.783714,1.820139,0.655126
2022-07-22,-0.568118,-1.115967,-0.008616,1.395041
2022-07-23,0.39347,-0.568991,-0.775325,-0.468474
2022-07-24,-1.62959,1.494793,-0.353616,-0.342571


In [4]:
# First rows (head); called without an argument it shows 5 rows
df.head()

Unnamed: 0,A,B,C,D
2022-07-15,0.216686,0.51928,-0.233816,-0.09108
2022-07-16,-0.350368,0.511798,0.942827,0.322414
2022-07-17,-0.773544,0.542909,-0.123508,-0.741945
2022-07-18,-0.044861,0.305886,1.288083,-1.040461
2022-07-19,-0.266718,0.449158,0.20439,0.594695


In [6]:
# First eight rows
df.head(8)

Unnamed: 0,A,B,C,D
2022-07-15,0.216686,0.51928,-0.233816,-0.09108
2022-07-16,-0.350368,0.511798,0.942827,0.322414
2022-07-17,-0.773544,0.542909,-0.123508,-0.741945
2022-07-18,-0.044861,0.305886,1.288083,-1.040461
2022-07-19,-0.266718,0.449158,0.20439,0.594695
2022-07-20,0.090266,0.415604,0.265228,-0.060308
2022-07-21,1.319314,-1.783714,1.820139,0.655126
2022-07-22,-0.568118,-1.115967,-0.008616,1.395041


In [7]:
# Last rows (tail); called without an argument it shows 5 rows
df.tail()

Unnamed: 0,A,B,C,D
2022-07-30,-0.248122,-0.600457,-1.000254,0.09422
2022-07-31,0.58339,-1.18782,-0.392313,-0.376022
2022-08-01,1.112807,-0.269505,-0.625082,-0.445422
2022-08-02,0.496405,1.997319,1.059864,-0.828339
2022-08-03,-1.104031,-0.158482,1.222347,1.500531


In [8]:
# Last three rows
df.tail(3)

Unnamed: 0,A,B,C,D
2022-08-01,1.112807,-0.269505,-0.625082,-0.445422
2022-08-02,0.496405,1.997319,1.059864,-0.828339
2022-08-03,-1.104031,-0.158482,1.222347,1.500531


## Valores únicos

In [19]:
# Small DataFrame mixing an integer, a float and a string column
df = pd.DataFrame(
    data={
        'enteros': [100, 200, 300, 400],
        'decimales': [3.14, 2.72, 1.618, 3.14],
        'cadenas': ['hola', 'adiós', 'hola', 'adiós'],
    }
)

df

Unnamed: 0,enteros,decimales,cadenas
0,100,3.14,hola
1,200,2.72,adiós
2,300,1.618,hola
3,400,3.14,adiós


In [13]:
# Array with the unique values of a column
df['cadenas'].unique()

array(['hola', 'adiós'], dtype=object)

In [14]:
# Number of distinct values in a column
df['cadenas'].nunique()

2

In [15]:
# Series that maps each unique value of a column to the number of times it occurs
df['cadenas'].value_counts()

hola     2
adiós    2
Name: cadenas, dtype: int64

## Aplicación de funciones

In [16]:
# Current contents of the DataFrame, shown before applying functions to its columns
df

Unnamed: 0,enteros,decimales,cadenas
0,100,3.14,hola
1,200,2.72,adiós
2,300,1.618,hola
3,400,3.14,adiós


In [17]:
# Built-in method of the column Series: sum of all its values
df['decimales'].sum()

10.618

In [18]:
# Apply an existing function (the built-in len) to every element of the column
df['cadenas'].apply(len)

0    4
1    5
2    4
3    5
Name: cadenas, dtype: int64

In [21]:
# DataFrame as it stands before the 'enteros' column is overwritten
df

Unnamed: 0,enteros,decimales,cadenas
0,100,3.14,hola
1,200,2.72,adiós
2,300,1.618,hola
3,400,3.14,adiós


In [25]:
# Apply a user-defined function
def doblar(n):
    """Return ``n`` multiplied by 2."""
    return n*2

# NOTE(review): the result is assigned back to 'enteros', so every run of this
# cell doubles the column again. The column was created as 100, 200, 300, 400,
# while the displayed output is 400, 800, 1200, 1600, i.e. the cell was run twice;
# a fresh "Restart & Run All" would show 200, 400, 600, 800 instead.
df['enteros'] = df['enteros'].apply(doblar)
df

Unnamed: 0,enteros,decimales,cadenas
0,400,3.14,hola
1,800,2.72,adiós
2,1200,1.618,hola
3,1600,3.14,adiós


In [26]:
# Display the DataFrame again: the 'enteros' column keeps the overwritten values
df

Unnamed: 0,enteros,decimales,cadenas
0,400,3.14,hola
1,800,2.72,adiós
2,1200,1.618,hola
3,1600,3.14,adiós


In [28]:
def dividir_en_3(n: float) -> float:
    """Return one third of ``n``."""
    tercio = n / 3
    return tercio

dividir_en_3(12)

4.0

In [None]:
# Anonymous function (lambda) bound to a name: divides its argument by 3
f = lambda n: n/3
f(3)

In [31]:
# Lambda with several parameters: adds n, m and the first element of t
f = lambda n, m, t: n + m + t[0]
f(3, 5, [1, 2, 3])

9

In [32]:
# Apply an anonymous function (lambda); the result is not assigned, so df is not modified
df['enteros'].apply(lambda n: n/3)

0    133.333333
1    266.666667
2    400.000000
3    533.333333
Name: enteros, dtype: float64

In [33]:
# df still holds the integer values: the divided Series was not assigned back
df

Unnamed: 0,enteros,decimales,cadenas
0,400,3.14,hola
1,800,2.72,adiós
2,1200,1.618,hola
3,1600,3.14,adiós


In [34]:
# Permanently remove a column: del acts on df itself rather than on a copy
del df['decimales']

In [35]:
# The 'decimales' column is no longer part of the DataFrame
df

Unnamed: 0,enteros,cadenas
0,400,hola
1,800,adiós
2,1200,hola
3,1600,adiós


## Recuperar índices

In [36]:
# Column labels (the column index)
df.columns

Index(['enteros', 'cadenas'], dtype='object')

In [38]:
# Column labels as a plain Python list
df.columns.to_list()

['enteros', 'cadenas']

In [39]:
# Row index
df.index

RangeIndex(start=0, stop=4, step=1)

## Aplicar ordenaciones

In [40]:
# DataFrame before sorting
df

Unnamed: 0,enteros,cadenas
0,400,hola
1,800,adiós
2,1200,hola
3,1600,adiós


In [41]:
# Sort by a column (inplace=False by default)
df.sort_values(by='enteros')

Unnamed: 0,enteros,cadenas
0,400,hola
1,800,adiós
2,1200,hola
3,1600,adiós


In [42]:
# Sort by a column in descending order (inplace=False by default)
df.sort_values(by='enteros', ascending=False)

Unnamed: 0,enteros,cadenas
3,1600,adiós
2,1200,hola
1,800,adiós
0,400,hola
