# Operaciones

Los `DataFrames` ofrecen varias operaciones básicas; el listado completo se puede encontrar en la [documentación](https://pandas.pydata.org/docs/user_guide/basics.html).

## Rango de fechas

In [1]:
import pandas as pd
import numpy as np
import time

# Build 20 consecutive daily timestamps starting on 2022-07-15; they are meant
# to be used as the row index of a DataFrame.
index = pd.date_range(start="2022-07-15", periods=20, freq="D")

index

DatetimeIndex(['2022-07-15', '2022-07-16', '2022-07-17', '2022-07-18',
               '2022-07-19', '2022-07-20', '2022-07-21', '2022-07-22',
               '2022-07-23', '2022-07-24', '2022-07-25', '2022-07-26',
               '2022-07-27', '2022-07-28', '2022-07-29', '2022-07-30',
               '2022-07-31', '2022-08-01', '2022-08-02', '2022-08-03'],
              dtype='datetime64[ns]', freq='D')

## Consultas rápidas

In [2]:
# Fill a 20x4 DataFrame with standard-normal random values, using the date
# range `index` as row labels. A seeded Generator makes the table identical on
# every Restart & Run All; the previous unseeded np.random.randn call produced
# different numbers on each run, so the saved output below will change once.
rng = np.random.default_rng(seed=42)
df = pd.DataFrame(rng.standard_normal((20, 4)),
                  index=index,
                  columns=["A", "B", "C", "D"])

df

Unnamed: 0,A,B,C,D
2022-07-15,-0.864802,-0.16942,1.441043,0.546293
2022-07-16,0.99456,-0.75818,1.927348,0.165799
2022-07-17,-3.032099,-0.801688,-0.296782,-1.274856
2022-07-18,0.351825,0.077653,1.107364,1.940084
2022-07-19,0.332507,-0.415064,0.658136,-0.606937
2022-07-20,-0.346041,-0.996827,-1.378664,-0.200979
2022-07-21,0.358534,1.085835,0.146236,0.406883
2022-07-22,-1.299341,-0.208419,-0.643631,-0.411157
2022-07-23,2.129927,1.84523,1.086097,0.654514
2022-07-24,0.442936,1.108754,-0.399739,0.382106


In [3]:
# First rows (head): head() returns 5 rows when called without an argument,
# spelled out explicitly here.
df.head(n=5)

Unnamed: 0,A,B,C,D
2022-07-15,-0.864802,-0.16942,1.441043,0.546293
2022-07-16,0.99456,-0.75818,1.927348,0.165799
2022-07-17,-3.032099,-0.801688,-0.296782,-1.274856
2022-07-18,0.351825,0.077653,1.107364,1.940084
2022-07-19,0.332507,-0.415064,0.658136,-0.606937


In [7]:
# First row only: head(1) returns a single row (use head(3) for the first three)
df.head(1)

Unnamed: 0,A,B,C,D
2022-07-15,-0.864802,-0.16942,1.441043,0.546293


In [8]:
# Last rows (tail): tail() returns 5 rows when called without an argument,
# spelled out explicitly here.
df.tail(n=5)

Unnamed: 0,A,B,C,D
2022-07-30,-0.561331,-0.043219,0.713837,0.904588
2022-07-31,-0.262575,0.283732,0.444264,-0.163882
2022-08-01,1.125319,-0.653044,-1.08316,-0.151979
2022-08-02,-2.217387,-0.22195,1.710598,1.66587
2022-08-03,1.089649,-1.462177,0.620652,-0.389677


In [9]:
# Last three rows
df.tail(n=3)

Unnamed: 0,A,B,C,D
2022-08-01,1.125319,-0.653044,-1.08316,-0.151979
2022-08-02,-2.217387,-0.22195,1.710598,1.66587
2022-08-03,1.089649,-1.462177,0.620652,-0.389677


## Valores únicos

In [10]:
# Sample DataFrame whose three columns hold different kinds of data:
# integers, floats and strings.
df = pd.DataFrame(
    data=dict(
        enteros=[100, 200, 300, 400],
        decimales=[3.14, 2.72, 1.618, 3.14],
        cadenas=["hola", "adiós", "hola", "adiós"],
    )
)

df

Unnamed: 0,enteros,decimales,cadenas
0,100,3.14,hola
1,200,2.72,adiós
2,300,1.618,hola
3,400,3.14,adiós


In [23]:
# Assigning to a row label that does not exist yet (4) appends a new row.
# The integer 8 given for `decimales` is shown as 8.0 in the output because
# that column holds floats.
df.loc[4] = [8, 8, 'lobo']
df

Unnamed: 0,enteros,decimales,cadenas
0,100,3.14,hola
1,200,2.72,adiós
2,300,1.618,hola
3,400,3.14,adiós
4,8,8.0,lobo


In [19]:
# Count how many times each value occurs in the row labelled 4.
# NOTE(review): the saved output (8 -> 2, 5 -> 1) does not match the row
# assigned in the previous cell ([8, 8, 'lobo']); judging by the execution
# counts it predates that assignment. Re-run top to bottom to refresh it.
df.loc[4].value_counts()

8    2
5    1
Name: 4, dtype: int64

In [11]:
# Array with the distinct values of one column.
# NOTE(review): the saved output lists only 'hola' and 'adiós'; it was produced
# (execution count 11) before the 'lobo' row was added (execution count 23),
# so expect 'lobo' as well after a fresh Run All.
df['cadenas'].unique()

array(['hola', 'adiós'], dtype=object)

In [12]:
# Number of distinct values in one column.
# NOTE(review): the saved result (2) predates the 'lobo' row; with that row
# present a fresh Run All should report 3.
df['cadenas'].nunique()

2

In [13]:
# Distinct values of one column together with how often each one occurs.
# The result is a Series (note `Name: cadenas` in the output), not a DataFrame.
# NOTE(review): the saved output predates the 'lobo' row, so it has no entry
# for 'lobo'; re-run to refresh.
df['cadenas'].value_counts()

hola     2
adiós    2
Name: cadenas, dtype: int64

## Aplicación de funciones

In [25]:
# Show the current DataFrame (including the 'lobo' row) before applying functions
df

Unnamed: 0,enteros,decimales,cadenas
0,100,3.14,hola
1,200,2.72,adiós
2,300,1.618,hola
3,400,3.14,adiós
4,8,8.0,lobo


In [26]:
# Built-in method of the column Series: sum() is computed per column, so the
# numeric columns are totalled while the string column is concatenated
# (see `cadenas` in the output).
df.sum()

enteros                        1008
decimales                    18.618
cadenas      holaadiósholaadióslobo
dtype: object

In [24]:
# Apply an existing function (the built-in len) to every element of the column
df['cadenas'].apply(len)

0    4
1    5
2    4
3    5
4    4
Name: cadenas, dtype: int64

In [27]:
# Display df again: the saved output matches the earlier display, i.e. sum()
# and apply() did not modify it
df

Unnamed: 0,enteros,decimales,cadenas
0,100,3.14,hola
1,200,2.72,adiós
2,300,1.618,hola
3,400,3.14,adiós
4,8,8.0,lobo


In [28]:
# Apply a user-defined function
def doblar(n):
    """Return ``n`` multiplied by 2."""
    return n*2

# apply() calls doblar on each element of `enteros`; the results are stored in
# a new column named `duplicados`.
df['duplicados'] = df['enteros'].apply(doblar)
df

Unnamed: 0,enteros,decimales,cadenas,duplicados
0,100,3.14,hola,200
1,200,2.72,adiós,400
2,300,1.618,hola,600
3,400,3.14,adiós,800
4,8,8.0,lobo,16


In [32]:
# doblar can also be called on the whole column at once: `n*2` on a Series
# multiplies every element, giving the same values as the apply() version
# (compare `duplicados` and `duplicados2` in the output).
df['duplicados2'] = doblar(df['enteros'])
df

Unnamed: 0,enteros,decimales,cadenas,duplicados,duplicados2
0,100,3.14,hola,200,200
1,200,2.72,adiós,400,400
2,300,1.618,hola,600,600
3,400,3.14,adiós,800,800
4,8,8.0,lobo,16,16


In [29]:
# Summing a single column yields one scalar total
df['enteros'].sum()

1008

In [33]:
# Display df with the derived columns `duplicados` and `duplicados2`
df

Unnamed: 0,enteros,decimales,cadenas,duplicados,duplicados2
0,100,3.14,hola,200,200
1,200,2.72,adiós,400,400
2,300,1.618,hola,600,600
3,400,3.14,adiós,800,800
4,8,8.0,lobo,16,16


In [34]:
def dividir_en_3(n: float) -> float:
    """Return one third of ``n`` using true division."""
    tercio = n / 3
    return tercio

# Quick check with a value that divides evenly
dividir_en_3(12)

4.0

In [36]:
# Divide by 3 using an anonymous function (lambda) bound to the name f
f = lambda n: n/3
f(12)

4.0

In [37]:
# A lambda may take several parameters: this one adds n, m and the first
# element of t. Note that it rebinds the name f.
f = lambda n, m, t: n + m + t[0]
f(3, 5, [1, 2, 3])

9

In [41]:
# Apply an anonymous function: the lambda divides each element of the column by 3
df['decimales'].apply(lambda n: n/3)

0    1.046667
1    0.906667
2    0.539333
3    1.046667
4    2.666667
Name: decimales, dtype: float64

In [None]:
# Display df while it still contains the `decimales` column.
# NOTE(review): this cell has no execution count or saved output (In [None]).
df

In [42]:
# Permanently delete a column. `del` mutates df in place, so the membership
# check keeps this cell safe to re-run: without it, a second execution fails
# because the column is already gone.
if 'decimales' in df.columns:
    del df['decimales']

In [43]:
# Display df to confirm that `decimales` is no longer present
df

Unnamed: 0,enteros,cadenas,duplicados,duplicados2
0,100,hola,200,200
1,200,adiós,400,400
2,300,hola,600,600
3,400,adiós,800,800
4,8,lobo,16,16


## Recuperar índices

In [44]:
# Column labels, returned as an Index object
df.columns

Index(['enteros', 'cadenas', 'duplicados', 'duplicados2'], dtype='object')

In [45]:
# The column labels as a plain Python list
df.columns.to_list()

['enteros', 'cadenas', 'duplicados', 'duplicados2']

In [46]:
# Row labels (the DataFrame's index)
df.index

Int64Index([0, 1, 2, 3, 4], dtype='int64')

## Aplicar ordenaciones

In [47]:
# Display df in its current row order, for comparison with the sorted results
df

Unnamed: 0,enteros,cadenas,duplicados,duplicados2
0,100,hola,200,200
1,200,adiós,400,400
2,300,hola,600,600
3,400,adiós,800,800
4,8,lobo,16,16


In [51]:
# Sort rows by a column in ascending order. inplace=False is the default, so
# the sorted result is returned rather than written back into df.
df.sort_values(by='enteros', ascending=True)

Unnamed: 0,enteros,cadenas,duplicados,duplicados2
4,8,lobo,16,16
0,100,hola,200,200
1,200,adiós,400,400
2,300,hola,600,600
3,400,adiós,800,800


In [52]:
# Sort rows by a column in descending order (inplace=False is the default, so
# the sorted result is returned rather than written back into df).
df.sort_values(by='enteros', ascending=False)

Unnamed: 0,enteros,cadenas,duplicados,duplicados2
3,400,adiós,800,800
2,300,hola,600,600
1,200,adiós,400,400
0,100,hola,200,200
4,8,lobo,16,16
