In [None]:
import numpy as np
import pandas as pd

# Creación de un dataframe

### Creación desde una lista

In [None]:
# Build a one-column frame from a flat list of integers.
data = list(range(1, 6))
df = pd.DataFrame(data=data)  # DataFrame constructor
df

In [None]:
# Each inner list is one row; `columns` names the fields in order.
data = [
    ['Alex', 10],
    ['Bob', 12],
    ['Clarke', 13],
]
df = pd.DataFrame(data=data, columns=['Name', 'Age'])
df

### Creación desde un diccionario

In [None]:
# Dict keys become column names; each list holds that column's values.
data = dict(
    Name=['Tom', 'Jack', 'Steve', 'Ricky'],
    Age=[28, 34, 29, 42],
)
df = pd.DataFrame(data=data)
df

In [None]:
# Same dict as before, but with explicit row labels instead of the default 0..n-1.
data = dict(
    Name=['Tom', 'Jack', 'Steve', 'Ricky'],
    Age=[28, 34, 29, 42],
)
row_labels = ['rank1', 'rank2', 'rank3', 'rank4']
df = pd.DataFrame(data=data, index=row_labels)
df

In [None]:
# One column per dtype: scalars (A, B, F) are broadcast to the length of the
# array-like columns (C, D, E), which all have 4 elements.
df = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': ["test", "train", "test", "train"],
        'F': 'foo',
    }
)
df

Tipo de dato de las columnas

In [None]:
# dtype of each column of the frame built in the previous cell.
df.dtypes

In [None]:
# Prints a summary of the frame: index, columns, non-null counts, dtypes and memory usage.
df.info()

### Creación desde una lista de diccionarios

In [None]:
# One dict per row; a key missing from a row ('c' in the first one) becomes NaN.
data = [
    {'a': 1, 'b': 2},
    {'a': 5, 'b': 10, 'c': 20},
]
df = pd.DataFrame(data=data, index=['first', 'second'])
df

In [None]:
# Summary of the frame built from the list of dicts (columns, non-null counts, dtypes).
df.info()

# Observar los datos

Crear rango de fechas

In [None]:
# 20 consecutive calendar days starting on 2020-02-01 (daily frequency is the default).
dates = pd.date_range(start='20200201', periods=20, freq='D')
dates

Crear dataframe

In [None]:
# Seed a dedicated generator so the sample, and every output derived from it,
# is identical on each Restart & Run All.
rng = np.random.default_rng(42)
# 20 rows x 4 columns drawn from the standard normal distribution, indexed by `dates`.
df = pd.DataFrame(rng.standard_normal((20, 4)), index=dates, columns=['A', 'B', 'C', 'D'])

In [None]:
# Display the whole frame (20 rows, one per date).
df

### Head and tail

Ver los 5 primeros registros

In [None]:
# First 5 rows (5 is head()'s default, spelled out here).
df.head(n=5)

Ver los 10 primeros registros

In [None]:
# First 10 rows.
df.head(n=10)

Ver los 5 últimos registros

In [None]:
# Last 5 rows, as the cell's description states (tail() defaults to n=5).
df.tail()

# Describe

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
df.describe()

# Transponer

In [None]:
# Swap rows and columns: dates become the columns, A-D become the rows.
df.transpose()

# Ordenar

Ordenar por las etiquetas de un eje (*axis*): `axis=1` ordena las columnas y `axis=0` ordena las filas.

In [None]:
# Sort the column labels in descending order (axis='columns' is the same as axis=1).
df.sort_index(axis='columns', ascending=False).head(n=5)

In [None]:
# Sort the row labels (dates) in descending order (axis='index' is the same as axis=0).
df.sort_index(axis='index', ascending=False).head(n=5)

Ordenar por valor

In [None]:
# Order the rows by the values in column B, largest first.
df.sort_values('B', ascending=False)

# Selección

Selección de columnas

In [None]:
# A single column label returns a Series; show its first 10 values.
df['A'].head(n=10)

In [None]:
# Common mistake: df['A', 'B'] passes the tuple ('A', 'B') as ONE column key,
# which does not exist in this frame. Catch the error and show it so the
# notebook still runs top to bottom.
try:
    df['A', 'B']
except KeyError as err:
    print(f"KeyError: {err}")

In [None]:
# Several columns are selected by passing a list of labels inside the brackets.
df[['A', 'B']].head(n=5)

Selección de filas

In [None]:
# An integer slice inside [] selects rows by position: the first three rows.
df[:3]

In [None]:
# A slice of date strings selects rows by label on the DatetimeIndex;
# label slices include both endpoints.
df['20200210':'20200215']

### Selección por etiqueta

In [None]:
# First timestamp of `dates`; used as a row label in the next cell.
dates[0]

In [None]:
# A single row label returns that row as a Series indexed by the column names.
df.loc[dates[0]]

In [None]:
# All rows (`:`), columns A and B selected by label.
df.loc[:, ['A', 'B']].head(n=5)

In [None]:
# Rows by label slice (both endpoints included), columns by a list of labels.
df.loc['20200210':'20200215', ['A', 'B']]

Reducción en las dimensiones del objeto devuelto

In [None]:
# A single row label with a list of columns yields a Series instead of a DataFrame.
df.loc['20200210', ['A', 'B']]

Para obtener un valor escalar

In [None]:
# One row label and one column label yield a single scalar value.
df.loc['20200210','A']

### Selección por posición

Con enteros

In [None]:
# Row at integer position 3 (the fourth row), returned as a Series.
df.iloc[3]

Con rango de enteros

In [None]:
# Rows at positions 3-4 and the first two columns; positional slices exclude the stop.
df.iloc[3:5, :2]

Con listas de enteros

In [None]:
# Rows at positions 1, 2 and 4; columns at positions 0 and 2.
df.iloc[[1, 2, 4], [0, 2]]

Seleccionar filas

In [None]:
# Rows at positions 1 and 2 with every column (the column slice `:` is implied).
df.iloc[1:3]

Seleccionar columnas

In [None]:
# Every row, columns at positions 1 and 2.
df.iloc[:, 1:3]

Obtener un valor

In [None]:
# Scalar at row position 1, column position 1.
df.iloc[1, 1]

In [None]:
# Same cell via `iat`, the positional accessor dedicated to single scalar values.
df.iat[1, 1]

### Selección por condición

In [None]:
# Peek at the first 5 rows before filtering.
df.head(n=5)

In [None]:
# Keep the rows where both A and C are positive and show only those two columns.
# The row mask and the column list go into a single .loc call.
df.loc[(df['A'] > 0) & (df['C'] > 0), ['A', 'C']]

In [None]:
# Keep the frame's shape; cells that fail the condition become NaN.
df.where(df > 0)

# Operaciones

In [None]:
# Mean across the columns: one value per row (axis='columns' is the same as axis=1).
df.mean(axis='columns')

In [None]:
# Mean down the rows: one value per column (axis='index' is the same as axis=0).
df.mean(axis='index')

In [None]:
# Other reductions that are called like mean() and accept the same `axis` argument:
# median
# sum
# max
# min

### Apply

In [None]:
# apply() passes each column (axis=0, the default) to the function as a Series;
# the result is the value range (max - min) of every column.
df.apply(lambda column: column.max() - column.min(), axis=0)

In [None]:
# First 5 rows before the derived column is added.
df.head(n=5)

In [None]:
# On a Series, apply() calls the function once per element: store A squared as a new column.
df['A_2'] = df['A'].apply(lambda value: value ** 2)

In [None]:
# First 5 rows, now including the new A_2 column.
df.head(n=5)

# Unir

### Concat

In [None]:
# Seeded generator so the 10x4 standard-normal sample is the same on every run.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((10, 4)))
df

In [None]:
# First three rows: one of the pieces concatenated in the following cells.
df.head(3)

In [None]:
# Stack the three row slices vertically; together they rebuild the original frame.
pd.concat(
    [df.iloc[:3], df.iloc[3:7], df.iloc[7:]],
    axis='index',
)

In [None]:
# Place three copies of the first three rows side by side; ignore_index=True
# renumbers the resulting columns instead of repeating 0..3 three times.
pd.concat([df[:3]] * 3, axis='columns', ignore_index=True)

# Join

![Diagrama de los tipos de join en SQL: inner, outer, left y right](https://www.upgrad.com/blog/wp-content/uploads/2020/09/Joins-in-SQL-Inner-Outer-Left-and-Right-Join.jpg)

![Imagen](https://miro.medium.com/max/724/1*-I_1qa5TIiB5eNYxnodfAA.png)

In [None]:
# Left table of the join examples: keys foo, bar and cat.
left = pd.DataFrame(
    dict(
        key=['foo', 'bar', 'cat'],
        lval=[1, 2, 3],
    )
)
left

In [None]:
# Right table of the join examples: shares foo and bar with `left`, adds dog.
right = pd.DataFrame(
    dict(
        key=['foo', 'bar', 'dog'],
        rval=[4, 5, 6],
    )
)
right

### Inner join

In [None]:
# Inner join: only the keys present in both tables are kept.
left.merge(right, how='inner', on='key')

### Left join 

In [None]:
# Left join: every row of `left` is kept; keys missing from `right` get NaN in rval.
left.merge(right, how='left', on='key')

### Right join

In [None]:
# Right join: every row of `right` is kept; keys missing from `left` get NaN in lval.
left.merge(right, how='right', on='key')

# Grouping

In [None]:
# Two categorical key columns (A, B) plus two numeric columns (C, D).
# The numeric values come from a seeded generator so the grouped results
# below are the same on every run.
rng = np.random.default_rng(42)
df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
                   'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
                   'C': rng.standard_normal(8),
                   'D': rng.standard_normal(8)})
df

In [None]:
# Sum only the numeric columns (C, D) per group. With pandas >= 2.0 the default
# numeric_only=False would also "sum" column B by concatenating its strings.
df.groupby('A').sum(numeric_only=True)

In [None]:
# Mean of the numeric columns (C, D) per group. numeric_only=True is required
# with pandas >= 2.0, where the string column B would otherwise raise a TypeError.
df.groupby('A').mean(numeric_only=True)

In [None]:
# Grouping by two keys produces a result indexed by every (A, B) pair; take the max per group.
df.groupby(by=['A', 'B']).max()

# Pivot

In [None]:
# 12-row frame: three categorical columns built by repeating short patterns,
# plus two numeric columns drawn from a seeded generator so the pivot table
# below is the same on every run.
rng = np.random.default_rng(42)
df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3,
                   'B': ['A', 'B', 'C'] * 4,
                   'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
                   'D': rng.standard_normal(12),
                   'E': rng.standard_normal(12)})
df

In [None]:
# Mean of D for every (A, C) combination: A values become rows, C values become columns.
# fill_value is left at its default (None), so empty combinations show as NaN.
df.pivot_table(values='D', index=['A'], columns=['C'], aggfunc='mean')

# Eliminar

In [None]:
# First 5 rows before anything is removed.
df.head(n=5)

Eliminar fila

In [None]:
# Returns a copy without the row labelled 0; `df` itself is not modified.
df.drop(index=0).head(n=5)

Eliminar columna

In [None]:
# Remove columns A and B by reassigning the result instead of using inplace=True.
# errors='ignore' keeps the cell re-runnable: a second execution, when the
# columns are already gone, no longer raises a KeyError.
df = df.drop(columns=['A', 'B'], errors='ignore')

In [None]:
# Display the frame after columns A and B were removed.
df