# Manejo de datos con Pandas
Ejemplos del capítulo 3 de "Python Data Science Handbook".

In [4]:
import numpy as np
import pandas as pd

### Creación de objetos "Series"

In [5]:
# Build a Series from a plain list of floats; no index is passed, so pandas
# supplies the default integer labels.
datos = pd.Series(data=[1.05, 3.2, 0.32, 1.68, 2.39, 3.84])
datos  # last expression of the cell -> rendered as the cell output

0    1.05
1    3.20
2    0.32
3    1.68
4    2.39
5    3.84
dtype: float64

In [6]:
# The underlying data of the Series, exposed as a NumPy array.
datos.values

array([1.05, 3.2 , 0.32, 1.68, 2.39, 3.84])

In [7]:
# The labels of the Series; since none were given at construction this is the
# default RangeIndex.
datos.index

RangeIndex(start=0, stop=6, step=1)

In [8]:
# Integer key on the default integer index: returns the element labelled 1.
datos[1]

3.2

In [9]:
# Integer slice: rows 2, 3 and 4 are returned; the stop bound 5 is excluded.
datos[2:5]

2    0.32
3    1.68
4    2.39
dtype: float64

In [10]:
# Same values as before, but labelled with an explicit string index instead of
# the default integers.
datos2 = pd.Series(
    data=[1.05, 3.2, 0.32, 1.68, 2.39, 3.84],
    index=["a", "b", "c", "d", "f", "g"],
)
datos2

a    1.05
b    3.20
c    0.32
d    1.68
f    2.39
g    3.84
dtype: float64

In [11]:
# Look up a single element by its string label.
datos2["a"]

1.05

In [12]:
# Index labels may be arbitrary strings; they need not be sorted or of equal length.
datos3 = pd.Series(
    data=[1.05, 3.2, 0.32, 1.68, 2.39, 3.84],
    index=['f', 'hola', 'ba', 'as', 'h', 'g'],
)
datos3

f       1.05
hola    3.20
ba      0.32
as      1.68
h       2.39
g       3.84
dtype: float64

In [13]:
# Slice by label: everything from 'f' through 'h' in index order, and the stop
# label 'h' itself is included in the result.
datos3['f':'h']

f       1.05
hola    3.20
ba      0.32
as      1.68
h       2.39
dtype: float64

In [14]:
# A scalar passed as data is repeated for every label of the given index.
pd.Series(data=1, index=[233, 411, 655, 992])

233    1
411    1
655    1
992    1
dtype: int64

### Creación de objetos "DataFrame"

In [15]:
# A Series of strings that reuses the labels of datos3, so the two can be
# aligned as columns of one table.
datos4 = pd.Series(
    data=['haciendo', 'tabla', 'de', 'ejemplo', 'aa', 'hola'],
    index=['f', 'hola', 'ba', 'as', 'h', 'g'],
)

In [16]:
# Build a DataFrame from a dict of Series: each key becomes a column name and
# the Series' shared labels become the row index.
tabla1 = pd.DataFrame(
    data={
        'tabla3': datos3,
        'tabla4': datos4,
    }
)
tabla1

Unnamed: 0,tabla3,tabla4
f,1.05,haciendo
hola,3.2,tabla
ba,0.32,de
as,1.68,ejemplo
h,2.39,aa
g,3.84,hola


In [17]:
# Row labels of the DataFrame.
tabla1.index

Index(['f', 'hola', 'ba', 'as', 'h', 'g'], dtype='object')

In [18]:
# Column labels of the DataFrame, also held in an Index object.
tabla1.columns

Index(['tabla3', 'tabla4'], dtype='object')

In [19]:
# Selecting one column by name returns it as a Series named after the column.
tabla1['tabla3']

f       1.05
hola    3.20
ba      0.32
as      1.68
h       2.39
g       3.84
Name: tabla3, dtype: float64

In [20]:
# One dict per row: the keys become the columns 'a' (twice n) and 'b' (n squared).
tabla2 = [{'a': 2 * n, 'b': n ** 2} for n in range(6)]
pd.DataFrame(data=tabla2)

Unnamed: 0,a,b
0,0,0
1,2,1
2,4,4
3,6,9
4,8,16
5,10,25


In [21]:
# Draw the random values from a locally seeded generator so that the table is
# identical on every "Restart & Run All". Outputs saved from an earlier,
# unseeded run will show different numbers.
rng = np.random.default_rng(0)
tabla_aleatoria = pd.DataFrame(
    rng.random((4, 2)),  # 4 rows x 2 columns of uniform values in [0, 1)
    columns=['col1', 'col2'],
    index=['a', 'b', 'c', 'd'],
)
tabla_aleatoria

Unnamed: 0,col1,col2
a,0.23427,0.739951
b,0.600559,0.042098
c,0.217339,0.407382
d,0.322167,0.566666


### Uso del objeto Index

In [22]:
# Construct a standalone Index from a list of integers.
ind = pd.Index(data=[1, 2, 3, 4])
ind

Int64Index([1, 2, 3, 4], dtype='int64')

In [23]:
# An Index can be indexed like an array: position 3 holds the value 4.
ind[3]

4

In [25]:
# An Index exposes the familiar array-style attributes.
print(
    ind.size,   # number of elements
    ind.shape,  # dimensions as a tuple
    ind.ndim,   # number of dimensions
    ind.dtype,  # element type
)

4 (4,) 1 int64


In [26]:
# An Index is immutable: item assignment raises TypeError. The exception is the
# point of this cell, so catch it and print it instead of letting it abort a
# "Restart & Run All".
try:
    ind[1] = 0
except TypeError as error:
    print(f"{type(error).__name__}: {error}")

TypeError: Index does not support mutable operations

In [27]:
ind1 = pd.Index([1, 2, 3, 4, 5])
ind2 = pd.Index([2, 3, 4, 5, 6])
# Use the explicit set method. The `&` operator on Index objects was deprecated
# as a set operation and, from pandas 2.0 on, works element-wise instead, so it
# no longer yields the intersection.
ind1.intersection(ind2)  # intersection

Int64Index([2, 3, 4, 5], dtype='int64')

In [28]:
# Explicit set method instead of `|`, which was deprecated as a set operation
# on Index objects and works element-wise from pandas 2.0 on.
ind1.union(ind2)  # union

Int64Index([1, 2, 3, 4, 5, 6], dtype='int64')

In [29]:
# Explicit set method instead of `^`, which was deprecated as a set operation
# on Index objects and works element-wise from pandas 2.0 on.
ind1.symmetric_difference(ind2)  # symmetric difference (union minus intersection)

Int64Index([1, 6], dtype='int64')