# Cómo seleccionar datos en Pandas

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a Series of four floats labelled with letters instead of the
# default integer positions, then display it as the cell output.
data = pd.Series(
    [0.25, 0.5, 0.75, 1.0],
    index=list('abcd'),
)
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [3]:
# Dictionary-style lookup: fetch the value stored under the index label 'b'.
data['b']

0.5

In [5]:
# `in` checks the index labels: 'c' is a label of the Series, 'z' is not (yet).
'c' in data, 'z' in data

(True, False)

In [6]:
# keys() and the .index attribute expose the same set of labels.
data.keys(), data.index

(Index(['a', 'b', 'c', 'd'], dtype='object'),
 Index(['a', 'b', 'c', 'd'], dtype='object'))

In [8]:
# items() yields (label, value) tuples; collect them with a list comprehension.
[v for v in data.items()]

[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]

In [9]:
# list() consumes the (label, value) tuples produced by items() in one call.
list(data.items())

[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]

In [7]:
# Assigning to a label that is not in the index adds a new entry, like a dict.
# Note: this modifies `data` in place.
data['z'] = -1.0

In [8]:
# Display the Series; the new 'z' entry appears after the original four.
data

a    0.25
b    0.50
c    0.75
d    1.00
z   -1.00
dtype: float64

In [13]:
data['a':'c'] # slice by explicit index labels; the end label 'c' is included in the result

a    0.25
b    0.50
c    0.75
dtype: float64

In [14]:
data[(data > 0.3) & (data < 0.8)] # selection by value: A & B keeps the entries where both condition A and condition B hold

b    0.50
c    0.75
dtype: float64

In [15]:
data[['a', 'z']] # indexing with a list of row labels

a    0.25
z   -1.00
dtype: float64

## Indexadores

In [16]:
# Integer labels (1, 3, 5) chosen so that they differ from the
# positions (0, 1, 2); the Series is then shown as the cell output.
data = pd.Series(
    data=list('abc'),
    index=[1, 3, 5],
)
data

1    a
3    b
5    c
dtype: object

In [18]:
data[1] # explicit index (label 1), but confusing: it reads like position 1

'a'

In [20]:
data[1:3] # what happened? the slice used positions 1 and 2 (labels 3 and 5), not labels 1 to 3

3    b
5    c
dtype: object

In [22]:
data.loc[1]      # pandas style: .loc looks up by explicit index label

'a'

In [23]:
data.loc[1:3]    # pandas style: label slice, the end label 3 is included

1    a
3    b
dtype: object

In [24]:
data.iloc[1]    # Python style: .iloc looks up by 0-based position

'b'

In [26]:
data.iloc[1:3]    # Python style: positional slice, the end position 3 is excluded

3    b
5    c
dtype: object

## DataFrames

In [63]:
# Area figures, one entry per province (the province name is the index label).
a = pd.Series({
    'Buenos Aires': 423967,
    'Chubut': 695662,
    'Santa Fe': 141297,
    'Cordoba': 170312,
    'Chaco': 149995,
})

# Population figures, keyed by the same province names.
p = pd.Series({
    'Buenos Aires': 38332521,
    'Chubut': 264489,
    'Santa Fe': 5965110,
    'Cordoba': 1955209,
    'Chaco': 12882,
})

# Each Series becomes a column; 'densidad' is population divided by area,
# computed element-wise with the two Series aligned on their shared index.
data = pd.DataFrame({
    'area': a,
    'poblacion': p,
    'densidad': p / a,
})
data

Unnamed: 0,area,poblacion,densidad
Buenos Aires,423967,38332521,90.413926
Chubut,695662,264489,0.380198
Santa Fe,141297,5965110,42.21682
Cordoba,170312,1955209,11.48016
Chaco,149995,12882,0.085883


In [64]:
# Attribute-style column access: returns the 'area' column as a Series.
data.area

Buenos Aires    423967
Chubut          695662
Santa Fe        141297
Cordoba         170312
Chaco           149995
Name: area, dtype: int64

In [48]:
# A single column selected by attribute is a pandas Series.
type(data.area)

pandas.core.series.Series

In [49]:
# Indexing with a list of column names (here a list with a single name).
data[['area']]

Unnamed: 0,area
Buenos Aires,423967
Chubut,695662
Santa Fe,141297
Cordoba,170312
Chaco,149995


In [51]:
# Selecting with a list of column names gives a DataFrame, even for one column.
type(data[['area']])

pandas.core.frame.DataFrame

In [52]:
# Attribute access and dictionary-style access select the same column.
# Compare contents with .equals() rather than object identity (`is`):
# identity depends on pandas' internal column cache and is not guaranteed,
# e.g. it is False when Copy-on-Write is enabled.
data.area.equals(data['area'])

True

In [53]:
# Underlying data as a 2-D NumPy array (one row per province, one column per
# DataFrame column). to_numpy() is the accessor recommended by the pandas
# documentation in place of the older .values attribute.
data.to_numpy()

array([[4.23967000e+05, 3.83325210e+07, 9.04139261e+01],
       [6.95662000e+05, 2.64489000e+05, 3.80197567e-01],
       [1.41297000e+05, 5.96511000e+06, 4.22168199e+01],
       [1.70312000e+05, 1.95520900e+06, 1.14801599e+01],
       [1.49995000e+05, 1.28820000e+04, 8.58828628e-02]])

In [54]:
# The array extracted from the DataFrame is a plain numpy.ndarray.
# to_numpy() is used instead of .values, as the pandas documentation recommends.
type(data.to_numpy())

numpy.ndarray

In [55]:
# Transpose: swap rows and columns
data.T

Unnamed: 0,Buenos Aires,Chubut,Santa Fe,Cordoba,Chaco
area,423967.0,695662.0,141297.0,170312.0,149995.0
poblacion,38332520.0,264489.0,5965110.0,1955209.0,12882.0
densidad,90.41393,0.380198,42.21682,11.48016,0.085883


In [56]:
# Select one row by its index label; the row comes back as a Series
# keyed by column name.
data.loc['Buenos Aires']

area         4.239670e+05
poblacion    3.833252e+07
densidad     9.041393e+01
Name: Buenos Aires, dtype: float64

In [57]:
# Positional selection: first three rows, first two columns.
data.iloc[:3,:2]

Unnamed: 0,area,poblacion
Buenos Aires,423967,38332521
Chubut,695662,264489
Santa Fe,141297,5965110


In [58]:
# Two chained steps: first three rows by position, then columns by name.
# Chaining like this is fine for reading; for assignment use a single
# .loc / .iloc call instead.
data.iloc[:3][['area', 'poblacion']]

Unnamed: 0,area,poblacion
Buenos Aires,423967,38332521
Chubut,695662,264489
Santa Fe,141297,5965110


In [66]:
# One .loc call combining a boolean row mask (densidad above 30) with a
# list of column labels to keep.
data.loc[data.densidad > 30, ['poblacion', 'densidad']]

Unnamed: 0,poblacion,densidad
Buenos Aires,38332521,90.413926
Santa Fe,5965110,42.21682
