In [1]:
import pandas as pd
import numpy as np

In [2]:
series_example2 = pd.Series([-0.5, 0.75, 1.0, -2], index=['a', 'b', 'c', 'd'])
series_example2

a   -0.50
b    0.75
c    1.00
d   -2.00
dtype: float64

In [3]:
'a' in series_example2

True

In [4]:
series_example2.keys()

Index(['a', 'b', 'c', 'd'], dtype='object')

In [5]:
list(series_example2.items())

[('a', -0.5), ('b', 0.75), ('c', 1.0), ('d', -2.0)]

In [6]:
series_example2['e'] = 1.25
series_example2

a   -0.50
b    0.75
c    1.00
d   -2.00
e    1.25
dtype: float64

# Enmascaramiento

In [7]:
series_example2[(series_example2 > -1) & (series_example2 < 0.8)]

a   -0.50
b    0.75
dtype: float64

# Indenxacion avanzada
Al realizar la segmentación con un índice explícito (es decir, series_example2['a':'c']), el índice final se incluye en el segmento. Al realizar la segmentación con un índice explícito (es decir, series_example2[0:2]), el índice final se excluye del segmento.

In [9]:
series_example2['a':'c']
series_example2[0:2]

a   -0.50
b    0.75
dtype: float64

# loc e iloc
Estos no son métodos funcionales. Son atributos que exponen una interfaz de segmentación determinada a los datos del objeto Series.

In [13]:
print(series_example2.loc['a':'c'])
print(series_example2.iloc[0:2])

a   -0.50
b    0.75
c    1.00
dtype: float64
a   -0.50
b    0.75
dtype: float64


# Seleccion de datos en DataFrame

In [3]:
area = pd.Series({'Albania': 28748,
                  'France': 643801,
                  'Germany': 357386,
                  'Japan': 377972,
                  'Russia': 17125200})
population = pd.Series ({'Albania': 2937590,
                         'France': 65429495,
                         'Germany': 82408706,
                         'Russia': 143910127,
                         'Japan': 126922333})
countries = pd.DataFrame({'Area': area, 'Population': population})
countries

Unnamed: 0,Area,Population
Albania,28748,2937590
France,643801,65429495
Germany,357386,82408706
Japan,377972,126922333
Russia,17125200,143910127


In [9]:
countries['Area']
countries['Population Density']=countries['Population']/countries['Area']
countries

Unnamed: 0,Area,Population,Population Density
Albania,28748,2937590,102.184152
France,643801,65429495,101.629999
Germany,357386,82408706,230.587393
Japan,377972,126922333,335.798242
Russia,17125200,143910127,8.403413


Se puede observar sus valores como una matriz bidimencional

In [13]:
print(f"{countries.values} \n")
print(countries.values.T)


[[2.87480000e+04 2.93759000e+06 1.02184152e+02]
 [6.43801000e+05 6.54294950e+07 1.01629999e+02]
 [3.57386000e+05 8.24087060e+07 2.30587393e+02]
 [3.77972000e+05 1.26922333e+08 3.35798242e+02]
 [1.71252000e+07 1.43910127e+08 8.40341292e+00]] 

[[2.87480000e+04 6.43801000e+05 3.57386000e+05 3.77972000e+05
  1.71252000e+07]
 [2.93759000e+06 6.54294950e+07 8.24087060e+07 1.26922333e+08
  1.43910127e+08]
 [1.02184152e+02 1.01629999e+02 2.30587393e+02 3.35798242e+02
  8.40341292e+00]]


In [16]:
countries
countries.iloc[:3, :2] #permite seleccionar las filas y columnas que se desean
countries.loc[:'Germany', :'Population'] #permite seleccionar las filas y columnas que se desean pero dando el nombre del index

Unnamed: 0,Area,Population
Albania,28748,2937590
France,643801,65429495
Germany,357386,82408706


In [21]:
index=countries['Area'] > 28748
countries[index]

countries.loc[:'Germany', :'Population']

Unnamed: 0,Area,Population
Albania,28748,2937590
France,643801,65429495
Germany,357386,82408706


# la indexación hace referencia a las columnas, y la segmentación, a las filas

In [22]:
countries['France':'Japan'] #segmentación
countries[1:3] #indexación


Unnamed: 0,Area,Population,Population Density
France,643801,65429495,101.629999
Germany,357386,82408706,230.587393
