In [20]:
import pandas as pd
import numpy as np
import random
import io

In [21]:
# Load the dataset from a relative path, using the first CSV column as the
# row index, then preview the first rows.
econdata = pd.read_csv('data/econdata.csv', index_col=0)
econdata.head()

Unnamed: 0_level_0,geo_point_2d,geo_shape,clave_cat,delegacion,perimetro,tipo,nom_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,"19.424781053,-99.1327537959","{""type"": ""Polygon"", ""coordinates"": [[[-99.1332...",307_130_11,Cuauhtémoc,B,Mercado,Pino Suárez
1,"19.4346139576,-99.1413808393","{""type"": ""MultiPoint"", ""coordinates"": [[-99.14...",002_008_01,Cuautémoc,A,Museo,Museo Nacional de Arquitectura Palacio de Bell...
2,"19.4340695945,-99.1306348409","{""type"": ""MultiPoint"", ""coordinates"": [[-99.13...",006_002_12,Cuautémoc,A,Museo,Santa Teresa
3,"19.42489472,-99.12073393","{""type"": ""MultiPoint"", ""coordinates"": [[-99.12...",323_102_06,Venustiano Carranza,B,Hotel,Balbuena
4,"19.42358238,-99.12451093","{""type"": ""MultiPoint"", ""coordinates"": [[-99.12...",323_115_12,Venustiano Carranza,B,Hotel,real


# Funciones de muestreo

## 1. Muestreo simple

In [22]:
# Simple random sample of 8 rows. No random_state is passed, so the rows
# drawn are expected to vary between runs.
aleatorio_8 = econdata.sample(n=8)
aleatorio_8.loc[:, ['geo_point_2d']]

Unnamed: 0_level_0,geo_point_2d
id,Unnamed: 1_level_1
171,"19.4263416792,-99.1296771488"
192,"19.4313325389,-99.1497520491"
146,"19.4373584031,-99.1383047535"
111,"19.4430614318,-99.1353793874"
178,"19.4457531395,-99.1485982115"
116,"19.43339234,-99.14617457"
139,"19.4331109726,-99.1250977437"
158,"19.4362349051,-99.1302332694"


In [23]:
# Second independent draw of 8 rows, to compare against the previous sample.
aleatorio_8_2 = econdata.sample(n=8)
aleatorio_8_2.loc[:, ['geo_point_2d']]

Unnamed: 0_level_0,geo_point_2d
id,Unnamed: 1_level_1
167,"19.4387807061,-99.1398360896"
104,"19.43397933,-99.13044075"
155,"19.4416371812,-99.1477800117"
200,"19.4340073515,-99.1372646267"
133,"19.4388860834,-99.1250947991"
202,"19.43916542,-99.1331749"
127,"19.42590571,-99.13203396"
194,"19.4288786806,-99.1456731565"


In [24]:
# Sample a fraction of the rows (25%) instead of a fixed count, then show
# the (rows, columns) shape of the result.
prop_25 = econdata.sample(frac=0.25)
prop_25.shape

(58, 7)

## 2. Muestreo sistemático

In [31]:
def systematic_sampling(data, step):
    """Return every ``step``-th row of ``data``, starting at position 0.

    Rows are selected by position (not by index label), so the result keeps
    the original index labels of the rows at positions 0, step, 2*step, ...

    Parameters
    ----------
    data : pandas.DataFrame
        Table to sample from.
    step : int
        Distance, in rows, between two consecutive selected rows.

    Returns
    -------
    pandas.DataFrame
        The selected rows, in their original order.
    """
    row_count = len(data)
    positions = np.arange(0, row_count, step)
    return data.iloc[positions]

# Keep one row out of every 4 and inspect which index labels were selected.
systematic_sample = systematic_sampling(data=econdata, step=4)
systematic_sample.index

Int64Index([  0,   4,   8,  12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
             52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96, 100,
            104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
            156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
            208, 212, 216, 220, 224, 228],
           dtype='int64', name='id')

## 3. Muestreo estratificado

In [32]:
# Build the stratum label as "<delegacion>,<tipo>" and list the share of rows
# that falls in each stratum, from smallest to largest.
econdata['estratificado'] = econdata['delegacion'] + "," + econdata['tipo']
(
    econdata['estratificado']
    .value_counts()
    .div(len(econdata))
    .sort_values(ascending=True)
)

Venustiano Carranza,Mercado    0.047826
Cuauhtémoc,Mercado             0.073913
Venustiano Carranza,Hotel      0.078261
Cuautémoc,Museo                0.156522
Cuautémoc,Hotel                0.643478
Name: estratificado, dtype: float64

In [33]:
def data_estratificada(econdata, nombres_columnas_estrat, valores_estrat, prop_estrat, random_state=None):
    """Build a stratified, re-sampled copy of ``econdata``.

    For every stratum value, rows whose stratification column equals that
    value are sampled *with replacement* until the stratum reaches its target
    size. The result has the same number of rows as ``econdata``.

    Parameters
    ----------
    econdata : pandas.DataFrame
        Source table.
    nombres_columnas_estrat : str
        Name of the column holding the stratum label of each row.
    valores_estrat : sequence
        Stratum labels to include, in output order.
    prop_estrat : sequence of float
        Target proportion for each stratum. The target size is
        ``int(len(econdata) * proportion)``. The entry for the last stratum
        is not read: the last stratum receives however many rows are needed
        to reach ``len(econdata)``.
    random_state : optional
        Forwarded unchanged to ``DataFrame.sample`` for every stratum.

    Returns
    -------
    pandas.DataFrame
        The sampled rows of all strata, concatenated in the order of
        ``valores_estrat`` and keeping their original index labels. An empty
        frame with the columns of ``econdata`` when ``valores_estrat`` is
        empty.

    Raises
    ------
    ValueError
        If a stratum needs at least one row but no row of ``econdata``
        carries its label (for example a misspelled label).
    """
    total_filas = len(econdata)
    ultima_pos = len(valores_estrat) - 1
    muestras = []        # one sampled frame per stratum; concatenated once at the end
    filas_asignadas = 0  # rows already handed out to previous strata

    for i, valor in enumerate(valores_estrat):
        if i == ultima_pos:
            # The last stratum absorbs the remainder so the output has
            # exactly as many rows as the input.
            ratio_len = total_filas - filas_asignadas
        else:
            # Number of rows for this stratum according to its proportion.
            ratio_len = int(total_filas * prop_estrat[i])

        # Rows of the source data that belong to the current stratum.
        df_filtrado = econdata[econdata[nombres_columnas_estrat] == valor]
        if df_filtrado.empty and ratio_len > 0:
            raise ValueError(
                f"No rows where {nombres_columnas_estrat!r} == {valor!r}; "
                f"cannot draw {ratio_len} rows for this stratum"
            )

        # Sampling with replacement lets a stratum grow beyond its original size.
        muestras.append(
            df_filtrado.sample(replace=True, n=ratio_len, random_state=random_state)
        )
        filas_asignadas += ratio_len

    if not muestras:
        # pd.concat rejects an empty list; return an empty frame with the same columns.
        return pd.DataFrame(columns=econdata.columns)

    # A single concat avoids repeatedly copying a growing frame and keeps the
    # source dtypes and index name.
    return pd.concat(muestras)

In [34]:
# Example strata and target proportions (one proportion per stratum).
# Labels must match the values of the 'estratificado' column exactly; the
# dataset spells the borough "Cuautémoc" for hotels and museums and
# "Cuauhtémoc" for markets, so the labels follow the data.
valores_estrat = [
    'Cuautémoc,Hotel',
    'Cuautémoc,Museo',
    'Venustiano Carranza,Hotel',
    'Cuauhtémoc,Mercado']
proporciones = [0.4,0.1,0.1,0.4]

In [35]:
# Strata to draw, in output order, with the target proportion of each one.
# The labels use the spellings present in the 'estratificado' column.
valores_estrat = [
    'Cuautémoc,Hotel',
    'Cuautémoc,Museo',
    'Venustiano Carranza,Hotel',
    'Cuauhtémoc,Mercado',
    'Venustiano Carranza,Mercado',
]
prop_estrat = [0.5, 0.2, 0.1, 0.1, 0.1]

# A fixed random_state makes the stratified sample repeatable.
df_estrat = data_estratificada(
    econdata,
    'estratificado',
    valores_estrat,
    prop_estrat,
    random_state=42,
)
df_estrat

Unnamed: 0,geo_point_2d,geo_shape,clave_cat,delegacion,perimetro,tipo,nom_id,estratificacion,estratificado
164,"19.4388741511,-99.1413308257","{""type"": ""MultiPoint"", ""coordinates"": [[-99.14...",003_113_03,Cuautémoc,B,Hotel,Dos Naciones,"Cuautémoc,Hotel","Cuautémoc,Hotel"
142,"19.4263681354,-99.1327278126","{""type"": ""MultiPoint"", ""coordinates"": [[-99.13...",006_127_14,Cuautémoc,A,Hotel,Ambar,"Cuautémoc,Hotel","Cuautémoc,Hotel"
27,"19.4348360773,-99.1463945583","{""type"": ""MultiPoint"", ""coordinates"": [[-99.14...",002_016_01,Cuautémoc,B,Hotel,Hilton Centro Histórico,"Cuautémoc,Hotel","Cuautémoc,Hotel"
168,"19.4349726565,-99.147766133","{""type"": ""MultiPoint"", ""coordinates"": [[-99.14...",002_014_23,Cuautémoc,B,Hotel,One Alameda,"Cuautémoc,Hotel","Cuautémoc,Hotel"
113,"19.43374405,-99.13550135","{""type"": ""MultiPoint"", ""coordinates"": [[-99.13...",001_012_13,Cuautémoc,A,Hotel,San Antonio,"Cuautémoc,Hotel","Cuautémoc,Hotel"
...,...,...,...,...,...,...,...,...,...
128,"19.4270781084,-99.1210175514","{""type"": ""Polygon"", ""coordinates"": [[[-99.1214...",323_061_04(123),Venustiano Carranza,B,Mercado,San Ciprian,"Venustiano Carranza,Mercado","Venustiano Carranza,Mercado"
37,"19.4271233834,-99.125111772","{""type"": ""Polygon"", ""coordinates"": [[[-99.1251...",323_065_01,Venustiano Carranza,B,Mercado,Dulceria,"Venustiano Carranza,Mercado","Venustiano Carranza,Mercado"
163,"19.4265454033,-99.1224859032","{""type"": ""Polygon"", ""coordinates"": [[[-99.1231...",323_063_05,Venustiano Carranza,B,Mercado,,"Venustiano Carranza,Mercado","Venustiano Carranza,Mercado"
156,"19.4255480371,-99.1249308096","{""type"": ""Polygon"", ""coordinates"": [[[-99.1253...",323_138_04 (3),Venustiano Carranza,B,Mercado,Mariscos,"Venustiano Carranza,Mercado","Venustiano Carranza,Mercado"
