In [116]:
import numpy as np
import os
import pandas as pd

# Current working directory of the kernel; the meta data CSV is resolved relative to it below
cwd = os.getcwd()

def get_gid(meta):
    """
    Extract a random gid from nsrdb meta data
    A state is drawn first, then a county within it, then a gid within that county, so every
    available state (and every county within the drawn state) is equally likely to be picked

    Parameters
    ----------
    meta : 'pandas.DataFrame'
        DataFrame of meta data from which to randomly sample a pixel.
        Must contain 'state', 'county' and 'gid' columns

    Returns
    -------
    gid : 'int'
        Selected gid

    Raises
    ------
    ValueError
        If meta has no rows
    """
    if len(meta) == 0:
        raise ValueError('Cannot sample a gid from empty meta data')

    for level in ('state', 'county'):
        values = meta[level].unique()
        # A level with a single value needs no draw
        if len(values) > 1:
            value = np.random.choice(values, 1)[0]
            # unique() keeps missing values, but NaN never compares equal to itself,
            # so a drawn NaN must be matched with isna() or the subset comes back empty
            if pd.isna(value):
                meta = meta.loc[meta[level].isna()]
            else:
                meta = meta.loc[meta[level] == value]

    gid = np.random.choice(meta['gid'].values, 1)[0]
    return gid


def sample_nsrdb(meta, samples):
    """
    Randomly sample pixels from nsrdb meta data
    Sampling starts at the coarsest of country, state, or county that has more than one
    unique value in meta; every draw is made with replacement, so a pixel can repeat

    Parameters
    ----------
    meta : 'pandas.DataFrame'
        DataFrame of meta data from which to randomly sample pixels.
        Must contain 'country', 'state', 'county' and 'gid' columns, with unique gid values
    samples : 'int'
        Number of samples to select

    Returns
    -------
    'pandas.DataFrame'
        Meta data for selected pixels, one row per sample in draw order
    """
    gids = None
    for level in ('country', 'state', 'county'):
        values = meta[level].unique()
        if len(values) <= 1:
            continue

        gids = []
        for value in np.random.choice(values, samples):
            # NaN never equals itself, so a drawn missing value is matched with isna()
            if pd.isna(value):
                subset = meta.loc[meta[level].isna()]
            else:
                subset = meta.loc[meta[level] == value]
            gids.append(get_gid(subset))
        break

    if gids is None:
        # A single country, state and county remain: draw pixels directly
        gids = np.random.choice(meta['gid'].values, samples)

    # Look rows up by gid value rather than by index label, so the result does not
    # depend on meta's index happening to equal the gid column
    rows = pd.Index(meta['gid']).get_indexer(gids)
    return meta.iloc[rows]

# Randomly sample from nsrdb

In [117]:
# Read the nsrdb meta data table from the working directory
path = os.path.join(cwd, 'nsrdb_meta.csv')
meta = pd.read_csv(path)
# Number the rows 0..len(meta)-1 and store that as the gid column
meta['gid'] = np.arange(len(meta))

In [118]:
# Sample 5 pixels from the full table; sample_nsrdb draws a country first when meta holds more than one
countries = sample_nsrdb(meta, 5)
countries

Unnamed: 0,latitude,longitude,elevation,timezone,country,state,county,urban,population,landcover,gid
1572572,18.09,-63.02,1.0,-4,St-Martin,MAF-00 (St. Martin aggregation,,,3816,210,1572572
1734380,46.97,-56.34,1.0,-3,St. Pierre and Miquelon,SPM-00 (St. Pierre and Miquelo,,,10,210,1734380
1349196,18.37,-70.38,457.24,-4,Dominican Rep.,Peravia,Baní,,411,40,1349196
1229219,22.37,-74.06,4.72,-5,Bahamas,BHS-00 (Bahamas aggregation),,,2,40,1229219
1318590,19.37,-71.3,478.85,-4,Dominican Rep.,Santiago Rodríguez,San Ignacio de Sabaneta,,610,30,1318590


In [119]:
# Keep only United States rows; with a single country left, sample_nsrdb falls through to the state level
US = meta.loc[meta['country'] == 'United States']
states = sample_nsrdb(US, 5)
states

Unnamed: 0,latitude,longitude,elevation,timezone,country,state,county,urban,population,landcover,gid
1129606,38.93,-77.1,75.75,-5,United States,District of Columbia,District of Columbia,"Washington, D.C.",20118,70,1129606
930832,37.65,-85.66,261.04,-5,United States,Kentucky,Larue,,37,50,930832
762566,47.37,-93.38,414.44,-6,United States,Minnesota,Itasca,,118,50,762566
97624,48.05,-124.38,209.8,-8,United States,Washington,Clallam,,40,100,97624
283316,47.05,-113.34,1443.0,-7,United States,Montana,Missoula,,0,150,283316


In [114]:
# Keep only Colorado rows of the US subset; with a single state left, sampling starts at the county level.
# The frame being filtered must be US (the same frame the mask is built from); `conus` is not defined
# anywhere in this notebook and only worked from stale kernel state.
CO = US.loc[US['state'] == 'Colorado']
counties = sample_nsrdb(CO, 5)
counties

Unnamed: 0,latitude,longitude,elevation,timezone,country,state,county,urban,population,landcover,gid
408282,38.13,-107.62,2663.3,-7,United States,Colorado,Ouray,,0,70,408282
467155,37.89,-105.14,2678.44,-7,United States,Colorado,Huerfano,,1,70,467155
542324,38.49,-102.26,1245.44,-7,United States,Colorado,Kiowa,,0,130,542324
525165,40.81,-102.9,1147.8,-7,United States,Colorado,Logan,,4,14,525165
410110,40.01,-107.54,2404.08,-7,United States,Colorado,Rio Blanco,,6,70,410110


In [115]:
# Keep only Denver county rows; with one country, state and county left, gids are drawn directly.
# Draws are made with replacement, so the same pixel can appear more than once.
Denver = CO.loc[CO['county'] == 'Denver']
pixels = sample_nsrdb(Denver, 5)
pixels

Unnamed: 0,latitude,longitude,elevation,timezone,country,state,county,urban,population,landcover,gid
469107,39.61,-105.06,1685.28,-7,United States,Colorado,Denver,Denver,15089,70,469107
469648,39.65,-105.06,1672.64,-7,United States,Colorado,Denver,Denver,22076,70,469648
480934,39.85,-104.62,1618.84,-7,United States,Colorado,Denver,,2,30,480934
472706,39.77,-104.94,1596.64,-7,United States,Colorado,Denver,Denver,27744,190,472706
469107,39.61,-105.06,1685.28,-7,United States,Colorado,Denver,Denver,15089,70,469107
