# Crop reallocation algorigthm: toy model (xarray version)

This notebook takes the code in toy_model.ipynb and translates it to xarray, in an effort to make it faster.

In [10]:
import numpy as np
import pandas as pd
import xarray as xr
import timeit

# 1. Set up parameters

Use the DataFrames defined in toy_model, then convet them to xarray objects

In [395]:
# define array dims
crops = ['soy', 'rice']
geo0 = [1]
geo1 = [1, 2, 3]

In [396]:
# present yields
yields = xr.DataArray(
    data = [[10, 20, 15],
           [20, 10, 15]],
    coords = [crops, geo1],
    dims = ['crop', 'geo1_id']
)

calories = xr.DataArray(
    data = [25, 15],
    coords = [crops],
    dims = ['crop']
)

acres_planted = xr.DataArray(
    data = [[40, 70, 0],
           [60, 30, 0]],
    coords = [crops, geo1],
    dims = ['crop', 'geo1_id']
)

total_acres = xr.DataArray(
    data = [[100, 100, 100],
           [100, 100, 100]],
    coords = [crops, geo1],
    dims = ['crop', 'geo1_id']
)

In [397]:
present_yields = xr.Dataset({
    'yields': yields,
    'calories': calories,
    'acres_planted': acres_planted,
    'total_acres': total_acres
})

present_yields['calorie_yield'] = present_yields.calories * present_yields.yields

In [398]:
# Make xr.Dataset incorporating Climate shocks

# xarray objects for yield shocks
yield_shocks = xr.DataArray(
    data = [[0.5, 0.8, 1],
           [0.9, 0.6, 1]],
    coords = [crops, geo1],
    dims = ['crop', 'geo1_id']
)

future_yields = yield_shocks * yields

future_yields = xr.Dataset({
    'yields': future_yields,
    'calories': calories,
    'acres_planted': acres_planted,
    'total_acres': total_acres
})

future_yields['calorie_yield'] = future_yields.calories * future_yields.yields

# 2. Set up function for calculating moments

In [140]:
def calculate_gamma(ds):
    '''
    Calculate 'gamma', the ratio of total calories produced to possible 
    calories produced.
    '''
    
    total_cal = (ds['calorie_yield']*ds['acres_planted']).sum()

    potential_cal = (ds[['geo1_id', 'calorie_yield']]
            .groupby('geo1_id')
            .max()
        )


    total_acres = (ds[['geo1_id', 'acres_planted']]
                   .groupby('geo1_id')
                   .sum()
                  )

    total_cal_potential = sum(total_acres['acres_planted']*potential_cal['calorie_yield'])

    return total_cal / total_cal_potential 

In [279]:
def analyze_empty_acreage(ds, crop):
    '''
    Returns the yield and plot id for the plot with the highest yield that currently has 
    empty space, as well as the yield and plot id for the plot with the lowest yield that 
    is currently occupied.
    These are the conditions that will be calculated in each iteration of the loop in
    `calculate_phi`
    
   Parameters:
    -----------
    '''
        
    acres_planted = ds.acres_planted.groupby('geo1_id').sum()
    total_acres = ds.total_acres.groupby('geo1_id').mean()

    empty_acres = total_acres - acres_planted

    assert all(empty_acres >= 0)

    empty_max_yield = (ds.where((empty_acres > 0) & (ds.crop == crop))
                       .yields.max())

    empty_max_id = ds.where(
        (empty_acres > 0) & (ds.yields == empty_max_yield) & (ds.crop == crop),
        drop=True)

    used_min_yield = (ds.where((ds.acres_planted) > 0 & (ds.crop == crop))
                     .yields.min())

    used_min_id = ds.where(
        (ds.acres_planted > 0) & (ds.yields == used_min_yield) & (ds.crop == crop),
        drop = True)
    return [empty_max_id, used_min_id]

In [280]:
def reallocate_crops(ds, crop, empty_max_id, used_min_id):
    '''
    A loop to reallocate crop area, moving one acre at a time from the 
    lowest-yielding parcel
    of the crop to the highest-yielding unoccupied parcel
    
    Parameters:
    -----------
    '''
    # add one acre to the highest-yielding plot that is currently empty
    empty_max_id['acres_planted'] += 1
    ds = empty_max_id.combine_first(ds)

    # remove one acrea to the lowest-yielding plot that is currently in use
    used_min_id['acres_planted'] -= 1
    ds = used_min_id.combine_first(ds)
    
    return ds

In [350]:
def calculate_phi(ds, crop):
    '''
    Calculate 'phi', the ratio of actual yields to the maximum possible
    yield that would be realized in a perfectly frictionless scenario
    with optimal acreage placement in a country. 
    
    Parameters:
    -----------
    '''
    # calculate actual yield
    actual_yield = (ds.where(ds.crop == crop).yields * 
                    ds.where(ds.crop == crop).acres_planted).sum()

    # calculate potential yield
    # get initial conditions
    empty_max_id, used_min_id = analyze_empty_acreage(ds, crop)

    while any(empty_max_id.yields.values > used_min_id.yields.values):
        ds = reallocate_crops(ds, crop, empty_max_id, used_min_id)

        empty_max_id, used_min_id = analyze_empty_acreage(ds, crop)
        
    potential_yield = (ds.where(ds.crop == crop).yields * 
                       ds.where(ds.crop == crop).acres_planted).sum()
    return actual_yield / potential_yield

In [357]:
from numba import jit
@jit
def calculate_phi_jit(ds, crop):
    '''
    Calculate_phi compiled by numba 
    
    Parameters:
    -----------
    '''
    # calculate actual yield
    actual_yield = (ds.where(ds.crop == crop).yields * 
                    ds.where(ds.crop == crop).acres_planted).sum()

    # calculate potential yield
    # get initial conditions
    empty_max_id, used_min_id = analyze_empty_acreage(ds, crop)

    while any(empty_max_id.yields.values > used_min_id.yields.values):
        ds = reallocate_crops(ds, crop, empty_max_id, used_min_id)

        empty_max_id, used_min_id = analyze_empty_acreage(ds, crop)
        
    potential_yield = (ds.where(ds.crop == crop).yields * 
                       ds.where(ds.crop == crop).acres_planted).sum()
    return actual_yield / potential_yield

# 3. Write functions that incorporate a climate shock and match moments

In [405]:
def calculate_distances(present, future, crops):
    '''
    Docstring here!
    '''
    present_moments = [calculate_gamma(present).values] + [calculate_phi(present, c).values for c in crops]
    future_moments = [calculate_gamma(future).values] + [calculate_phi(future, c).values for c in crops]

    print(present_moments, future_moments, '\n')
    
    distances = [p - f for p, f in zip(present_moments, future_moments)]
    return distances

In [406]:
# left off here--3/30/20. Values of calculate_distances seem weird

calculate_distances(present_yields, future_yields, ['soy', 'rice'])

[array(0.84375), array(0.9), array(0.90909091)] [array(0.77462687), array(0.76744186), array(1.)] 



[0.06912313432835826, 0.1325581395348837, -0.09090909090909094]

# Benchmark

In [75]:
import timeit

In [151]:
# calculate_gamma
timeit.timeit("lambda: calculate_gamma(present_yields)", number=10000)

0.0006795500012231059

In [203]:
# analyze_empty_acreage
timeit.timeit("lambda: analyze_empty_acreage(present_yields, 'soy')")

0.06293064999954368

In [288]:
# reallocate_crops
empty_max_id, used_min_id = analyze_empty_acreage(present_yields, 'soy')
timeit.timeit("lambda: reallocate_crops(present_yields, 'soy', empty_max_id, used_min_id)", number=10000)

0.001194758999190526

In [321]:
calculate_phi(present_yields, 'rice')



<xarray.DataArray ()>
array(0.956522)

In [364]:
# calculate_phi
timeit.timeit("lambda: calculate_phi(present_yields, 'soy')")

0.0627708789997996

In [371]:
# calculate_phi_jit
timeit.timeit("lambda: calculate_phi_jit(present_yields, 'soy')")

0.06177171099989209