# First Prototype Without NLP

## Import relevant libraries and load data

In [1]:
import numpy as np
import pandas as pd
import random
from haversine import haversine, Unit # NOTE: Needs install: pip install haversine

In [2]:
# Read in data

resourcesV3_data = pd.read_excel("../data/interim/MasterSyntheticDatabase_v3.xlsx", header=0, usecols="B:O")

In [3]:
# Show first 5 rows

resourcesV3_data.head()

Unnamed: 0,Crop,Category,Total Hectares,Production Hectares,Country,Location,Volume Kilos,Volume Tonnes,Total Workers,Worker Per Tonne,CO2 Per Tonne,Price Per Tonne,Price With Tax Per Tonne,Price With Tax And Subsidies Per Tonne
0,Abaca,Ornamental & medicinal plants,2016.89,8.2,Ecuador,"-78.65876593322058, 0.18292718409105735",16501.0,16.501,143,8.666141,5090000,2030,1928.2,2121.02
1,Açaí,Fruit,309.6,245.84,Brazil,"-52.42948195909473, -7.753611932129204",161524.0,161.524,2,0.012382,11850000,577,340.0,374.0
2,Asparagus,Vegetable,4351.63,431.87,Peru,"-74.69554673209434, -8.558079104309764",5482000.0,5482.0,5793,1.056731,11850000,893,656.0,721.6
3,Asparagus,Vegetable,170.0,105.0,Peru,"-73.29561564804418, -5.579917476132492",926098.5,926.0985,97,0.10474,11850000,893,656.0,721.6
4,Asparagus,Vegetable,239.2,120.0,Peru,"-78.29560844666452, -12.25535004522117",1080000.0,1080.0,337,0.312037,11850000,893,656.0,721.6


In [4]:
# Drop irrelevant columns for this task

resources_data = resourcesV3_data.drop(["Total Hectares", "Production Hectares", "Volume Kilos", "Total Workers"], axis=1)
resources_data.head()

Unnamed: 0,Crop,Category,Country,Location,Volume Tonnes,Worker Per Tonne,CO2 Per Tonne,Price Per Tonne,Price With Tax Per Tonne,Price With Tax And Subsidies Per Tonne
0,Abaca,Ornamental & medicinal plants,Ecuador,"-78.65876593322058, 0.18292718409105735",16.501,8.666141,5090000,2030,1928.2,2121.02
1,Açaí,Fruit,Brazil,"-52.42948195909473, -7.753611932129204",161.524,0.012382,11850000,577,340.0,374.0
2,Asparagus,Vegetable,Peru,"-74.69554673209434, -8.558079104309764",5482.0,1.056731,11850000,893,656.0,721.6
3,Asparagus,Vegetable,Peru,"-73.29561564804418, -5.579917476132492",926.0985,0.10474,11850000,893,656.0,721.6
4,Asparagus,Vegetable,Peru,"-78.29560844666452, -12.25535004522117",1080.0,0.312037,11850000,893,656.0,721.6


## Denote the borders of countries and generate a point for a hub

In [5]:
# Define a square denoted by an x range and a y range for each country

def getBoundaries(country):
    if country == "Brazil":
        return [[-70.084311, -46.969077], [-11.214025, 2.352550]]
    elif country == "Colombia":
        return [-75.577474, -70.084311], [-2.891532, 4.047229]
    elif country == "Ecuador":
        return [-79.137045, -75.577474], [-3.725094, 0.886501]
    elif country == "Peru":
        return [-79.137045, -70.172201], [-12.504216, -3.725094]
    elif country == "Suriname":
        return [-57.911460, -54.258506], [2.352550, 5.842207]
    else:
        raise Exception("Country not defined")

In [6]:
def randomHubLocation(country):
    # Seed random number generator
    random.seed(1)

    # For a hub in Colombia:
    bx, by = getBoundaries(country)
    hub_coord = [random.uniform(bx[0], bx[1]), random.uniform(by[0], by[1])]
    
    return hub_coord

hub_coord = randomHubLocation("Colombia")

In [7]:
# Distance between crop and hub
def distanceCropsHub(hub_coord, df):
    ''' Calculate the Harvesine distance between each crop and the hub. '''
    for i, crop in df.iterrows():
        x_crop_str, y_crop_str = crop["Location"].split(',', 1)
        crop_coord = [float(x_crop_str), float(y_crop_str)]

        distance = haversine(crop_coord, hub_coord)/10 # in km; divide by 10 to make distances more similar to what they will be in the raiinforest

        df.at[i, "Haversine Distance"] = distance
    
distanceCropsHub(hub_coord, resources_data)
resources_data.to_excel("../data/interim/MasterSyntheticDatabase_v4.xlsx")
resources_data.head()

Unnamed: 0,Crop,Category,Country,Location,Volume Tonnes,Worker Per Tonne,CO2 Per Tonne,Price Per Tonne,Price With Tax Per Tonne,Price With Tax And Subsidies Per Tonne,Haversine Distance
0,Abaca,Ornamental & medicinal plants,Ecuador,"-78.65876593322058, 0.18292718409105735",16.501,8.666141,5090000,2030,1928.2,2121.02,43.055198
1,Açaí,Fruit,Brazil,"-52.42948195909473, -7.753611932129204",161.524,0.012382,11850000,577,340.0,374.0,253.816417
2,Asparagus,Vegetable,Peru,"-74.69554673209434, -8.558079104309764",5482.0,1.056731,11850000,893,656.0,721.6,33.718183
3,Asparagus,Vegetable,Peru,"-73.29561564804418, -5.579917476132492",926.0985,0.10474,11850000,893,656.0,721.6,31.240568
4,Asparagus,Vegetable,Peru,"-78.29560844666452, -12.25535004522117",1080.0,0.312037,11850000,893,656.0,721.6,54.715081


In [8]:
#----------Sibylle INPUT
def recommendationAlgorithm(type_given, val):
    n_workers, money_needed = 0, 0
    
    if type_given == "Workers":
        n_workers = val
    elif type_given == "Money":
        money_needed = val

    conversion_USD_Real = 5.16 # As of 27/02/22

    if n_workers > 0:
        for i, crop in resources_data.iterrows():
            resources_data.at[i, "Tot Money"] = np.nan # Reset values in database
            
            tot_volume = min(crop["Volume Tonnes"], n_workers/crop["Worker Per Tonne"]) # Find the minimum value for volume of produce
            potential_money = crop["Price With Tax And Subsidies Per Tonne"] * tot_volume * conversion_USD_Real
            resources_data.at[i, "Tot Volume For Team"] = tot_volume
            resources_data.at[i, "Tot Money"] = potential_money

        closest = resources_data.nsmallest(10, 'Haversine Distance')
        best = closest.nlargest(3, 'Tot Money')
        best.head()

        answers = []

        #------ For Sibylle. 
        #These are: name of crop, location and the yearly income in Brasilian Real for producing it given the number of people in the team
        for i, crop in best.iterrows():
            # Note: return money in a month
            answers.append([crop["Crop"], str(np.round(float(crop["Tot Money"]), -3)/12)])

    elif money_needed > 0:
        print(money_needed)
        for i, crop in resources_data.iterrows():
            resources_data.at[i, "Workers Needed"] = np.nan # Reset values in database
            
            # Rearrange previous equation to find workers needed to make that much money
            workers_needed = money_needed * crop["Worker Per Tonne"] / (crop["Price With Tax And Subsidies Per Tonne"] * conversion_USD_Real)

            if workers_needed/crop["Worker Per Tonne"] < crop["Volume Tonnes"]:
                resources_data.at[i, "Workers Needed"] = workers_needed


        closest = resources_data.nsmallest(10, 'Haversine Distance')
        best = closest.nsmallest(3, 'Workers Needed')

        answers = []

        #------ For Sibylle. 
        #These are: name of crop, the number of people needed to make the money requested in a year
        for i, crop in best.iterrows():
            answers.append([crop["Crop"], str(np.ceil(float(crop["Workers Needed"])))])

            
    return answers # Array with 3 recommendations

print(recommendationAlgorithm("Workers", 5))



[['Coffee Arabica', '136666.66666666666'], ['Avocado', '39416.666666666664'], ['Banana', '35083.333333333336']]
