In [1]:
## my token and baseUrl
token = 'TOKEN'
baseUrl = "http://agrodatacube.wur.nl/api/v1/rest"

In [2]:
## imports
import requests
import pandas as pd
import numpy as np

In [3]:
## helper functions

def get_fields(year, number):
    field_url = baseUrl+"/fields?output_epsg=4326&page_size="+str(number)+"&page_offset=0&year="+str(year)+"&epsg=4326"

    r = requests.get(url=field_url, headers={'token': token}) 
    if r.status_code != 200:
        print("%d %s" % (r.status_code, r.text))
        exit(-1)

    fields = r.json()
    return fields['features']



def get_soil_code(fieldid):
    soiltypes_url = baseUrl+"/fields/{0}/soiltypes".format(fieldid)
    soiltypes_data = requests.get(url=soiltypes_url, headers={'token': token})
    lijst = soiltypes_data.json()['features']
    
    maxopp = -1
    for el in lijst:
        if el['properties']['perimeter'] > maxopp:
            retval = el['properties']['soilcode']
            maxopp = el['properties']['perimeter']
    
    return retval

In [4]:
## get some fields
aantalperjaar = 60

allfields = []
for year in range(2009,2018):
    fields = get_fields(year,aantalperjaar)
    for field in fields:
        allfields.append(field)

In [5]:
## make dataframe

df = pd.DataFrame(columns=['id','x','y','jaar','crop1','crop2','soilcode'])


for field in allfields:
    x = field['geometry']['coordinates'][0][0][0][0]
    y = field['geometry']['coordinates'][0][0][0][1]
    jaar = field['properties']['year']
    crop = field['properties']['crop_name']
    crop_kort = crop.split(',')[0]
    fieldid = field['properties']['fieldid'] 

    soilcode = get_soil_code(fieldid)
    s = pd.Series([fieldid, x, y, jaar, crop_kort, crop, soilcode], df.columns)
    df = df.append(s, ignore_index=True)
df = df.set_index('soilcode')

In [6]:
## translate soils

def soil_code_to_type(code):
    soiltype_url = baseUrl+"/codes/soilcodes/{0}".format(code)
    soiltype_data = requests.get(soiltype_url, headers={'token': token})
    soil = soiltype_data.json()
    return soil

df = df.reset_index()
df = df.set_index('soilcode')
soilcode_translater = []
for soilcode in df.index.unique():
    try: 
        reqres = soil_code_to_type(soilcode)
        soilname = reqres['features'][0]['properties']['soilname']
        soiltype = reqres['features'][0]['properties']['soiltype']
        soilcode_translater.append({'soilcode': soilcode, 'soilname': soilname, 'soiltype': soiltype})
    except:
        soilcode_translater.append({'soilcode': soilcode, 'soilname': np.nan, 'soiltype': np.nan})

In [7]:
## make DF

sct = pd.DataFrame(soilcode_translater).set_index('soilcode')
df = df.join(sct, how='left').reset_index().dropna().set_index('id')
del df['soilcode']
del df['crop2']
df['gewas'] = ['Grasland' if 'rasland' in c else c for c in df['crop1']]
del df['crop1']
df2 = df.copy()
df3 = df2.copy()
df2

Unnamed: 0_level_0,x,y,jaar,soilname,soiltype,gewas
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
11176,5.696840,52.037825,2009,Zand,Podzolgronden,Overige natuurterreinen
5451888,5.774101,52.055922,2015,Zand,Podzolgronden,Natuurterreinen (incl. heide)
5536645,5.774101,52.055922,2016,Zand,Podzolgronden,Natuurterreinen (incl. heide)
6710023,5.774101,52.055922,2017,Zand,Podzolgronden,Natuurterreinen (incl. heide)
206315,5.708485,51.961387,2009,Zware zavel,Rivierkleigronden,Grasland
815110,5.698066,51.960849,2009,Zware zavel,Rivierkleigronden,Grasland
210111,5.715541,51.961253,2009,Zware zavel,Rivierkleigronden,Grasland
1516956,5.690179,51.958157,2010,Zware zavel,Rivierkleigronden,Grasland
1431518,5.714315,51.961698,2010,Zware zavel,Rivierkleigronden,Grasland
1577721,5.700410,51.958686,2010,Zware zavel,Rivierkleigronden,Grasland


In [8]:
df2['gewas']    = [gewas if gewas in ['Grasland', 'Mais', 'Tarwe'] else 'Overig' for gewas in df2['gewas']]
df2['soiltype'] = [soilt if soilt in ['Podzolgronden', 'Kalkloze zandgronden', 'Zeekleigronden', 
                                      'Dikke eerdgronden', 'Rivierkleigronden', 'Moerige gronden', 
                                      'Veengronden'] else 'Overig' for soilt in df2['soiltype']]
df2['soilname'] = [soiln if soiln in ['Zand', 'Zware zavel', 'Lichte zavel', 'Lichte klei', 'Veen', 
                                      'Moerig op zand', 'Leem'] else 'Overig' for soiln in df2['soilname']]


In [9]:
for col in df2.columns[2:]:
    print(df2[col].value_counts())
    print()

2016    60
2015    60
2014    60
2012    60
2011    60
2017    59
2013    59
2009    59
2010    58
Name: jaar, dtype: int64

Lichte zavel    253
Zand            130
Zware zavel     101
Overig           43
Veen              8
Name: soilname, dtype: int64

Rivierkleigronden       359
Podzolgronden           110
Kalkloze zandgronden     44
Dikke eerdgronden         9
Veengronden               8
Overig                    5
Name: soiltype, dtype: int64

Grasland    410
Mais         64
Overig       57
Tarwe         4
Name: gewas, dtype: int64



In [10]:
df2.head()

Unnamed: 0_level_0,x,y,jaar,soilname,soiltype,gewas
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
11176,5.69684,52.037825,2009,Zand,Podzolgronden,Overig
5451888,5.774101,52.055922,2015,Zand,Podzolgronden,Overig
5536645,5.774101,52.055922,2016,Zand,Podzolgronden,Overig
6710023,5.774101,52.055922,2017,Zand,Podzolgronden,Overig
206315,5.708485,51.961387,2009,Zware zavel,Rivierkleigronden,Grasland


In [11]:
from sklearn import preprocessing

df4 = pd.get_dummies(df2, columns=df2.columns[3:-1])
df4.head()

Unnamed: 0_level_0,x,y,jaar,gewas,soilname_Lichte zavel,soilname_Overig,soilname_Veen,soilname_Zand,soilname_Zware zavel,soiltype_Dikke eerdgronden,soiltype_Kalkloze zandgronden,soiltype_Overig,soiltype_Podzolgronden,soiltype_Rivierkleigronden,soiltype_Veengronden
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
11176,5.69684,52.037825,2009,Overig,0,0,0,1,0,0,0,0,1,0,0
5451888,5.774101,52.055922,2015,Overig,0,0,0,1,0,0,0,0,1,0,0
5536645,5.774101,52.055922,2016,Overig,0,0,0,1,0,0,0,0,1,0,0
6710023,5.774101,52.055922,2017,Overig,0,0,0,1,0,0,0,0,1,0,0
206315,5.708485,51.961387,2009,Grasland,0,0,0,0,1,0,0,0,0,1,0


In [12]:
#### # Required Python Packages
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

import pdb

In [13]:
train_percentage = 0.8
feature_headers  = list(set(df4.columns) - set(['gewas']))
target_header    = 'gewas'


train_x, test_x, train_y, test_y = train_test_split(df4[feature_headers], df4[target_header],
                                                        train_size=train_percentage)




In [14]:
clf = RandomForestClassifier()
trained_model = clf.fit(train_x, train_y)
predictions = trained_model.predict(test_x)

In [15]:
res = pd.DataFrame(test_y.copy())
res['pred'] = predictions
res

Unnamed: 0_level_0,gewas,pred
id,Unnamed: 1_level_1,Unnamed: 2_level_1
3486405,Grasland,Grasland
614236,Overig,Grasland
1357170,Grasland,Grasland
3794943,Grasland,Grasland
2827038,Mais,Mais
5391024,Grasland,Grasland
3233139,Grasland,Grasland
674296,Overig,Overig
1570814,Grasland,Grasland
2315175,Mais,Grasland


In [16]:
    print("Train Accuracy :: ", accuracy_score(train_y, trained_model.predict(train_x)))
    print("Test Accuracy  :: ", accuracy_score(test_y, predictions))

Train Accuracy ::  0.9976635514018691
Test Accuracy  ::  0.8785046728971962


In [20]:
df2

Unnamed: 0_level_0,x,y,jaar,soilname,soiltype,gewas
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
11176,5.696840,52.037825,2009,Zand,Podzolgronden,Overig
5451888,5.774101,52.055922,2015,Zand,Podzolgronden,Overig
5536645,5.774101,52.055922,2016,Zand,Podzolgronden,Overig
6710023,5.774101,52.055922,2017,Zand,Podzolgronden,Overig
206315,5.708485,51.961387,2009,Zware zavel,Rivierkleigronden,Grasland
815110,5.698066,51.960849,2009,Zware zavel,Rivierkleigronden,Grasland
210111,5.715541,51.961253,2009,Zware zavel,Rivierkleigronden,Grasland
1516956,5.690179,51.958157,2010,Zware zavel,Rivierkleigronden,Grasland
1431518,5.714315,51.961698,2010,Zware zavel,Rivierkleigronden,Grasland
1577721,5.700410,51.958686,2010,Zware zavel,Rivierkleigronden,Grasland
