In [1]:
import pandas as pd
from numpy import array
import numpy as np
import math
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer

In [2]:
train = pd.read_csv('Train.csv')
print(train)

# The timestamps printed from Train.csv are 5 minutes apart, so 24*12 samples
# span one day.
SAMPLES_PER_DAY = 24 * 12


def fill_nan_from_previous_day(values, period=SAMPLES_PER_DAY):
    """Return a copy of `values` with NaN entries replaced by earlier samples.

    A NaN at position p is replaced by the already-filled value at p - period.
    When fewer than `period` samples precede p, the most recent filled value
    is used instead, because indexing `filled[p - period]` with a negative
    index would either raise IndexError or silently pick an unrelated element
    counted from the end of the partially built list.

    Parameters
    ----------
    values : sequence of float
        Raw samples, possibly containing NaN.
    period : int
        How many positions to look back for the replacement value.

    Returns
    -------
    list
        Same length as `values`. A NaN can only remain when there is no usable
        earlier sample (a leading NaN, or a look-back that lands on one).
    """
    filled = []
    for position, value in enumerate(values):
        if np.isnan(value):
            if position >= period:
                value = filled[position - period]
            elif filled:
                value = filled[-1]
        filled.append(value)
    return filled


pressure_list = train['Pressure (KPa)'].tolist()
temp_list = train['Air temperature (C)'].tolist()
pressure_list_without_nan = fill_nan_from_previous_day(pressure_list)
temp_list_without_nan = fill_nan_from_previous_day(temp_list)


                 timestamp  Soil humidity 1  Irrigation field 1  \
0      2019-02-23 00:00:00            67.92                 0.0   
1      2019-02-23 00:05:00            67.89                 0.0   
2      2019-02-23 00:10:00            67.86                 0.0   
3      2019-02-23 00:15:00            67.84                 0.0   
4      2019-02-23 00:20:00            67.81                 0.0   
5      2019-02-23 00:25:00            67.78                 0.0   
6      2019-02-23 00:30:00            67.76                 0.0   
7      2019-02-23 00:35:00            67.73                 0.0   
8      2019-02-23 00:40:00            67.70                 0.0   
9      2019-02-23 00:45:00            67.68                 0.0   
10     2019-02-23 00:50:00            67.65                 0.0   
11     2019-02-23 00:55:00            67.62                 0.0   
12     2019-02-23 01:00:00            67.59                 0.0   
13     2019-02-23 01:05:00            67.57                 0.

In [3]:
def firstNanIndex(listfloats, k):
    """Return the position of the first NaN located strictly after index `k`.

    Positions up to and including `k` are never inspected. Returns None when
    no NaN is found after `k`.
    """
    return next(
        (pos for pos, value in enumerate(listfloats) if pos > k and math.isnan(value)),
        None,
    )
        
def firstNonNan(listfloats, j):
    """Find the first non-NaN entry at or after position `j`.

    Returns a tuple `(position, value)`. When every entry from `j` onward is
    NaN, the last position of the list and its value are returned instead.
    """
    hit = next(
        ((offset + j, value)
         for offset, value in enumerate(listfloats[j:])
         if not math.isnan(value)),
        None,
    )
    if hit is not None:
        return hit
    tail = len(listfloats) - 1
    return tail, listfloats[tail]
    
def cleanSerie(columnName):
    """Return the values of `train[columnName]` as a list with NaN entries removed.

    Reads the module-level `train` DataFrame; the order of the remaining
    values is preserved.
    """
    kept = []
    for value in train[columnName].tolist():
        if not math.isnan(value):
            kept.append(value)
    return kept

                


In [4]:
def _split_field(column, n_trim, horizon):
    """Build the observed history and the truncated raw series for one column.

    Returns a tuple `(observed, first_gap, series)` where
    - `observed` is the column's non-NaN values with the last `n_trim` removed,
    - `first_gap` is the index of the first NaN found after position 0,
    - `series` is the raw column cut off `horizon` positions past `first_gap`.
    """
    observed = cleanSerie(column)[:-n_trim]
    raw = train[column].tolist()
    first_gap = firstNanIndex(raw, 0)
    return observed, first_gap, raw[:first_gap + horizon]


# The horizons (1153, 1747, 1153, 1729) are the same values later passed as
# preds_horizon for each field.
# NOTE(review): the reason for trimming 3 / 3 / 3 / 5 trailing observations is
# not visible in this notebook -- confirm.
field1_without_Nan, index1, field1 = _split_field('Soil humidity 1', 3, 1153)
field2_without_Nan, index2, field2 = _split_field('Soil humidity 2', 3, 1747)
field3_without_Nan, index3, field3 = _split_field('Soil humidity 3', 3, 1153)
field4_without_Nan, index4, field4 = _split_field('Soil humidity 4', 5, 1729)


In [5]:
def prepare_data(train, field_number, field_without_Nan, preds_horizon):
    """Build the feature matrix and target used to train one field's model.

    Only the first `len(field_without_Nan)` rows of `train` are used. For each
    row the one-step change (value minus the previous row's value) is computed
    for soil humidity, pressure and air temperature. Rows where any of these
    changes, or the irrigation flag, is NaN are dropped; this always includes
    the first row, which has no predecessor.

    The feature frame is built as a new DataFrame instead of assigning columns
    into a slice of `train`, which avoids pandas' chained-assignment
    (SettingWithCopyWarning) path and leaves `train` untouched.

    Parameters
    ----------
    train : pandas.DataFrame
        Must contain 'Pressure (KPa)', 'Air temperature (C)',
        'Soil humidity <n>' and 'Irrigation field <n>' for n = field_number.
    field_number : int or str
        Suffix selecting the humidity / irrigation columns.
    field_without_Nan : sized
        Only its length is used: the number of leading rows to keep.
    preds_horizon : int
        Unused; kept so existing callers keep working.

    Returns
    -------
    (X, Y)
        X has columns ['diff_pressure', 'diff_temperature', 'irrigation_now'];
        Y is the 'diff' Series (one-step change in soil humidity). Both keep
        the original row index of the retained rows.
    """
    humidity_col = 'Soil humidity ' + str(field_number)
    irrigation_col = 'Irrigation field ' + str(field_number)
    n_rows = len(field_without_Nan)

    # Explicit copy so nothing below can write through to the caller's frame.
    window = train[
        ['Pressure (KPa)', 'Air temperature (C)', humidity_col, irrigation_col]
    ].iloc[:n_rows].copy()

    def _step_change(series):
        # Difference between each sample and the one before it.
        return series - series.shift(1)

    samples = pd.DataFrame({
        'diff': _step_change(window[humidity_col]),
        'diff_pressure': _step_change(window['Pressure (KPa)']),
        'diff_temperature': _step_change(window['Air temperature (C)']),
        'irrigation_now': window[irrigation_col],
    })

    # A NaN humidity (current or previous) yields a NaN 'diff', so dropping on
    # these four columns removes the same rows as dropping on the raw columns.
    samples = samples.dropna()

    Y = samples['diff']
    X = samples.drop(columns='diff')
    return X, Y


In [6]:
def train_xgboost(data_X, data_Y, random_state=0):
    """Fit an XGBRegressor on `data_X` / `data_Y` and return the fitted model.

    The features go through a default SimpleImputer, then 10% of the rows are
    held out with `train_test_split`. That hold-out is passed as the
    evaluation set for early stopping (5 rounds, RMSE). `random_state` seeds
    both the split and the regressor.
    """
    imputed_X = SimpleImputer().fit_transform(data_X)
    fit_X, holdout_X, fit_y, holdout_y = train_test_split(
        imputed_X,
        data_Y,
        test_size=0.1,
        random_state=random_state,
    )

    regressor = XGBRegressor(
        n_estimators=1500,
        learning_rate=0.1,
        random_state=random_state,
    )
    # NOTE(review): recent xgboost releases expect early_stopping_rounds and
    # eval_metric on the constructor rather than on fit() -- confirm against
    # the installed version.
    regressor.fit(
        fit_X,
        fit_y,
        eval_set=[(holdout_X, holdout_y)],
        eval_metric=["rmse"],
        early_stopping_rounds=5,
        verbose=False,
    )
    return regressor


In [7]:
def findStep(last_prediction, moisture_list, index):
    """Return the constant per-sample increment toward the next observed value.

    Scans `moisture_list` from `index` for the first non-NaN entry and divides
    the difference between that entry and `last_prediction` by the number of
    positions skipped.

    If `moisture_list[index]` is itself not NaN, no positions are skipped and
    the division is by zero. If no non-NaN entry exists from `index` onward,
    the scan runs off the end of the list and raises IndexError.
    """
    cursor = index
    while np.isnan(moisture_list[cursor]):
        cursor += 1
    gap = cursor - index
    return (moisture_list[cursor] - last_prediction) / gap
    


In [8]:
def get_Predictions(model, field_number, field_without_Nan, field, train_last_index, preds_horizon):
    """Produce `preds_horizon` soil-humidity values for one field.

    At every step a humidity change is obtained (from `model`, or from a
    constant step computed by `findStep` while irrigation is on) and added to
    the observed humidity at that position when it is available, otherwise to
    the previous prediction.

    Reads the module-level `train`, `temp_list_without_nan` and
    `pressure_list_without_nan`.

    Parameters
    ----------
    model : fitted regressor with a `predict` method; it is fed one row of
        [pressure_diff, temperature_diff, irrigation].
    field_number : suffix selecting the 'Irrigation field <n>' column.
    field_without_Nan : only its length is used, to locate the start position.
    field : humidity series (may contain NaN) indexed from the start of the data.
    train_last_index : offset into the gap-filled weather lists for step 0.
    preds_horizon : number of values to produce.

    Returns
    -------
    list of length `preds_horizon`.

    Notes
    -----
    If the irrigation value is neither 0 nor 1, no branch assigns `prediction`
    and the value from the previous iteration is reused.
    """
    # Humidity from position len(field_without_Nan)-1 onward; moisture[i] is
    # the base value for step i when it is not NaN.
    moisture = field[len(field_without_Nan)-1:]
    # Irrigation flags for the same window plus one extra row, because step i
    # reads row i+1 of this frame.
    irrigationFrame = train[['Irrigation field ' +str(field_number)]][len(field_without_Nan)-1:len(field_without_Nan)+preds_horizon]
    

    preds = []
    # k is updated below but never read.
    k=0
    # index is incremented at the top of every iteration, so it always equals i.
    index=-1
    # bol is True while a findStep() value is being reused across consecutive
    # irrigated steps.
    bol = False
    for i in range(preds_horizon):
        index= index + 1

        # One-step changes of the gap-filled weather series at this position.
        temperature = temp_list_without_nan[train_last_index + index]
        temperature_last = temp_list_without_nan[train_last_index + index - 1]
        temperature_diff = temperature - temperature_last
        
        pressure = pressure_list_without_nan[train_last_index + index]
        pressure_last = pressure_list_without_nan[train_last_index + index - 1]
        pressure_diff = pressure - pressure_last
        
        irrigation = irrigationFrame.iloc[i+1]['Irrigation field ' + str(field_number)]
        # A missing irrigation flag is treated as "not irrigating".
        if(np.isnan(irrigation)):
            irrigation =0

        if(irrigation == 0):
            # Leaving an irrigated stretch: the next one recomputes its step.
            bol = False
            x= np.array([pressure_diff, temperature_diff, irrigation])
            x = x[np.newaxis,...]
            prediction = model.predict(x) 
        if(irrigation == 1 and index ==0):
            # First step has no previous prediction for findStep, so the model is used.
            x= np.array([pressure_diff, temperature_diff, irrigation])
            x = x[np.newaxis,...]
            prediction = model.predict(x) 
        if(irrigation == 1 and index > 0):
            if(bol == False):
                # First irrigated step of a stretch: compute the constant step
                # from the last prediction toward the next observed humidity.
                step =findStep(preds[-1], moisture, index)
                prediction = [step]
                bol = True
            else:
                # Later irrigated steps reuse the same step.
                prediction = [step]                
     
        if np.isnan(moisture[i]):
            # No observation here: chain from the previous prediction
            # (requires preds to be non-empty).
            preds.append(prediction[0]+ preds[-1])
            k=k+1
        else:
            # Observation available: apply the change to the observed value.
            preds.append(prediction[0]+ moisture[i])
            k=0
            
        k=k+1
    return preds

In [13]:
seed = 90973


def _fit_and_forecast(field_number, observed, series, last_train_index, horizon, show=False):
    """Prepare the data, fit a model and forecast one field.

    Calls `prepare_data`, `train_xgboost` (seeded with the module-level
    `seed`) and `get_Predictions` in that order. When `show` is true the
    feature frame is printed before training and the forecast after it.

    Returns `(features, targets, model, forecast)`.
    """
    features, targets = prepare_data(train, field_number, observed, horizon)
    if show:
        print(features)
    fitted = train_xgboost(features, targets, seed)
    forecast = get_Predictions(fitted, field_number, observed, series, last_train_index, horizon)
    if show:
        print(forecast)
    return features, targets, fitted, forecast


# NOTE(review): the start offsets 8914 / 26301 / 16083 / 26301 are hard-coded;
# their derivation is not shown in this notebook -- confirm.
data_X, data_Y, model1, preds1 = _fit_and_forecast(1, field1_without_Nan, field1, 8914, 1153, show=True)
data_X, data_Y, model2, preds2 = _fit_and_forecast(2, field2_without_Nan, field2, 26301, 1747)
data_X, data_Y, model3, preds3 = _fit_and_forecast(3, field3_without_Nan, field3, 16083, 1153)
data_X, data_Y, model4, preds4 = _fit_and_forecast(4, field4_without_Nan, field4, 26301, 1729)



      diff_pressure  diff_temperature  irrigation_now
1              0.00             -0.03             0.0
2              0.01             -0.02             0.0
3              0.00              0.07             0.0
4              0.00              0.07             0.0
5              0.00              0.06             0.0
6             -0.01              0.07             0.0
7              0.00              0.06             0.0
8              0.00              0.11             0.0
9              0.00              0.11             0.0
10             0.00              0.08             0.0
11            -0.01              0.09             0.0
12             0.00              0.11             0.0
13             0.00              0.11             0.0
14            -0.01              0.00             0.0
15             0.00              0.00             0.0
16             0.00              0.20             0.0
17             0.00              0.20             0.0
18             0.00         

In [10]:
# Stack the four per-field forecasts, in field order, into one flat list.
total = [*preds1, *preds2, *preds3, *preds4]

In [11]:
# Put the stacked forecasts into the sample submission's 'Values' column and
# write the result without the row index.
submission = pd.read_csv('SampleSubmission.csv').assign(Values=total)
submission.to_csv('submission.csv', index=False)
