In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Read the full table from disk. The 'SOC (%)' column supplies the labels;
# every other column is used as a feature.
dataset = pd.read_csv('./data/krishitantra/dataset.csv')

features = np.array(dataset.drop(columns='SOC (%)'))
labels = np.array(dataset['SOC (%)'])

# Hold out 25% of the rows for testing; random_state is fixed so the same
# split is produced on every run.
(train_features,
 test_features,
 train_labels,
 test_labels) = train_test_split(features, labels, test_size=0.25, random_state=42)

# Report the dimensions of each piece of the split.
for split_title, split_array in (
    ('Training Features Shape:', train_features),
    ('Training Labels Shape:', train_labels),
    ('Testing Features Shape:', test_features),
    ('Testing Labels Shape:', test_labels),
):
    print(split_title, split_array.shape)

Training Features Shape: (630, 107)
Training Labels Shape: (630,)
Testing Features Shape: (210, 107)
Testing Labels Shape: (210,)


In [3]:
from sklearn.ensemble import RandomForestRegressor

# Hyperparameters for the forest: 1000 trees, with a fixed random_state so
# the fit is repeatable from run to run.
forest_params = {'n_estimators': 1000, 'random_state': 42}

rf = RandomForestRegressor(**forest_params)
rf.fit(train_features, train_labels)

In [4]:
from sklearn.metrics import mean_absolute_error, r2_score

# Predict on the held-out test split and report two regression metrics.
predictions = rf.predict(test_features)

# Mean absolute error between the test labels and the predictions.
mae = mean_absolute_error(test_labels, predictions)
print("The Mean Absolute Error of our Model is {}".format(round(mae, 2)))

# NOTE(review): the figure reported below as "accuracy" is the R^2 score
# (coefficient of determination) scaled to a percentage, not a
# classification accuracy. R^2 can be negative. The message text is left
# unchanged here; consider renaming it.
r2 = r2_score(test_labels, predictions)

# Format the product with one decimal instead of printing it raw:
# round(x, 2) * 100 can expose binary floating-point artifacts
# (e.g. 0.57 * 100 == 56.99999999999999).
print("The accuracy of our model is {:.1f}%".format(round(r2, 2) * 100))

# Keep the original `score` binding (its final value was the R^2 score) in
# case a later cell reads it.
score = r2

The Mean Absolute Error of our Model is 0.47
The accuracy of our model is 54.0%


In [5]:
# Rank every feature column by the importance the fitted forest assigned it.
importances = list(rf.feature_importances_)

# Feature names are the dataset columns minus the 'SOC (%)' label column,
# in the same column order used to build the feature matrix.
feature_list = [column for column in dataset.columns if column != 'SOC (%)']

# zip() would silently truncate on a length mismatch and mislabel nothing
# visibly, so fail loudly instead.
assert len(feature_list) == len(importances), (
    "feature names and importances are out of sync"
)

# Sort on the raw importances first and round afterwards. Rounding before
# sorting collapses many features onto the same 2-decimal value, and their
# relative order then reflects column order rather than importance.
ranked = sorted(
    zip(feature_list, importances),
    key=lambda name_and_value: name_and_value[1],
    reverse=True,
)

# Same structure as before: (feature name, importance rounded to 2 decimals),
# most important first.
feature_importances = [(feature, round(importance, 2)) for feature, importance in ranked]

for pair in feature_importances:
    print('Variable: {:20} Importance: {}'.format(*pair))

Variable: VDepth               Importance: 0.14
Variable: RSP                  Importance: 0.12
Variable: Y (DD)               Importance: 0.11
Variable: NDVI_sd              Importance: 0.05
Variable: EDF SW               Importance: 0.04
Variable: EDF N                Importance: 0.04
Variable: EDF NE               Importance: 0.03
Variable: MDM B                Importance: 0.02
Variable: MRRTF                Importance: 0.02
Variable: TWI                  Importance: 0.02
Variable: VIS                  Importance: 0.02
Variable: NDVI_median          Importance: 0.02
Variable: X (DD)               Importance: 0.02
Variable: DevME C              Importance: 0.01
Variable: DevME D              Importance: 0.01
Variable: DiffME C             Importance: 0.01
Variable: DiffME D             Importance: 0.01
Variable: EDF MID              Importance: 0.01
Variable: EDF NW               Importance: 0.01
Variable: EDF SE               Importance: 0.01
Variable: Gcurv                Importanc