In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib.cm import rainbow
from sklearn.model_selection import train_test_split
#from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# %matplotlib inline
import warnings
warnings.filterwarnings('ignore')


dIndices_ranges = [0.0157, 0.0376, 0.0734]

# mother_dataset = pd.read_csv('new_dataset.csv')

# training_dataset = mother_dataset[['OID_', 'pointid', 'grid_code', 'NDVI', 'NDVIre1n', 'NDVIre2n', 'NDVIre3n', 'NDBI', 'NBR', 'NBR2', 'CSI', 'BSI', 'Elevation', 'Slope', 'Aspect', 'dCSI']]

training_dataset_path = 'RF_dIndices_dataset/08_11_23_dNDVIre3n_Dataset.csv'
parameter_to_train = 'dNDVIre3n'
parameter_cat = 'dNDVIre3n_cat'

training_dataset = pd.read_csv(training_dataset_path)


conditions = [
    training_dataset[parameter_to_train] < dIndices_ranges[0],
    (training_dataset[parameter_to_train] >= dIndices_ranges[0]) & (training_dataset[parameter_to_train] < dIndices_ranges[1]),
    (training_dataset[parameter_to_train] >= dIndices_ranges[1]) & (training_dataset[parameter_to_train] < dIndices_ranges[2]),
    training_dataset[parameter_to_train] >= dIndices_ranges[2]
]

values = [0, 1, 2, 3]

training_dataset[parameter_cat] = np.select(conditions, values, default=3)

training_dataset.to_csv(training_dataset_path, index=False)

y = training_dataset[parameter_cat]
X = training_dataset.drop([parameter_cat, 'OID_', 'pointid', 'grid_code', parameter_to_train], axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 2)

# Training and Prediction
rf_classifier = RandomForestClassifier(n_estimators = 50).fit(X_train, y_train)
prediction = rf_classifier.predict(X_test)

importance = rf_classifier.feature_importances_

feature_names = ['NDVIre1n', 'NDVIre2n', 'NDVIre3n', 'CSI', 'BSI', 'NBR', 'NBR2',  'Elevation', 'Aspect', 'Slope', 'NDVI_NDBI']

importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': importance})

importance_df = importance_df.sort_values(by='Importance', ascending=False)


print(f"Training Accuracy: {rf_classifier.score(X_train, y_train)}")
# print(confusion_matrix(y_test, prediction))
print(f"\nTesting Accuracy: {accuracy_score(y_test, prediction)}")
print(classification_report(y_test, prediction))

print('\n')
print(importance_df)

Training Accuracy: 1.0

Testing Accuracy: 0.9674074074074074
              precision    recall  f1-score   support

           0       0.97      0.93      0.95       372
           1       0.97      0.99      0.98      1432
           2       0.96      0.91      0.94       218
           3       1.00      1.00      1.00         3

    accuracy                           0.97      2025
   macro avg       0.97      0.96      0.97      2025
weighted avg       0.97      0.97      0.97      2025



      Feature  Importance
1    NDVIre2n    0.270400
2    NDVIre3n    0.139869
0    NDVIre1n    0.096906
6        NBR2    0.071934
3         CSI    0.066739
8      Aspect    0.066451
5         NBR    0.065990
7   Elevation    0.064659
4         BSI    0.055979
9       Slope    0.054985
10  NDVI_NDBI    0.046087
