In [None]:
import rasterio
import numpy as np
import pandas as pd
import geopandas as gpd
import os
import glob
from PIL import Image
from IPython.display import display
from funcs import plot_class_profiles, plot_class_profiles_mean

# Prepare Data

In [None]:
# Example of the training area (the data and the image were made using QGIS):
# display(Image.open(os.path.join('examples', 'train_area.png')))

In [None]:
# --- Input locations -------------------------------------------------------
# Folder where the data is saved, and the path of the stacked image.
data_dr = os.path.join('data', 's2')
stacked_tif_dr = os.path.join('data', 'stacked_bands.tif')

# --- Features --------------------------------------------------------------
# Band identifiers; NDVI is added as an extra band (recommended in the literature).
bands = ['B2', 'B3', 'B4', 'B8', 'B11', 'B12', 'ndvi']

# --- Classes ---------------------------------------------------------------
# Each class name is kept next to the colour paired with it, then the pairs
# are unzipped into the two parallel lists the rest of the notebook uses.
_class_styles = (
    ('beans', 'purple'),
    ('potato', 'green'),
    ('wheat', 'goldenrod'),
    ('others', 'brown'),
)
class_names = [class_name for class_name, _ in _class_styles]
class_colors = [class_color for _, class_color in _class_styles]

In [None]:
# Sample the stacked raster at every training point and build the training table.
#
# Requirements on the inputs (worth mentioning in the docs):
#   * the shapefile must carry 'type', 'xcoord' and 'ycoord' columns;
#   * xcoord / ycoord must be expressed in the same CRS as the stacked raster,
#     because the raster is sampled directly at those coordinates.
tif_files = sorted(glob.glob(os.path.join(data_dr, '*.tif')))
train_pts = gpd.read_file(os.path.join('data', 'points', 'train_pts.shp'))
train_pts = train_pts[['type', 'xcoord', 'ycoord', 'geometry']].sort_values(by=['type'])
coords = list(zip(train_pts.xcoord, train_pts.ycoord))

# One column name per (image, band) pair, e.g. 'B2_<tif name>'.
# NOTE(review): this assumes the stacked raster stores its bands image by image
# (sorted by file name) and, within an image, in the order of `bands` -- confirm
# against the code that produced the stacked tif.
bands_names = []
for tif_file in tif_files:
    tif_name = os.path.basename(tif_file).split('.')[0]
    for band in bands:
        bands_names.append(f'{band}_{tif_name}')

# Open the raster in a context manager so the file handle is released as soon
# as sampling is done (the original left the dataset open for the whole session).
with rasterio.open(stacked_tif_dr) as src:
    # Fail early with a clear message instead of an opaque pandas shape error
    # when the raster and the generated column names disagree.
    if src.count != len(bands_names):
        raise ValueError(
            f'{stacked_tif_dr} has {src.count} bands but {len(bands_names)} '
            f'column names were generated from {len(tif_files)} tif file(s)'
        )
    # src.sample yields one array of band values per coordinate pair.
    train_pts['Raster Value'] = list(src.sample(coords))

# Put every raster band in its own column.
band_values = pd.DataFrame(
    train_pts['Raster Value'].tolist(),
    index=train_pts.index,
    columns=bands_names,
)
train_pts = pd.concat([train_pts, band_values], axis=1)
train_pts = train_pts.drop(['xcoord', 'ycoord', 'geometry', 'Raster Value'], axis=1)
train_pts.to_csv(os.path.join('data', 'train_pts.csv'))  # save the training dataset to CSV
train_pts.head()  # show the first rows of the dataframe

In [None]:
# Group the samples by their 'type' code so that every class can later be
# split into training / validation subsets on its own.
# NOTE(review): the codes 1, 2, 3, 5 presumably follow the order of class_names
# (beans, potato, wheat, others) -- confirm against the shapefile legend.
class_dfs = [
    train_pts[train_pts['type'] == type_code]
    for type_code in (1, 2, 3, 5)
]

# Visualize and inspect

In [None]:
# Plot the class profiles over our dataset.
# Inputs: the per-class dataframes, plus one colour and one name per class and
# the list of band identifiers. The drawing itself is done by
# funcs.plot_class_profiles, which is defined outside this notebook.
plot_class_profiles(class_dfs, class_colors, class_names, bands)

In [None]:
# Plot the class profiles over our dataset (mean).
# This call is handed the full train_pts table rather than the per-class list;
# presumably funcs.plot_class_profiles_mean groups by class itself -- TODO confirm
# against the helper's signature in funcs.
plot_class_profiles_mean(train_pts, class_colors, class_names, bands)

# Train the model

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import pickle

In [None]:
# Split the data into training and validation subsets, one class at a time,
# so that every class contributes 70 % of its samples to training and 30 % to
# validation.

# Feature columns: only the Sentinel-2 bands and the NDVI band.
# The original condition was `"B" or 'ndvi' in b`, which is always true because
# the non-empty string "B" is truthy; each membership test is now explicit.
# The column list does not depend on the class, so it is computed once.
feature_cols = [b for b in bands_names if 'B' in b or 'ndvi' in b]

values_arrays = []
for class_df in class_dfs:
    # Labels (y) and input features (x) for this class.
    y = class_df['type'].values
    x = class_df[feature_cols].values
    # Each entry is [x_train, x_val, y_train, y_val]; the fixed random_state
    # keeps the split reproducible between runs.
    values_arrays.append(
        train_test_split(x, y, test_size=0.30, shuffle=True, random_state=10)
    )

In [None]:
# Merge the per-class splits into a single training set and a single
# validation set. Every entry of values_arrays holds, in this order:
# x_train, x_val, y_train, y_val.
# Each result starts from an empty float array, so the merged arrays are
# float64 and keep a valid (0, n_features) / (0,) shape even without splits.
n_features = len(bands_names)
x_train = np.concatenate(
    [np.zeros([0, n_features])] + [split[0] for split in values_arrays], axis=0
)
x_val = np.concatenate(
    [np.zeros([0, n_features])] + [split[1] for split in values_arrays], axis=0
)
y_train = np.concatenate([np.zeros([0])] + [np.ravel(split[2]) for split in values_arrays])
y_val = np.concatenate([np.zeros([0])] + [np.ravel(split[3]) for split in values_arrays])

In [None]:
# Report the shapes of the merged training / validation feature and label arrays.
print(f'The trainng data sizes are: Sentinel-2 x_train{x_train.shape}, x_val{x_val.shape},y_train {y_train.shape},y_val{y_val.shape}')

In [None]:
# Train the model: a random forest of 300 trees.
# random_state is fixed so that the fitted model (and therefore the OOB score
# and every metric computed below) is reproducible across kernel restarts;
# the original left it unset, so each run produced a different forest.
rf = RandomForestClassifier(n_estimators=300, oob_score=True, random_state=10)
rf = rf.fit(x_train, y_train)  # fit the model to the training dataset

# oob_score_ is the score of the training dataset obtained using an
# out-of-bag (OOB) estimate; it is a fraction, so scale it to a percentage.
print('Our OOB prediction of accuracy for s2 stack is: {oob:.2f}%'.format(oob=rf.oob_score_ * 100))

In [None]:
# Predict the class of every validation sample, then print the per-class
# classification report under a 'Sentinel-2' heading.
y_pred = rf.predict(x_val)
print('Sentinel-2')
validation_report = classification_report(
    y_val,
    y_pred,
    target_names=class_names,
)
print(validation_report)

In [None]:
# Confusion matrix of the validation predictions, rendered as a text grid.
from tabulate import tabulate

cm = confusion_matrix(y_val, y_pred)

# The first row is the table header ("True" followed by the class names);
# every following row is a class name followed by that class's row of cm.
header_row = ["True"] + list(class_names)
body_rows = [[name] + list(cm[i]) for i, name in enumerate(class_names)]
table_data = [header_row] + body_rows

# Print the table using the tabulate library.
print("Confusion Matrix:\n")
print(tabulate(table_data, headers="firstrow", tablefmt="grid"))

In [None]:
# Persist the fitted classifier to disk as a pickle file.
model_path = os.path.join('data', 'trained_model.pkl')
with open(model_path, "wb") as model_file:
    pickle.dump(rf, model_file)