In [None]:
import rasterio
import pandas as pd
import geopandas as gpd
import os
import glob
from PIL import Image
from IPython.display import display
from funcs import plot_class_profiles, plot_class_profiles_mean, draw_table

# Prepare data

In [None]:
# Optional preview of the test area (data and image were made using QGIS).
# Disabled by default; uncomment the line below to display the image.
# display(Image.open(os.path.join('examples', 'test_area.png')))

In [None]:
# Class labels and the colour passed to the plotting helpers for each class
# (parallel lists: entry i of one belongs to entry i of the other).
class_names = [
    'beans',
    'potato',
    'wheat',
    'others',
]
class_colors = [
    'purple',
    'green',
    'goldenrod',
    'brown',
]

# Per-image features: the spectral bands plus an NDVI band
# (NDVI is recommended in the literature).
bands = [
    'B2', 'B3', 'B4', 'B8', 'B11', 'B12',
    'ndvi',
]

# Input locations
data_dr = os.path.join('data', 's2')  # folder holding the per-image .tif files
stacked_tif_dr = os.path.join('data', 'stacked_bands.tif')  # stacked image that gets sampled

In [None]:
# Sample the stacked raster at every validation point and build the test table.
tif_files = sorted(glob.glob(os.path.join(data_dr, '*.tif')))

# The shapefile must carry 'type', 'xcoord' and 'ycoord' attribute columns
# (to be mentioned in the docs).
test_pts = gpd.read_file(os.path.join('data', 'points', 'val_pts.shp'))
test_pts = test_pts[['type', 'xcoord', 'ycoord', 'geometry']].sort_values(by=['type'])
coords = list(zip(test_pts.xcoord, test_pts.ycoord))

# One column name per (image, band) pair: images in sorted file order,
# bands in the order of `bands` within each image.
tif_names = [os.path.basename(tif_file).split('.')[0] for tif_file in tif_files]
bands_names = [f'{band}_{tif_name}' for tif_name in tif_names for band in bands]

# Open the raster in a context manager so the file handle is always released.
# The point coordinates must be in the same CRS as the raster (to be mentioned in the docs).
with rasterio.open(stacked_tif_dr) as src:
    if src.count != len(bands_names):
        raise ValueError(
            f'{stacked_tif_dr} has {src.count} bands but {len(bands_names)} '
            f'column names were generated from {len(tif_files)} images x {len(bands)} bands'
        )
    # src.sample is lazy, so it has to be consumed while the dataset is still open.
    raster_values = list(src.sample(coords))

# Put every raster band in its own column, aligned on the points' index.
band_values = pd.DataFrame(raster_values, index=test_pts.index, columns=bands_names)
test_pts = pd.concat([test_pts, band_values], axis=1)
test_pts = test_pts.drop(['xcoord', 'ycoord', 'geometry'], axis=1)

test_pts.to_csv(os.path.join('data', 'test_pts.csv'))  # save the test dataset to CSV
test_pts.head()  # visualize the first rows of the dataframe

In [None]:
# Split the sampled points into one DataFrame per class code.
# The codes are the values stored in the 'type' column; presumably they are
# listed in the same order as class_names -- verify against the labelling scheme.
class_codes = (1, 2, 3, 5)
class_dfs_test = [
    test_pts[test_pts['type'] == code].iloc[:, :]
    for code in class_codes
]

# Visualize and inspect

In [None]:
# Plot the class profiles over our test dataset.
# Inputs: the per-class DataFrames, plus one colour and one name per class and
# the list of band names. The helper comes from the local `funcs` module.
plot_class_profiles(class_dfs_test, class_colors, class_names, bands)

In [None]:
# Plot the class profiles over our test dataset (mean).
# Unlike plot_class_profiles above, this helper is given the whole test_pts
# table rather than the per-class list.
# NOTE(review): presumably it groups by the 'type' column itself -- confirm in funcs.
plot_class_profiles_mean(test_pts, class_colors, class_names, bands)

# Test the model

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import pickle

In [None]:
# Load the trained model from disk.
# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only load model files that come from a trusted source.
model_path = os.path.join('data', 'trained_model.pkl')
with open(model_path, mode="rb") as model_file:
    rf = pickle.load(model_file)

In [None]:
# Split the test dataset into labels (y) and input features (x).
y_test = test_pts['type'].values

# Keep the spectral-band and NDVI columns. Each membership test has to be
# written out: `"B" or 'ndvi' in b` parses as `"B" or ('ndvi' in b)`, which is
# always truthy and therefore filters nothing.
feature_cols = [b for b in bands_names if 'B' in b or 'ndvi' in b]
x_test = test_pts[feature_cols].values

# Check the shapes of our feature and label arrays
print(f'The test data sizes are: Sentinel-2 {x_test.shape}, y {y_test.shape}')

In [None]:
# Run the loaded model on the test dataset and print the text report
# (one row per entry of class_names).
y_pred_test = rf.predict(x_test)
cr = classification_report(
    y_test,
    y_pred_test,
    target_names=class_names,
)
print('Sentinel-2', cr, sep='\n')

In [None]:
# Save the classification report as CSV.
report = classification_report(y_test, y_pred_test, target_names=class_names, output_dict=True)

# Transpose so each row is one report entry, then move the row labels into a
# regular column whose header is a single space.
cr_df = (
    pd.DataFrame(report)
    .T
    .reset_index()
    .rename(columns={'index': ' '})
)

# Create the output folder first: to_csv does not create missing directories,
# so a fresh checkout without results/ would otherwise fail here.
results_dir = 'results'
os.makedirs(results_dir, exist_ok=True)
# index=True is kept so the file layout stays as before (the positional index
# is written as a leading column).
cr_df.to_csv(os.path.join(results_dir, 'classification_report.csv'), index=True)

In [None]:
# Plot the classification report as a table.
# Values are rounded to two decimals for display; note that cr_df itself is
# rebound to the rounded copy, so later cells see the rounded values.
cr_df = cr_df.round(2)
draw_table(cr_df, 'Classification Report')

In [None]:
# Confusion matrix, printed as a text grid.
from tabulate import tabulate

# scikit-learn convention: rows are the true classes, columns the predicted ones.
cm = confusion_matrix(y_test, y_pred_test)

# First row is the header (corner label + class names); each following row is
# a class name followed by that class's row of counts.
header_row = ["True", *class_names]
count_rows = [[name, *cm[i]] for i, name in enumerate(class_names)]
table_data = [header_row] + count_rows

# Print the table using the tabulate library
print("Confusion Matrix:\n")
print(tabulate(table_data, headers="firstrow", tablefmt="grid"))

In [None]:
# Save the confusion matrix as CSV.
# Rows and columns are labelled with the class names; the row labels are then
# moved into a regular column whose header is a single space.
cm_df = (
    pd.DataFrame(cm, index=class_names, columns=class_names)
    .reset_index()
    .rename(columns={'index': ' '})
)

# Create the output folder first: to_csv does not create missing directories,
# so a fresh checkout without results/ would otherwise fail here.
results_dir = 'results'
os.makedirs(results_dir, exist_ok=True)
# index=True is kept so the file layout stays as before (the positional index
# is written as a leading column).
cm_df.to_csv(os.path.join(results_dir, 'confusion_matrix.csv'), index=True)

In [None]:
# Plot the confusion matrix as a table.
# cm_df already carries the class names as its first column (header ' ').
draw_table(cm_df, 'Confusion Matrix')