<a href="https://colab.research.google.com/github/OscoLP/DeepLearning-RemoteSensing/blob/main/ML_Example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

***Mount Google Drive***

In [None]:
from google.colab import drive 
# Mount Google Drive at /content/gdrive so that later cells can read from and write to it.
drive.mount('/content/gdrive')

***Read the .csv file***


In [None]:
import pandas as pd
# Load the example dataset (a binary plant-classification problem) into a DataFrame.
# The path is relative, so it only resolves when the working directory is the one
# that contains the `gdrive` mount point.
data = pd.read_csv('gdrive/My Drive/Colab Notebooks/data/Data_SoyPlant.csv')

def to_numeric(x):
    """Map a class-label string to its integer code for binary classification.

    Args:
        x: The raw label; expected to be 'Control' (control/healthy plants)
            or 'Damaged' (damaged plants).

    Returns:
        0 for 'Control', 1 for 'Damaged'.

    Raises:
        ValueError: If `x` is any other value. Failing here is deliberate:
            silently returning None would put missing values into the target
            column and surface later as a much less obvious error.
    """
    if x == 'Control':
        return 0
    if x == 'Damaged':
        return 1
    raise ValueError("Unexpected label %r; expected 'Control' or 'Damaged'" % (x,))

# Encode the string class labels as integers so scikit-learn can use them as targets.
data['Label'] = data['Label'].apply(to_numeric)

# Select the target by column name (the same name used for the encoding above) instead of
# by position, so features and target stay correct even if 'Label' is not the last column.
x_data = data.drop(columns='Label').values
y_data = data['Label'].values

***Split a dataset into train and test sets***

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples as the test set.
# - random_state fixes the shuffle so the split (and every score derived from it) is
#   reproducible after a kernel restart.
# - stratify keeps the class proportions the same in the train and test subsets, matching
#   the stratified folds used for cross-validation.
x_train, x_test, y_train, y_test = train_test_split(
    x_data, y_data, test_size=0.3, random_state=1, stratify=y_data)

***Build the models***

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier

# Candidate classifiers as (short name, estimator) pairs; the short name is used for the
# printed scores and for the box-plot tick labels.
# Notes:
# - LogisticRegression no longer passes multi_class='ovr': that argument is deprecated in
#   recent scikit-learn releases, and the liblinear solver is one-vs-rest already.
# - Estimators with internal randomness get a fixed random_state so that repeated runs of
#   the notebook produce the same scores.
models = [
    ('LR', LogisticRegression(solver='liblinear')),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier(random_state=1)),
    ('RF', RandomForestClassifier(random_state=1)),
    ('NB', GaussianNB()),
    ('SVM', SVC(gamma='auto')),
    ('GB', GradientBoostingClassifier(random_state=1)),
    ('MLP', MLPClassifier(hidden_layer_sizes=(128, 64, 32), max_iter=1000, random_state=1)),
]

***Evaluate each model***

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import RepeatedStratifiedKFold

# The splitter does not depend on the model, so build it once instead of on every iteration.
# With a fixed integer random_state every model is scored on the same 10 stratified folds.
kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
# Alternative: repeat the 10-fold split 10 times for a smoother estimate (much slower).
# kfold = RepeatedStratifiedKFold(n_splits=10, n_repeats=10, random_state=1)

results = []  # one array of per-fold accuracies per model
names = []    # model short names, in the same order as `results`
for name, model in models:
    cv_results = cross_val_score(model, x_train, y_train, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    # Report mean accuracy and its standard deviation across folds.
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))

***Compare the models***

In [None]:
from matplotlib import pyplot

# One box per model, drawn from its per-fold accuracies.
# The tick labels are set with xticks() rather than boxplot's `labels=` keyword: that keyword
# is deprecated in Matplotlib 3.9+ (renamed `tick_labels`), whereas xticks() works on both old
# and new versions. Boxes are placed at x = 1..N by default.
pyplot.boxplot(results, boxprops=dict(color='red'))
pyplot.xticks(range(1, len(names) + 1), names)
pyplot.title('Algorithm Comparison')
pyplot.show()

***Store validation results in a .csv file***

In [None]:
import numpy as np
# Save the cross-validation scores to Drive as comma-separated text. `results` holds one
# array of fold scores per evaluated model, in evaluation order; no header line or model
# names are written, so the row order is the only link back to the models.
np.savetxt("gdrive/My Drive/Colab Notebooks/data/y.csv", results, delimiter=",")

***Make predictions on test set for one model***

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# Choose ONE estimator to evaluate on the held-out test set: uncomment the desired line and
# comment out the others.
#model = LogisticRegression(solver='liblinear')
#model = LinearDiscriminantAnalysis()
#model = KNeighborsClassifier()
#model = DecisionTreeClassifier()
# random_state is fixed so the reported test metrics are reproducible between runs.
model = RandomForestClassifier(random_state=1)
#model = GaussianNB()
#model = SVC(gamma='auto')
#model = GradientBoostingClassifier()
#model = MLPClassifier(hidden_layer_sizes=(128, 64, 32), max_iter=1000)

# Fit on the full training split, then predict the unseen test split.
model.fit(x_train, y_train)
predictions = model.predict(x_test)

print(accuracy_score(y_test, predictions))
print(confusion_matrix(y_test, predictions))
# target_names are listed in label order: 0 -> 'Healthy', 1 -> 'Damaged'.
print(classification_report(y_test, predictions, target_names=['Healthy', 'Damaged']))

***Plot the confusion matrix for one model***

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import numpy as np
import itertools

# Open a new figure for the confusion-matrix plot.
plt.figure()
# Confusion matrix of the test-set predictions; it is drawn by the plotting code below.
cm = confusion_matrix(y_test, predictions)
def plot_confusion_matrix(cm, classes,
                        normalize=False,
                        title='Confusion Matrix',
                        cmap=plt.get_cmap('RdPu')):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: 2-D array of counts; rows are drawn along the y axis and columns
            along the x axis.
        classes: Class names used as tick labels on both axes, in index order.
        normalize: If True, divide every row by its sum before plotting, so the
            cells show per-row fractions instead of raw counts.
        title: Title of the plot.
        cmap: Matplotlib colormap used for the heat map.

    The (possibly normalized) matrix is also printed to stdout. Nothing is
    returned; call plt.show() afterwards to display the figure.
    """
    # Normalize BEFORE drawing so that the colours, the colour bar and the cell text all
    # describe the same values.
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text so the number stays readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Fractions are rounded to two decimals; raw counts are shown unchanged.
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

# Overall accuracy = correctly classified samples (the diagonal) / all samples.
# The value is stored in `accuracy`, NOT `accuracy_score`: reusing that name would overwrite
# the sklearn.metrics.accuracy_score function and break the prediction cell on re-run.
accuracy = np.trace(cm) / np.sum(cm).astype('float')
misclass = 1 - accuracy

cm_plot_labels = ['Healthy', 'Damaged']

# Draw the matrix first, then label the axes and tighten the layout, so that the labels and
# the layout pass apply to the finished plot rather than to an empty figure.
plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title='Confusion Matrix')
plt.ylabel('True label')
plt.xlabel('Predicted label\naccuracy_score={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
plt.tight_layout()

plt.show()