# Pre Processing Images

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import cv2
from datetime import datetime
from sklearn.utils import check_random_state
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
from sklearn import svm
from sklearn.model_selection import GridSearchCV

from sklearn.ensemble import RandomForestClassifier


In [None]:
# Height categories. The loading loop encodes each sample's class as the
# position of its category in this list (Low -> 0, Medium -> 1, High -> 2).
cloud_heights = ["Low", "Medium", "High"]

# Alternative two-class setup, kept for reference:
# cloud_heights = ["Low", "Medium-High"]

# Cloud genus abbreviations (these are the dataset's folder names), grouped by
# the height category they are assigned to.
low_lvl_clouds = ["Cu", "Cb", "Sc", "St"]
mid_lvl_clouds = ["Ac", "As", "Ns"]
high_lvl_clouds = ["Ci", "Cc", "Cs", "Ct"]

In [None]:
import os

# Root of the dataset: one sub-folder per cloud genus abbreviation, each
# holding the images of that genus.
dataset_dir = "../resources/cloud-images/CCSN_v2"

# Genus abbreviation -> height category. Built in High, Medium, Low order so
# that, should an abbreviation ever appear in more than one list, the later
# category wins exactly as it did with the original chain of `if` blocks.
height_by_label = {}
for height_name, genera in (
    ("High", high_lvl_clouds),
    ("Medium", mid_lvl_clouds),
    ("Low", low_lvl_clouds),
):
    for genus in genera:
        height_by_label[genus] = height_name

# Identifying folders/labels.
# Only real directories count as labels: this drops stray files such as
# .DS_Store (metadata macOS auto-generates for a folder). The listing is
# sorted because os.listdir returns entries in arbitrary order, which would
# otherwise make the later seeded shuffle differ between machines.
cloud_labels = sorted(
    entry
    for entry in os.listdir(dataset_dir)
    if os.path.isdir(os.path.join(dataset_dir, entry))
)

# image file name -> one-element list holding {'label': genus, 'height': category}
labeled_data = {}

for label in cloud_labels:
    height_name = height_by_label.get(label)
    if height_name is None:
        # Folder is not one of the known genera; it contributes no samples.
        continue

    cloud_images = sorted(os.listdir(os.path.join(dataset_dir, label)))
    for image in cloud_images:
        # Hidden files (e.g. a .DS_Store inside the genus folder) are not
        # images and would fail to load later on.
        if image.startswith("."):
            continue
        labeled_data[image] = [{'label': label, 'height': height_name}]

print(labeled_data)

In [None]:
# Load every labeled image, preview it, and reduce it to a 32x32 grayscale
# thumbnail. `train_images` collects the thumbnails and `train_heights` the
# matching class ids (index of the height category in `cloud_heights`).
train_images = []
train_heights = []

start_datetime = datetime.now()

for (i, image_file) in enumerate(labeled_data):
    entry = labeled_data[image_file][0]

    # read image
    path = '../resources/cloud-images/CCSN_v2/' + entry['label'] + '/' + image_file
    image = cv2.imread(path)
    if image is None:
        # cv2.imread signals an unreadable/non-image file by returning None
        # instead of raising; skip it rather than crash in cvtColor below.
        print('Skipped...', path, 'could not be read as an image')
        continue

    # OpenCV loads pixels in BGR order while matplotlib expects RGB, so
    # convert for the preview to show the true colours.
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.show()

    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.resize(image, (32, 32))

    tmp_height = entry['height']
    height = cloud_heights.index(tmp_height)

    train_images.append(image)
    train_heights.append(height)
    print('Loaded...', '\U0001F483', 'Image', str(i+1), 'is a', tmp_height)

    # Render the single-channel thumbnail with a gray colormap on the full
    # 0-255 range; the default colormap would show it in false colours.
    plt.imshow(image, cmap='gray', vmin=0, vmax=255)
    plt.show()

end_datetime = datetime.now()

In [None]:
# Turn the collected Python lists into NumPy arrays, then expose them under
# the conventional feature-matrix / target-vector names.
train_images, train_heights = np.array(train_images), np.array(train_heights)

X = train_images
y = train_heights

In [None]:
# Shuffle samples and targets with one shared, seeded permutation so the
# pairs stay aligned.
random_state = check_random_state(0)
permutation = random_state.permutation(len(X))
X, y = X[permutation], y[permutation]

# Flatten every sample into a single feature row: (n_samples, n_features).
X = X.reshape(len(X), -1)

In [None]:
# Report how long the image loading loop took.
print(f'Image Processing Duration: {end_datetime - start_datetime}')

# Splitting the data: hold out 30% of the samples for testing.
# random_state=None leaves the split unseeded.
test_size = 0.3

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=None)

In [None]:
# Hyper-parameter candidates for the random forest grid search
# (4 * 3 * 3 = 36 combinations).
max_depth = [5, 10, 20, None]
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]

# Further candidates that are currently left out of the search:
#   criterion    = ["gini", "entropy", "log_loss"]
#   n_estimators = [100, 200, 300]
#   max_features = ['sqrt', 'log2', None]
# A depth-only search would be: param_grid = dict(max_depth=max_depth)

param_grid = {
    "max_depth": max_depth,
    "min_samples_split": min_samples_split,
    "min_samples_leaf": min_samples_leaf,
}


In [None]:
# Exhaustive 5-fold cross-validated search over `param_grid`, using all
# available CPU cores (n_jobs=-1).
rf_classifier = RandomForestClassifier()

grid = GridSearchCV(estimator=rf_classifier, param_grid=param_grid, cv=5, n_jobs=-1)

optimisation_start_datetime = datetime.now()
# Tune on the training split only: fitting on the full X, y would let the
# held-out test samples influence the chosen hyper-parameters and inflate the
# accuracy measured on them afterwards.
grid_result = grid.fit(X_train, y_train)
optimisation_end_datetime = datetime.now()

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
print("Execution time: " + str((optimisation_end_datetime - optimisation_start_datetime)))


In [None]:
# Build the final model from the hyper-parameters selected by the grid search;
# a default-configured forest would discard the result of that search.
rf_classifier = RandomForestClassifier(**grid_result.best_params_)

In [None]:
# Train the random forest on the training split.
rf_classifier.fit(X_train, y_train)


In [None]:
# Predict the class id of every sample in the held-out test split.
y_pred_rf = rf_classifier.predict(X_test)

In [None]:
# Fraction of test samples whose predicted class matches the true class.
print(f"Accuracy: {accuracy_score(y_test, y_pred_rf)}")

In [None]:
# Class ids in the order the confusion matrix rows/columns should follow.
label_names = [0, 1, 2]
cmx = confusion_matrix(
    y_true=y_test,
    y_pred=y_pred_rf,
    labels=label_names,
)

In [None]:
# Confusion Matrix
# Predict with the random forest trained in this notebook (the previously
# referenced `svm_classifier` is never defined here).
y_pred = rf_classifier.predict(X_test)

# Pin the class order so the matrix always has one row/column per height
# category, even if a category is absent from the test split; otherwise the
# matrix size could disagree with the number of display labels.
class_ids = list(range(len(cloud_heights)))
cm = confusion_matrix(y_test, y_pred, labels=class_ids)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=cloud_heights)
disp.plot(cmap="Blues")
plt.title("Confusion Matrix")
plt.grid(False)
plt.show()

In [None]:
# Detailed classification report (precision / recall / F1 per height category)
# for the random forest predictions; the previously referenced `y_pred_svm`
# is never defined in this notebook.
print("Classification Report:")
print(
    classification_report(
        y_test,
        y_pred_rf,
        # Fix the class order so it always lines up with `target_names`,
        # even when a category is missing from the test split.
        labels=list(range(len(cloud_heights))),
        target_names=cloud_heights,
    )
)