In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import classification_report

import cv2 as cv
import os

In [None]:
# Load the per-tile feature table. Later cells read the columns
# 'h', 's', 'v', 'n_img', 'n_row', 'n_col' and 'tile_type' from it.
df = pd.read_csv(filepath_or_buffer='./dataset/csv/dataset_kanter.csv')
df

In [3]:
# Image numbers whose tiles form the `train` split; tiles from every other
# image end up in `test`.
# NOTE(review): the list is called "validation" but it selects the training
# rows below — confirm that this is the intended direction of the split.
validation_images = [
    0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
    54, 55, 56,
    64, 65, 66,
]
df = df.rename(columns={'h': 'hue', 's': 'saturation', 'v': 'value'})

# Boolean row mask, computed once and reused for both halves of the split.
is_listed_image = df['n_img'].isin(validation_images)

# Bookkeeping columns (image number and tile position) are not features.
location_columns = ['n_img', 'n_row', 'n_col']

train = df[is_listed_image].drop(columns=location_columns)
test = df[~is_listed_image].drop(columns=location_columns)

# Features are whatever remains after removing the label column.
X_train, y_train = train.drop(columns='tile_type'), train['tile_type'].astype('category')
X_test, y_test = test.drop(columns='tile_type'), test['tile_type'].astype('category')

In [4]:
# Ordered (low, high, tile_type) hue bands, both ends inclusive. Bands are
# tried top to bottom and the first one containing the hue wins, so the order
# matters where bands overlap (23-26 is claimed by type 0 before 23-30 is
# tried for type 6).
_HUE_BANDS = (
    (23, 26, 0),
    (36, 39, 2),
    (40, 45, 1),
    (105, 109, 4),
    (20, 22, 3),
    (23, 30, 6),
)


def get_terrain(hue, saturation, value):
    """Map a tile's hue to an integer tile-type code using fixed hue bands.

    Parameters
    ----------
    hue : number
        Hue of the tile; compared against the inclusive bands in
        ``_HUE_BANDS``.
    saturation, value : number
        Accepted so a whole (hue, saturation, value) row can be unpacked into
        the call, but not used by the current rules.

    Returns
    -------
    int
        The code of the first matching band, or 7 when no band matches
        (including hues that fall between bands and NaN).

    Notes
    -----
    The original rule chain contained a second ``23 <= hue <= 26`` branch
    returning 5. It could never execute because the first rule has the same
    condition, so it has been removed; the results are unchanged.
    NOTE(review): as a consequence code 5 is never predicted. Separating it
    from code 0 needs a different hue range or a saturation/value test —
    the intended thresholds are not known here.
    """
    for low, high, tile_type in _HUE_BANDS:
        if low <= hue <= high:
            return tile_type
    return 7

In [5]:
def predict(X):
    """Classify every row of ``X`` with ``get_terrain``.

    Each row is unpacked positionally into ``get_terrain``, so ``X`` needs
    exactly three columns in the order that function expects
    (hue, saturation, value).

    Returns a float NumPy array with one predicted tile-type code per row.
    """
    labels = [get_terrain(*X.iloc[row_idx]) for row_idx in range(len(X))]
    # Float dtype matches what downstream cells receive (they cast to int).
    return np.array(labels, dtype=float)

In [None]:
# Apply the rule-based classifier to the test-split features.
y_pred = predict(X=X_test)
y_pred

In [None]:

# Display names for the integer tile codes; position in the array == code.
tile_types_str = np.array([
    "Wheat Field",
    "Grassland",
    "Forest",
    "Swamp",
    "Lake",
    "Mine",
    "Castle",
    "Table",
])

# Translate the integer codes into names so the report rows are readable.
y_test_df = pd.DataFrame(y_test).astype('int').apply(lambda x: tile_types_str[x])
y_pred_df = pd.DataFrame(y_pred).astype('int').apply(lambda x: tile_types_str[x])

report = classification_report(y_test_df, y_pred_df, output_dict=True)

# One row per class plus the summary rows, one column per metric.
report_df = pd.DataFrame(report).transpose()

# Precision / recall as percentage strings with one decimal.
report_percentages = report_df.drop(['f1-score', 'support'], axis=1).mul(100).round(1).astype(str) + '%'

# 'accuracy' is a single scalar in the report dict, so DataFrame(report)
# repeats it in every column — including 'support', where casting to int
# would show 0 (or 1). Use the total sample count (the 'macro avg' support)
# for that row instead.
report_support = report_df['support'].copy()
if 'accuracy' in report_support.index and 'macro avg' in report_support.index:
    report_support['accuracy'] = report_support['macro avg']

report_percentages['support'] = report_support.astype('int')
report_percentages


In [8]:
# Build a confusion matrix by hand: rows = predicted code, columns = actual code.
# The matrix is sized by the number of distinct labels in `df` and indexed by
# label value, so it assumes the labels are exactly 0 .. n_classes - 1.
# Predictions or labels outside that range are not counted.
tile_classes = df['tile_type'].unique()
n_classes = len(tile_classes)
confusion_matrix = np.zeros((n_classes, n_classes))

for i in range(n_classes):
    # Predictions for the tiles whose true class is i; computed once per
    # column instead of once per cell.
    preds_for_actual = y_pred[y_test == i]
    for j in range(n_classes):
        confusion_matrix[j, i] = np.sum(preds_for_actual == j)

In [None]:
# Draw the confusion matrix as an annotated heatmap.
fig, ax = plt.subplots(figsize=(10, 8))

# Tick labels, listed in tile-code order (0 .. 7).
tile_classes_str = [
    "Wheat Field",
    "Grassland",
    "Forest",
    "Swamp",
    "Lake",
    "Mine",
    "Castle",
    "Table",
]

# fmt='d' needs integers. Cast to 64-bit rather than int16 so counts above
# 32767 cannot silently wrap around.
sns.heatmap(confusion_matrix.astype(np.int64), annot=True, fmt='d', cmap='Blues',
            xticklabels=tile_classes_str,
            yticklabels=tile_classes_str,
            ax=ax)

# Matrix rows are predicted classes (y axis), columns are actual classes (x axis).
ax.set_xlabel('Actual')
ax.set_ylabel('Predicted')
ax.set_title('Confusion Matrix')

plt.show()

In [None]:
# Sanity check: how many predictions landed in a confusion-matrix cell.
# Printed labels are Danish: "med i cm" = included in the matrix,
# "ikke med i cm" = not included, "andel ikke med i cm" = share not included.
n_in_cm = confusion_matrix.sum()
n_not_in_cm = len(y_pred) - n_in_cm

print(f'med i cm: {n_in_cm}')
print(f'ikke med i cm: {n_not_in_cm}')

print(f'andel ikke med i cm: {n_not_in_cm / len(y_pred):0.01%}')
