### Separate results

This notebook explores the occurrence of the disease Alternaria in the field.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from database.db import SQLiteDatabase
from utils.metrics import calculate_classification_metrics

In [None]:
# Model whose latest stored records are inspected in this notebook.
model_name = "alternaria_clf"

db = SQLiteDatabase()
records = db.get_records(is_latest=True, model_name=model_name)

# container maps a dataset name to a (data content, prediction content) pair.
# Data and prediction entries of a record are paired positionally; a dataset
# name that occurs more than once keeps the pair seen last.
container = {}
for record in records:
    paired = zip(record.data, record.predictions)
    container.update(
        (dataset.name, (dataset.content, preds.content))
        for dataset, preds in paired
    )


In [None]:
def countplot(data_name: str) -> None:
    """Plot how many samples each variety has, split by class label.

    Looks up the data content stored under ``data_name`` in the module-level
    ``container``, joins its metadata with its target labels row by row,
    and draws a seaborn count plot with one bar group per variety.

    Args:
        data_name: Key of the dataset in ``container`` (e.g. ``"train"``);
            it is also used as the plot title.

    Raises:
        KeyError: If ``data_name`` is not a key of ``container``.
    """
    data_content = container[data_name][0]
    meta = data_content.meta.reset_index(drop=True)
    # The label index must be dropped too: ``pd.concat(axis=1)`` aligns rows
    # on index, so keeping the original label index next to the reset
    # metadata index would pair labels with the wrong rows (or add NaN rows).
    labels = pd.DataFrame(data_content.target.label).reset_index(drop=True)
    df = pd.concat((meta, labels), axis=1)

    # Replace the raw class names with readable ones; any label that is not
    # listed here becomes NaN.
    mapping = {'class 1': 'Healthy', 'class 2': 'Infected'}
    df["label"] = df["label"].map(mapping)

    plt.figure(figsize=(8, 3))
    sns.countplot(data=df, x="varieties", hue="label")

    plt.xlabel('Variety')
    plt.ylabel('Count')
    plt.title(data_name)
    plt.show()

# Draw the per-variety label counts for both datasets, train first.
for dataset_name in ("train", "test"):
    countplot(dataset_name)


In [None]:
# Dataset whose predictions are evaluated below.
data_name = "test"
data_content, pred_content = container[data_name]
meta = data_content.meta.reset_index(drop=True)
target = data_content.target

y_true = target.value.to_numpy()
y_pred = pred_content.predictions

# One row per sample: its variety, true value and predicted value.
df = pd.DataFrame(
    {
        "varieties": meta["varieties"],
        "y_true": y_true,
        "y_pred": y_pred,
    }
)

# Classification metrics computed separately for each variety.
metrics = {
    variety: calculate_classification_metrics(rows.y_true, rows.y_pred)
    for variety, rows in df.groupby("varieties")
}

print(f"--> Metrics on {data_name} data:")
print("Variety    | Accuracy |  F1  | Precision | Recall")
for variety, scores in metrics.items():
    # Column widths are chosen to line up with the header printed above.
    row = (
        f"{variety:<10} |     {scores.accuracy:.2f} | {scores.f1:.2f} "
        f"| {scores.precision:.2f}      | {scores.recall:.2f}"
    )
    print(row)

# Metrics over all samples of the dataset, irrespective of variety.
overall = calculate_classification_metrics(y_true=y_true, y_pred=y_pred)
print(f"\n-->Average metrics on:\n{overall}")