In [None]:
import pandas as pd
import statistics
import glob

def _load_frames(pattern):
    """Read every CSV archive matching *pattern* and concatenate the rows.

    Paths are sorted because ``glob.glob`` returns matches in arbitrary
    order; the evaluation below uses ``drop_duplicates(keep="first")``, so
    a stable row order keeps the scores reproducible between runs.

    Raises:
        FileNotFoundError: if no file matches *pattern* (``pd.concat`` would
            otherwise fail with an opaque "No objects to concatenate").
    """
    paths = sorted(glob.glob(pattern))
    if not paths:
        raise FileNotFoundError(f"No input files match {pattern!r}")
    # Original row indexes are kept as-is (no ignore_index) so downstream
    # code sees the same frames as before.
    return pd.concat([pd.read_csv(path) for path in paths])


SPI = _load_frames("/tmp/data/SPI_*.csv.zip")
AOI = _load_frames("/tmp/data/AOI_*.csv.zip")

In [None]:
# Columns shared by the AOI views handed to the task 2 and task 3 classifiers.
_aoi_key_columns = ["PanelID", "FigureID", "MachineID", "ComponentID", "PinNumber"]

# Task 1 only sees the SPI data; its output is collapsed into a set.
results_1 = set(classification_1(SPI))
# Task 2 additionally receives the AOI rows restricted to the key columns
# plus AOILabel.
results_2 = classification_2(
    SPI,
    AOI[_aoi_key_columns + ["AOILabel"]],
)
# Task 3 receives the same view extended with OperatorLabel.
results_3 = classification_3(
    SPI,
    AOI[_aoi_key_columns + ["AOILabel", "OperatorLabel"]],
)

# Performance Task 1
# Ground truth: every (PanelID, FigureID, ComponentID) triple present in AOI,
# with each field converted to str before comparison with the predictions.
groundtruth_1 = {
    tuple(map(str, row))
    for row in AOI[["PanelID", "FigureID", "ComponentID"]].values
}
# Number of predicted triples that also appear in the ground truth.
hits_1 = len(results_1 & groundtruth_1)
# Each ratio falls back to 0 when its denominator would be empty/zero.
precision_1 = hits_1 / len(results_1) if results_1 else 0
recall_1 = hits_1 / len(groundtruth_1) if groundtruth_1 else 0
pr_sum_1 = precision_1 + recall_1
f1_1 = 2 * precision_1 * recall_1 / pr_sum_1 if pr_sum_1 > 0 else 0

# Performance Task 2
# Index the predictions by stringified (PanelID, FigureID, ComponentID) so the
# lookup below is independent of the dtypes pandas inferred for the ID columns.
results_dict_2 = {
    (str(panel), str(figure), str(component)): label
    for panel, figure, component, label in results_2
}

# One validation row per distinct component (first AOI occurrence wins);
# components without a prediction get the placeholder label "-".
components_2 = AOI.drop_duplicates(
    subset=["PanelID", "FigureID", "ComponentID"], keep="first"
)
validationdata_2 = pd.DataFrame(
    [
        (
            row.PanelID,
            row.FigureID,
            row.ComponentID,
            row.OperatorLabel,
            results_dict_2.get(
                (str(row.PanelID), str(row.FigureID), str(row.ComponentID)), "-"
            ),
        )
        for row in components_2.itertuples(index=False)
    ],
    columns=["PanelID", "FigureID", "ComponentID", "Real", "Predicted"],
)

# NOTE(review): classification_report is not imported in this section;
# presumably sklearn.metrics.classification_report from another cell.
report_2 = classification_report(
    validationdata_2["Real"], validationdata_2["Predicted"], output_dict=True
)
# The report only has an entry for labels that occur in the truth or in the
# predictions; score 0.0 instead of raising KeyError when "Bad" never occurs.
f1_2 = report_2.get("Bad", {}).get("f1-score", 0.0)

# Performance Task 3
# Index the predictions by stringified (PanelID, FigureID, ComponentID) so the
# lookup below is independent of the dtypes pandas inferred for the ID columns.
results_dict_3 = {
    (str(panel), str(figure), str(component)): label
    for panel, figure, component, label in results_3
}

# Only components whose RepairLabel is one of the two scored classes are
# evaluated; one row per distinct component (first AOI occurrence wins).
scored_labels_3 = ("FalseScrap", "NotPossibleToRepair")
components_3 = AOI[AOI["RepairLabel"].isin(set(scored_labels_3))].drop_duplicates(
    subset=["PanelID", "FigureID", "ComponentID"], keep="first"
)
# Components without a prediction get the placeholder label "-".
validationdata_3 = pd.DataFrame(
    [
        (
            row.PanelID,
            row.FigureID,
            row.ComponentID,
            row.RepairLabel,
            results_dict_3.get(
                (str(row.PanelID), str(row.FigureID), str(row.ComponentID)), "-"
            ),
        )
        for row in components_3.itertuples(index=False)
    ],
    columns=["PanelID", "FigureID", "ComponentID", "Real", "Predicted"],
)

# NOTE(review): classification_report is not imported in this section;
# presumably sklearn.metrics.classification_report from another cell.
cr = classification_report(
    validationdata_3["Real"], validationdata_3["Predicted"], output_dict=True
)
# Unweighted mean of the two per-class F1 scores. A class that appears in
# neither the truth nor the predictions has no report entry; count it as 0.0
# instead of raising KeyError.
f1_3 = (
    cr.get("FalseScrap", {}).get("f1-score", 0.0)
    + cr.get("NotPossibleToRepair", {}).get("f1-score", 0.0)
) / 2

# Report the three task scores and their arithmetic mean on stdout.
print("F1 Score Task 1:", f1_1)
print("F1 Score Task 2:", f1_2)
print("F1 Score Task 3:", f1_3)

# Computed once and reused for both the console line and the results file.
mean = statistics.mean([f1_1, f1_2, f1_3])
print("Final Score:", mean)

# Persist the scores as a single comma-separated line. The context manager
# guarantees the file is flushed and closed; the previous bare open() inside
# print() left the handle open.
with open("results-tests.txt", "w", encoding="utf-8") as results_file:
    print(f"{f1_1},{f1_2},{f1_3},{mean}", file=results_file)