In [36]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [37]:
class_to_index={'banana': 0, 'bareland': 1, 'building': 2, 'carrot': 3, 'corn': 4,
    'dragonfruit': 5, 'garlic': 6, 'guava': 7, 'mountain': 8, 'peanut': 9, 'pineapple': 10,
    'pumpkin': 11, 'rice': 12, 'sky': 13, 'soybean': 14, 'sugarcane': 15, 'tomato': 16}
index_to_class = {v: k for k, v in class_to_index.items()}

## Step 1: 載入b0_output.csv, Test20000.csv

In [38]:
df_filename = pd.read_csv('Test20000.csv')

In [39]:
df_predictions = pd.read_csv('b0_output.csv',header=None)

In [40]:
len(df_filename)

20000

In [41]:
len(df_predictions)

400000

In [42]:
df_filename.head()

Unnamed: 0,image_filename,label
0,0005479429a4.jpg,
1,0006849c44f2.jpg,
2,0006fcc93fc9.jpg,
3,00088803914c.jpg,
4,000b572940a1.jpg,


In [43]:
df_predictions.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,0.0,0.0,0.8904,0.0,0.0,0.0,0.0,0.0,0.0021,0.0,0.0,0.0,0.0,0.1075,0.0,0.0,0.0
1,0.0,0.0,0.9997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0002,0.0,0.0,0.0
2,0.0,0.0,0.931,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.069,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [44]:
def argmax(lst):
    return max(range(len(lst)), key=lst.__getitem__)

## Step 2: 計算每列預測值最大的類別

In [45]:
judgeClass = [0 for i in range(len(df_predictions))]
for index, row in df_predictions.iterrows():
  judgeClass[index] = argmax(list(row))

## Step 3: 計算每張圖(分成20張小圖)，統計類別的區域

In [46]:
def accumulate_blocks(stats, ii):
  if ii ==2 or ii==8 or ii==13:
    stats[ii] +=-1
    return
  elif ii ==1 or ii==15 or ii==0:
    stats[ii] +=1
    return
  stats[ii] +=2
  return

In [47]:
def predict(ii):
  stats = [0 for i in range(17)]
  for i in range(20):
    index = ii*20+i
    dets = judgeClass[index]
    accumulate_blocks(stats, dets)
  judge = argmax(stats)
  landtype = index_to_class[judge]
  return landtype

In [48]:
answer = []
correct_count =0
for index, row in df_filename.iterrows():
  filename = row[0]
  predicted_label = predict(index)
  answer.append(predicted_label)

In [49]:
print(answer[:5])

['corn', 'pineapple', 'bareland', 'garlic', 'peanut']


In [50]:
print(answer[:5])

['corn', 'pineapple', 'bareland', 'garlic', 'peanut']


## Step 4: 匯出結果

In [51]:
df_result = pd.DataFrame(df_filename['image_filename'])
df_result['label'] = pd.DataFrame(answer) 

In [52]:
df_result.head()

Unnamed: 0,image_filename,label
0,0005479429a4.jpg,corn
1,0006849c44f2.jpg,pineapple
2,0006fcc93fc9.jpg,bareland
3,00088803914c.jpg,garlic
4,000b572940a1.jpg,peanut


In [61]:
df_result.to_csv("resultfirst.csv", index=False )

## Step 5: 驗證 secret_labels.csv

In [54]:
df_labels = pd.read_csv('secret_labels.csv')

In [55]:
correct_count =0
for index, row in df_labels.iterrows():
  filename = row[0]
  label = row[1]
  predicted_label = answer[index]
  if predicted_label == label:
    correct_count += 1
  #else:
  #  print(filename, label, predicted_label)
accuracy = correct_count/len(df_labels) 
print(f"{accuracy:.3f}")

0.972


In [56]:
correct_count =0
for index, row in df_labels.iterrows():
  filename = row[0]
  label = row[1]
  predicted_label = answer[index]
  if predicted_label == label:
    correct_count += 1
  #else:
  #  print(filename, label, predicted_label)
accuracy = correct_count/len(df_labels) 
print(f"{accuracy:.3f}")

0.972


## Step 6: 計算分數
https://aidea-web.tw/topic/93c8c26b-0e96-44bc-9a53-1c96353ad340

評估方式採用Weighted-Precision(WP), 且各類別F1-score需大於0.7
$$WP={\sum_{i=1}^{NumofClass} (Precision_i \times (TP_i + FN_i)) \over TotalImageCount}$$

$$\text{F1-score}= 2 \times {(Precision \times Recall) \over (Precision + Recall) }$$

$$\text{Recall}= {TP \over TP + FN }$$

$$\text{Precision}= {TP \over TP + FP }$$

In [57]:
landtypes=['banana', 'bareland', 'carrot', 'corn', 'dragonfruit', 'garlic', 'guava', 'peanut', 'pineapple', 'pumpkin', 'rice', 'soybean', 'sugarcane', 'tomato']
print(len(landtypes))

14


In [58]:
TP = {lt:0 for lt in landtypes}
FN = {lt:0 for lt in landtypes}
FP = {lt:0 for lt in landtypes}
for index, row in df_labels.iterrows():
  label = row[1]
  predicted_label = answer[index]
  if predicted_label == label:
     TP[label] +=1
  else:
      FP[label] +=1
      FN[predicted_label] +=1

In [59]:
for lt in landtypes:
  if TP[lt]+FN[lt] >0:
    recall = TP[lt]/(TP[lt]+FN[lt])
  else:
    recall = 0
  if TP[lt]+FP[lt] >0:
    precision = TP[lt]/(TP[lt]+FP[lt])
  else:
    precision = 0
  f1_score = 2*precision*recall/(precision+recall)
  print(f"{lt:12}:precision={precision:.2f}:recall={recall:.2f}:f1-score={f1_score:.2f}")

banana      :precision=0.97:recall=0.99:f1-score=0.98
bareland    :precision=0.92:recall=0.99:f1-score=0.95
carrot      :precision=0.99:recall=0.95:f1-score=0.97
corn        :precision=0.99:recall=0.97:f1-score=0.98
dragonfruit :precision=1.00:recall=0.97:f1-score=0.98
garlic      :precision=0.98:recall=0.98:f1-score=0.98
guava       :precision=0.99:recall=0.99:f1-score=0.99
peanut      :precision=0.96:recall=0.97:f1-score=0.97
pineapple   :precision=1.00:recall=0.99:f1-score=0.99
pumpkin     :precision=0.99:recall=0.87:f1-score=0.93
rice        :precision=0.99:recall=0.98:f1-score=0.99
soybean     :precision=0.98:recall=0.90:f1-score=0.93
sugarcane   :precision=0.93:recall=0.97:f1-score=0.95
tomato      :precision=0.98:recall=0.95:f1-score=0.97


In [60]:
All=len(df_labels)
score = 0.0
for lt in landtypes:
  if TP[lt]+FN[lt] >0:
    recall = TP[lt]/(TP[lt]+FN[lt])
  else:
    recall = 0
  if TP[lt]+FP[lt] >0:
    precision = TP[lt]/(TP[lt]+FP[lt])
  else:
    precision = 0
  score += precision*(TP[lt]+FN[lt])

Weighted_Precision = score/All
print(f"Weighted_Precision={Weighted_Precision:.3f}")

Weighted_Precision=0.973
