## 데이터 셋

In [1]:
import cv2
import os
import warnings
import numpy as np
from sklearn.metrics import classification_report

###### Bing Image Downloader 패키지 설치하기
!pip install bing_image_downloader

# Ask for a search keyword and an image count, then download that many
# images with bing_image_downloader.
from bing_image_downloader import downloader
import os

# Surrounding whitespace is not part of the search term; an empty keyword
# would produce a meaningless query, so keep asking until one is given.
keywords = input("검색할 이미지 키워드를 입력하세요 (예: 노란바나나) => ").strip()
while not keywords:
    print("키워드를 비워 둘 수 없습니다.")
    keywords = input("검색할 이미지 키워드를 입력하세요 (예: 노란바나나) => ").strip()

# Re-prompt instead of crashing with ValueError on non-numeric input, and
# reject zero / negative counts.
limit = 0
while limit < 1:
    try:
        limit = int(input("다운로드할 이미지의 개수를 입력하세요 =>  "))
    except ValueError:
        limit = 0
    if limit < 1:
        print("1 이상의 정수를 입력해 주세요.")

# Absolute path of the current working directory.
current_directory = os.path.abspath('.')

# Record of the download request.
record = {
    "keywords": keywords,
    "limit": limit
}

# Run the download.
# NOTE(review): adult_filter_off=True appears to disable the search
# engine's adult-content filter -- confirm this is intended.
downloader.download(
    record['keywords'],
    limit=record['limit'],
    output_dir=current_directory,  # use the current directory as the download location
    adult_filter_off=True,
    force_replace=False,
    timeout=60
)

In [2]:
# Feature extractor: the average color of an image.
def averagecolor(image):
    """Return the mean of ``image`` over its first two axes.

    For an array laid out as (rows, columns, channels) this yields one mean
    value per channel; for a 2-D array it yields a single scalar.

    Parameters
    ----------
    image : array-like
        Pixel data with at least two dimensions.

    Returns
    -------
    numpy.ndarray or numpy scalar
        The result of ``np.mean(image, axis=(0, 1))``.

    Raises
    ------
    ValueError
        If ``image`` is None, which is what ``cv2.imread`` returns when a
        file is missing or is not a decodable image.
    """
    if image is None:
        # Without this guard np.mean fails with an axis error that hides
        # the real cause (an image file that could not be loaded).
        raise ValueError(
            "averagecolor() received None instead of an image; "
            "the image file could not be read"
        )
    return np.mean(image, axis=(0, 1))

In [3]:
def evaluateaccuracy(filenames, predictedY, realY=None):
    """Summarize prediction accuracy and print the misclassified samples.

    Parameters
    ----------
    filenames : sequence of str
        Name of each evaluated sample, parallel to ``predictedY``.
    predictedY : sequence
        Predicted label of each sample.
    realY : sequence, optional
        True label of each sample. When omitted, the module-level
        ``realtestY`` list is used, which is how existing callers supply
        the ground truth.

    Returns
    -------
    str
        ``"Correct :<n>. Wrong: <m>. Correctly Classified: <p>%"``.
        With no samples at all the percentage is reported as ``0.0``.
    """
    if realY is None:
        realY = realtestY
    actual = np.asarray(realY)
    predicted = np.asarray(predictedY)

    wrong_mask = actual != predicted
    wrong = int(np.sum(wrong_mask))
    correct = int(np.sum(actual == predicted))

    if wrong > 0:
        print ("Wrong Predictions: (filename, labelled, predicted) ")
        # column_stack always yields an (n_samples, 3) table, so the row
        # mask also works when only a single sample was evaluated.
        print(np.column_stack([filenames, actual, predicted])[wrong_mask])

    # Share of matching (correct) predictions as a percentage of all predictions.
    total = len(predicted)
    percent = correct * 100 / total if total else 0.0
    return "Correct :" + str(correct) + ". Wrong: " + str(wrong) + ". Correctly Classified: " + str(percent) + "%"

In [4]:
# Build the training set: one feature vector (trainX) and one label (trainY)
# per readable image.
trainX = []
trainY = []

# Training images live in one sub-folder per class label under `path`.
path = "banana/"
class_name = ['ripe','unripe','overripe']

for label in class_name:
    print ("Loading training images for the label: "+label)

    # Read every file found in this label's sub-folder.
    label_dir = os.path.join(path, label)
    for filename in os.listdir(label_dir):
        image_path = os.path.join(label_dir, filename)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None (non-image
            # file, corrupt download, nested folder) instead of raising;
            # skip the entry so one stray file does not abort the load.
            warnings.warn(f"Skipping unreadable image: {image_path}")
            continue
        img_features = averagecolor(img)
        trainX.append(img_features)
        trainY.append(label)

Loading training images for the label: ripe
Loading training images for the label: unripe
Loading training images for the label: overripe


In [5]:
# Classify every test image with a 1-nearest-neighbour rule on the average
# color feature, then report accuracy.
path = "banana/test/"
filenames = []
predictedY = []
realtestY = []

if len(trainX) == 0:
    raise ValueError("trainX is empty; load the training images before evaluating")

# Stack the training features once so each test image is compared against
# all of them in a single vectorized call instead of a Python loop.
train_features = np.asarray(trainX)

for label in class_name:
    label_dir = os.path.join(path, label)
    for filename in os.listdir(label_dir):
        image_path = os.path.join(label_dir, filename)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode; skip
            # them rather than crash in the feature extractor.
            warnings.warn(f"Skipping unreadable image: {image_path}")
            continue
        img_features = averagecolor(img)

        # Euclidean distance from this image to every training sample;
        # the label of the closest one is the prediction.
        calculated_distances = np.linalg.norm(train_features - img_features, axis=1)
        prediction = trainY[np.argmin(calculated_distances)]

        print (filename + ": " + prediction)  # show the inferred label
        filenames.append(filename)
        realtestY.append(label)
        predictedY.append(prediction)

# Accuracy report from scikit-learn.
print()
print(classification_report(realtestY, predictedY))

# Accuracy summary from the custom helper, which also prints the file names
# of the misclassified samples.
print ()
print (evaluateaccuracy(filenames,predictedY))


Image_134.jpg: ripe
Image_139.jpg: ripe
Image_142.jpg: overripe
Image_147.jpg: ripe
Image_149.jpg: ripe
Image_157.jpg: ripe
Image_161.jpg: overripe
Image_166.jpg: overripe
Image_181.png: ripe
Image_186.jpg: overripe
Image_101.jpg: overripe
Image_106.jpg: unripe
Image_117.jpg: unripe
Image_163.jpg: unripe
Image_42.jpg: unripe
Image_84.jpg: overripe
Image_93.jpg: unripe
Image_95.jpg: unripe
Image_96.jpg: unripe
Image_99.jpg: unripe
Image_104.jpg: overripe
Image_131.jpg: ripe
Image_135.jpg: overripe
Image_15.jpg: overripe
Image_159.jpg: ripe
Image_19.jpg: overripe
Image_75.jpg: ripe
Image_76.jpg: overripe
Image_86.jpg: overripe
Image_96.jpg: ripe

              precision    recall  f1-score   support

    overripe       0.50      0.60      0.55        10
        ripe       0.60      0.60      0.60        10
      unripe       1.00      0.80      0.89        10

    accuracy                           0.67        30
   macro avg       0.70      0.67      0.68        30
weighted avg       0.

In [6]:
# Train an SVM classifier on the average-color features.
from sklearn import svm
from sklearn.preprocessing import LabelEncoder

# Encode the string labels as integers before fitting; the encoder is kept
# so predictions can be mapped back to label names later.
encoder = LabelEncoder()
encodedtrainY = encoder.fit_transform(trainY)

# A fixed gamma is used here; gamma="scale" was the alternative tried earlier.
model = svm.SVC(
    gamma=0.001,
    decision_function_shape='ovr',
)
model.fit(trainX, encodedtrainY)

In [7]:
# Show the training labels in load order.
print(trainY)

['ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'ripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'unripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe', 'overripe']


In [8]:
# Classify every test image with the trained SVM, then report accuracy.
import os
path = "banana/test/"

filenames = []
predictedY = []
realtestY = []


for label in class_name:
    label_dir = os.path.join(path, label)
    for filename in os.listdir(label_dir):
        image_path = os.path.join(label_dir, filename)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode; skip
            # them rather than crash in the feature extractor.
            warnings.warn(f"Skipping unreadable image: {image_path}")
            continue
        img_features = averagecolor(img)
        prediction = model.predict([img_features])[0]

        # Map the numeric class id back to its label name.
        prediction = encoder.inverse_transform([prediction])[0]

        print (filename + ": " + prediction)
        filenames.append(filename)
        realtestY.append(label)
        predictedY.append(prediction)

# Accuracy report from scikit-learn.
print ()
print(classification_report(realtestY, predictedY))

# Accuracy summary from the custom helper.
print (evaluateaccuracy(filenames,predictedY))

Image_134.jpg: ripe
Image_139.jpg: overripe
Image_142.jpg: overripe
Image_147.jpg: ripe
Image_149.jpg: ripe
Image_157.jpg: ripe
Image_161.jpg: overripe
Image_166.jpg: overripe
Image_181.png: ripe
Image_186.jpg: ripe
Image_101.jpg: overripe
Image_106.jpg: unripe
Image_117.jpg: unripe
Image_163.jpg: unripe
Image_42.jpg: unripe
Image_84.jpg: unripe
Image_93.jpg: unripe
Image_95.jpg: unripe
Image_96.jpg: unripe
Image_99.jpg: unripe
Image_104.jpg: overripe
Image_131.jpg: ripe
Image_135.jpg: overripe
Image_15.jpg: overripe
Image_159.jpg: ripe
Image_19.jpg: overripe
Image_75.jpg: ripe
Image_76.jpg: overripe
Image_86.jpg: overripe
Image_96.jpg: ripe

              precision    recall  f1-score   support

    overripe       0.55      0.60      0.57        10
        ripe       0.60      0.60      0.60        10
      unripe       1.00      0.90      0.95        10

    accuracy                           0.70        30
   macro avg       0.72      0.70      0.71        30
weighted avg       0.72