In [None]:
import numpy as np 
import pandas as pd 

import os
import cv2
from tqdm import tqdm,trange,tqdm_notebook
import pathlib
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

import matplotlib.pyplot as plt

# Register tqdm's `progress_apply` on pandas objects; the image-loading cells
# below call `Series.progress_apply` to show a progress bar.
tqdm.pandas()


In [None]:
# Root folder of the competition dataset; every other path derives from it.
BASE_PATH = "/kaggle/input/plant-pathology-2020-fgvc7"
datasets_folder = BASE_PATH  # same folder under a second name

# The trailing slash matters: image file names are appended to IMAGES_PATH
# by plain string concatenation.
IMAGES_PATH = BASE_PATH + "/images/"
TRAIN_PATH, TEST_PATH, SUB_PATH = (
    f"{BASE_PATH}/{csv_name}"
    for csv_name in ("train.csv", "test.csv", "sample_submission.csv")
)



In [None]:
# Names of the four label columns used throughout the notebook.
LABEL_COLS = ['healthy', 'multiple_diseases', 'rust', 'scab']

# Read the three competition tables in one go.
train_data, test_data, sub = (
    pd.read_csv(csv_path) for csv_path in (TRAIN_PATH, TEST_PATH, SUB_PATH)
)

In [None]:
# Preview the first rows of the training table.
train_data.head()

In [None]:
# Class balance of the training set. idxmax(axis=1) picks, per row, the label
# column holding the largest value (assumes each row flags exactly one label —
# TODO confirm against train.csv).
fig, ax = plt.subplots()
train_data[LABEL_COLS].idxmax(axis=1).value_counts().plot.bar(ax=ax)
ax.set(title='Label dist', xlabel='label', ylabel='number of images');

In [None]:
def crop_image_with_canny(img):
    """Crop `img` to the bounding box of its Canny edges.

    Edges are detected with ``cv2.Canny(img, 100, 255)`` and the smallest
    axis-aligned rectangle that contains every edge pixel is cut out.

    Parameters
    ----------
    img : numpy.ndarray
        Image in a format accepted by ``cv2.Canny``; the first two axes are
        treated as rows and columns.

    Returns
    -------
    numpy.ndarray
        The cropped image, with the outermost edge rows and columns included
        so the result is never empty. If no edge pixel is found, `img` is
        returned unchanged.
    """
    edges = cv2.Canny(img, 100, 255)

    # Row/column coordinates of every edge pixel, found in one vectorised call.
    edge_rows, edge_cols = np.nonzero(edges)
    if edge_rows.size == 0:
        # Nothing to anchor the crop on (e.g. a flat image): keep the full frame.
        return img

    row_min, row_max = edge_rows.min(), edge_rows.max()
    col_min, col_max = edge_cols.min(), edge_cols.max()

    # +1 because slice ends are exclusive and the extreme edge pixels belong to the box.
    return img[row_min:row_max + 1, col_min:col_max + 1]


In [None]:
import albumentations
from albumentations import RandomCrop, Compose, HorizontalFlip, VerticalFlip, OneOf
from albumentations.core.transforms_interface import DualTransform
from albumentations.augmentations import functional as F


In [None]:
def augment(aug, image):
    '''
    Apply a single augmentation to an image.

    aug   : callable invoked as aug(image=...) that returns a mapping with an
            'image' entry (the albumentations calling convention)
    image : image handed to `aug`

    Returns the value stored under the 'image' key of the result.
    '''
    result = aug(image=image)
    return result['image']

def VH_augment(image):
    '''
    Flip an image horizontally and then vertically.

    Both flips are created with p=1, so they are always applied.
    '''
    for flip_cls in (HorizontalFlip, VerticalFlip):
        image = flip_cls(p=1)(image=image)['image']
    return image

def strong_aug(p=1.0):
    '''
    Build a random flip augmentation.

    The returned transform picks one of three options: horizontal flip,
    vertical flip, or both flips together.

    p : probability that the returned transform is applied at all
        (default 1.0, i.e. one of the flips always happens).

    NOTE(review): the 0.33 values are meant as equal selection weights for
    the three options; OneOf is documented to normalise its children's `p`
    values - confirm against the installed albumentations version.
    '''
    return OneOf([
        HorizontalFlip(p=0.33),
        VerticalFlip(p=0.33),
        Compose([HorizontalFlip(p=1),
                 VerticalFlip(p=1)], p=0.33)
    ], p=p)



In [None]:
def my_resize(img, per):
    """Scale an image by the factor `per` along both axes.

    img : array whose shape starts with (height, width)
    per : scale factor; each target dimension is truncated to an int

    Returns the result of cv2.resize with its default interpolation.
    """
    # cv2.resize expects the target size as (width, height).
    target_size = (int(img.shape[1] * per), int(img.shape[0] * per))
    return cv2.resize(img, target_size)

In [None]:
# Probe one training image to see the dimensions a 10 % downscale would give.
# os.path.join avoids the doubled slash (IMAGES_PATH already ends with "/").
tmp_img = cv2.imread(os.path.join(IMAGES_PATH, "Train_0.jpg"))
if tmp_img is None:
    # cv2.imread reports an unreadable file by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {os.path.join(IMAGES_PATH, 'Train_0.jpg')}")
per = 0.1
new_width = int(tmp_img.shape[1] * per)
new_height = int(tmp_img.shape[0] * per)

In [None]:
def load_images_labels(image_id):
    """Load one training image (downscaled to 20 %) together with its label row.

    image_id : id such as "Train_12"; the image is read from
               IMAGES_PATH + image_id + ".jpg"

    Returns (img, labels), where `labels` holds the LABEL_COLS values of the
    matching row of `train_data`.

    Raises FileNotFoundError if the image cannot be read.
    """
    file_path = image_id + ".jpg"
    img = cv2.imread(IMAGES_PATH + file_path)
    if img is None:
        # cv2.imread reports an unreadable file by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {IMAGES_PATH + file_path}")
    img = my_resize(img, 0.2)

    # "Train_<k>" -> k, used as the row label in train_data
    # (assumes train_data keeps the default integer index of train.csv).
    row_index = int(image_id.replace("Train_", ""))

    return img, train_data.loc[row_index, LABEL_COLS].values

# Load every training image with a progress bar; each element of the result
# is the (image, label_row) pair returned by load_images_labels.
train_images_labels = train_data["image_id"].progress_apply(load_images_labels)
# Unzip the pairs into two parallel tuples.
train_images, labels = zip(*train_images_labels)

In [None]:
def creat_augment(image_id):
    """Return a randomly flipped copy of a training image plus its label row.

    The image is loaded and downscaled exactly as in load_images_labels, then
    passed through strong_aug(p=1.0).
    """
    img, label_row = load_images_labels(image_id)
    flipped = strong_aug(p=1.0)(image=img)['image']
    return flipped, label_row

# Oversample the "multiple_diseases" class with randomly flipped copies until
# it is at least as large as the "healthy" class.
multiple_diseases = train_data[train_data['multiple_diseases'] == 1]
healthy = train_data[train_data['healthy'] == 1]
# Number of augmented copies to create per "multiple_diseases" image.
n = int(len(healthy)/len(multiple_diseases)) + 1

augmented_data = []  # (augmented image, label row) pairs

for _ in tqdm(range(n), desc="augmentation rounds"):
    for image_id in multiple_diseases["image_id"]:
        # Use a dedicated name for the label row: `labels` holds the training
        # labels from the loading cell and must not be rebound here.
        augmented_image, augmented_label = creat_augment(image_id)
        augmented_data.append((augmented_image, augmented_label))

# Use the augmented samples themselves; reloading the originals from disk
# would only add exact duplicates.
augment_images, augment_labels = zip(*augmented_data)


In [None]:
# Number of extra samples produced by the augmentation cell.
print(len(augment_images))

In [None]:
# Append the extra samples to the training set.
# NOTE(review): `+` only concatenates if both operands are tuples, so `labels`
# must still be the tuple of label rows from the loading cell — confirm no
# earlier cell rebinds it. Re-running this cell appends the extra samples again.
train_images = train_images + augment_images
labels = labels + augment_labels
print(len(train_images))

In [None]:
def my_generate(train_images):
    """Preprocess a sequence of images into fixed-size 128x128 images.

    Each image goes through, in order:
      1. crop to the bounding box of its Canny edges,
      2. 6x6 mean filter,
      3. 100x100 box blur,
      4. resize to 128x128.

    train_images : iterable of images accepted by the cv2 calls below

    Returns a list with one preprocessed image per input image.
    """
    # Divide by the number of kernel elements (36) so the filter is a true
    # mean and does not scale pixel intensities.
    kernel = np.ones((6, 6), np.float32)
    kernel /= kernel.size

    processed = []
    for img in tqdm(train_images, leave=True):
        img = crop_image_with_canny(img)
        img = cv2.filter2D(img, -1, kernel)
        img = cv2.blur(img, (100, 100))
        # The crop size varies per image, so bring everything back to one shape.
        processed.append(cv2.resize(img, (128, 128)))

    return processed

In [None]:
# Replace the loaded images by their preprocessed versions.
# NOTE(review): this rebinds `train_images`, so re-running the cell would
# preprocess the already preprocessed images.
train_images = my_generate(train_images)

In [None]:
from skimage.feature import hog


In [None]:
# Feature extraction
def Hog_color(image):
    """Build one feature vector from HOG descriptors and raw pixel values.

    image : BGR image (it is converted with cv2.COLOR_BGR2GRAY for the HOG part)

    Returns a 1-D array: the HOG features of the grayscale image followed by
    the flattened pixel values of `image`.
    """
    # Convert image to grayscale
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Compute HOG features. Only the feature vector is needed, so the HOG
    # visualisation is not requested. The input is 2-D, which hog() treats as
    # grayscale by default; the `multichannel` keyword is not passed because
    # recent scikit-image releases no longer accept it.
    hog_features = hog(gray_image, orientations=9, pixels_per_cell=(8, 8),
                       cells_per_block=(2, 2))

    # Compute color features
    color_features = image.flatten()

    # Concatenate HOG and color features
    return np.concatenate((hog_features, color_features))
    
# Turn every preprocessed training image into its feature vector.
# NOTE(review): `train_images` holds feature vectors, not images, from here on.
train_images = [Hog_color(img) for img in tqdm(train_images)]

In [None]:
# Sample count vs. label count; the two numbers are expected to match.
print(len(train_images),'\t', len(labels))

In [None]:
# Flatten every feature vector to 1-D.
# NOTE(review): Hog_color appears to return a 1-D array already, which would
# make this a plain copy — confirm.
train_images = [np.array(img).flatten() for img in tqdm(train_images)]
print(type(train_images))

In [None]:
# Convert the lists of feature vectors and label rows to numpy arrays
train_images = np.array(train_images)
labels = np.array(labels)
assert len(train_images) == len(labels), (
    f"{len(train_images)} samples but {len(labels)} label rows"
)

# Shuffle the data with one seeded permutation, applied to both arrays.
# Repeating the shuffle adds no randomness, and the seed makes the order
# (and so the later train/test split) reproducible.
p = np.random.default_rng(22).permutation(len(train_images))
train_images = train_images[p]
labels = labels[p]

In [None]:
# Hold out 20 % of the samples for evaluation (fixed seed for the split).
xtrain, xtest, ytrain, ytest = train_test_split(
    train_images, labels, test_size=0.2, random_state=22
)

# Collapse each label row to the position of its largest entry (class index).
ytrain, ytest = (np.array(label_rows).argmax(axis=1) for label_rows in (ytrain, ytest))

# Stack the feature vectors into 2-D matrices.
xtrain, xtest = np.vstack(xtrain), np.vstack(xtest)
print(len(xtrain), len(ytrain))

In [None]:
# RBF-kernel SVM. probability=True enables predict_proba, which the submission
# cell needs; random_state fixes the internal shuffling used for those
# probability estimates so repeated runs give the same probabilities.
c = 10
model = SVC(kernel="rbf", C=c, probability=True, random_state=22)

model.fit(xtrain, ytrain)
predictions = model.predict(xtest)

# Mean accuracy on the training split and on the held-out split.
acc_train = model.score(xtrain, ytrain)
print(f"C = {c} --> Accuracy_train = {acc_train}")
acc_test = model.score(xtest, ytest)
print(f"C = {c} --> Accuracy_test = {acc_test}")

In [None]:
# Read the test table (one row per test image id).
test_data = pd.read_csv(TEST_PATH)

In [None]:
def load_test_images(image_id):
    """Load one test image, downscaled to 20 %.

    image_id : id such as "Test_12"; the image is read from
               IMAGES_PATH + image_id + ".jpg"

    Returns (img, None). Test images have no labels; the second slot is a
    placeholder kept so callers can go on unpacking pairs.

    Raises FileNotFoundError if the image cannot be read.
    """
    file_path = image_id + ".jpg"
    img = cv2.imread(IMAGES_PATH + file_path)
    if img is None:
        # cv2.imread reports an unreadable file by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {IMAGES_PATH + file_path}")
    img = my_resize(img, 0.2)

    return img, None

# Load every test image with a progress bar; each element of the result is
# the pair returned by load_test_images.
test_img_label_temp = test_data["image_id"].progress_apply(load_test_images)
# Keep the images; the second component of each pair is not used afterwards.
test_images, garbage  = zip(*test_img_label_temp)

In [None]:
# The test images were already loaded by the previous cell. Repeating that
# cell here would redefine load_test_images and read every test image from
# disk a second time, so this cell only checks the result of the load.
assert len(test_images) == len(test_data), "expected one loaded image per test.csv row"

In [None]:
# Apply the same preprocessing function used for the training images.
# NOTE(review): this rebinds `test_images`, so re-running the cell would
# preprocess the already preprocessed images.
test_images = my_generate(test_images)


In [None]:
# Turn every preprocessed test image into its feature vector. Each image is
# processed exactly once and none is dropped, so row i still belongs to the
# i-th row of test_data.
test_images = [Hog_color(img) for img in tqdm(test_images)]

In [None]:
# Flatten every feature vector to 1-D.
# NOTE(review): Hog_color appears to return a 1-D array already, which would
# make this a plain copy — confirm.
test_images = [np.array(img).flatten() for img in tqdm(test_images)]

In [None]:
# Convert the list of feature vectors to a numpy array
test_images = np.array(test_images)

In [None]:
# Stack the feature vectors row-wise into a 2-D matrix, as done for xtrain/xtest.
test_images = np.vstack(test_images)

In [None]:
# Use the trained SVM to predict class probabilities for the test feature matrix.
y_prob = model.predict_proba(test_images)


In [None]:
# Show the predicted probabilities (one row per test sample).
print(y_prob)

In [None]:
# The features were built row by row from test_data, so there must be exactly
# one probability row per test.csv row.
assert len(y_prob) == len(test_data), "number of predictions differs from number of test rows"

# Take the ids from test.csv itself instead of re-creating them from a counter.
image_ids = test_data["image_id"].to_numpy()

# predict_proba orders its columns like model.classes_. The classes are the
# argmax positions into LABEL_COLS used as training targets, so each column is
# mapped back to its label name explicitly instead of assuming the order 0..3.
df = pd.DataFrame(y_prob, columns=[LABEL_COLS[class_idx] for class_idx in model.classes_])
# Fixed column order; a label absent from the training targets gets probability 0.
df = df.reindex(columns=LABEL_COLS, fill_value=0.0)

# Use image_id as the index of the dataframe
df.insert(0, 'image_id', image_ids)
df = df.set_index('image_id')

# Write the dataframe to a csv file
df.to_csv('submission.csv')