<a href="https://colab.research.google.com/github/GaoangLiu/AA_ipynb/blob/master/Plant_Pathology_2020_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

The objectives of the ‘Plant Pathology Challenge’ are to train a model, using the images of the training dataset, to: 
1. Accurately classify a given image from the testing dataset as one of the disease categories or as a healthy leaf; 
2. Accurately distinguish between many diseases, sometimes more than one on a single leaf; 
3. Deal with rare classes and novel symptoms; 
4. Address depth perception—angle, light, shade, physiological age of the leaf; and 
5. Incorporate expert knowledge in identification, annotation, quantification, and guiding computer vision to search for relevant features during learning.

Main page: [https://www.kaggle.com/c/plant-pathology-2020-fgvc7/](https://www.kaggle.com/c/plant-pathology-2020-fgvc7/)

In [0]:
# Load packages 
import math
import re
import os
import timeit
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import nltk
import logging
import time
import smart_open
import importlib

from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
logging.basicConfig(format='[%(asctime)s %(levelname)8s] %(message)s', level=logging.INFO, datefmt='%m-%d %H:%M:%S')

from keras import layers, Input
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Sequential, Model, load_model
from keras.layers import Flatten, Dense, Embedding, Dropout, LSTM, GRU, Bidirectional
from keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.image import ImageDataGenerator

!pip install efficientnet
import efficientnet.keras as efn 

import gensim.downloader as api

import tensorflow_hub as hub 
import tensorflow as tf

In [0]:
! test -f plant.zip || wget -O plant.zip bwg.140714.xyz:8000/plant-pathology-2020-fgvc7.zip
! unzip plant.zip

# Explore data

In [0]:
# Read the training table and append the ".jpg" extension to every image id
# in a single chained expression.
train = pd.read_csv('train.csv').assign(
    image_id=lambda frame: frame['image_id'] + '.jpg'
)
# Display ten random rows as a quick sanity check.
train.sample(10)

In [0]:
# Augmentation pipeline for the TRAINING images: random flips, small
# rotations / shifts / zoom / shear and brightness changes, plus scaling of
# the pixel values by 1/255.
train_data_gen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=.1,
    rescale=1/255,
    fill_mode='nearest',
    shear_range=0.1,
    brightness_range=[0.5, 1.5])

# Validation images only get the same rescaling and no random augmentation,
# so validation metrics are computed on unmodified images and stay comparable
# from epoch to epoch.
val_data_gen = ImageDataGenerator(rescale=1/255)

img_shape = 300
batch_size = 64
# Target columns handed to both generators as raw label arrays.
label_cols = ['healthy', 'multiple_diseases', 'rust', 'scab']

# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split reproducible.
X_train, X_val = train_test_split(train, test_size=0.2, random_state=2020)

# Training batches are reshuffled every epoch (seeded for reproducibility) so
# the model does not see the images in the same fixed order each time.
# No `subset` argument is passed: it only has an effect when the
# ImageDataGenerator is created with `validation_split`, which is not the case
# here because the split is done by train_test_split above.
train_generator = train_data_gen.flow_from_dataframe(X_train, directory='images/',
                                                      target_size=(img_shape, img_shape),
                                                      x_col="image_id",
                                                      y_col=label_cols,
                                                      class_mode='raw',
                                                      shuffle=True,
                                                      seed=2020,
                                                      batch_size=batch_size)

# Validation batches keep a fixed order (shuffle=False) and come from the
# non-augmenting generator.
val_generator = val_data_gen.flow_from_dataframe(X_val, directory='images/',
                                                  target_size=(img_shape, img_shape),
                                                  x_col="image_id",
                                                  y_col=label_cols,
                                                  class_mode='raw',
                                                  shuffle=False,
                                                  batch_size=batch_size)


# Plain CNN: three 7x7 convolution + 2x2 max-pooling stages with 32, 64 and
# 128 filters, followed by dropout, a 512-unit dense layer and a 4-way softmax
# output.
model = Sequential([
    layers.Conv2D(32, (7, 7), activation='relu',
                  input_shape=(img_shape, img_shape, 3)),
    layers.MaxPool2D((2, 2)),
    layers.Conv2D(64, (7, 7), activation='relu'),
    layers.MaxPool2D((2, 2)),
    layers.Conv2D(128, (7, 7), activation='relu'),
    layers.MaxPool2D((2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(512, activation='relu'),
    layers.Dense(4, activation='softmax'),
])
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 5 epochs. The step counts are taken from the generators
# themselves: len(generator) is the number of batches needed to cover its
# data once, so one epoch is exactly one pass over the training images and
# each validation run is exactly one pass over the validation images.
# (Passing the batch size or another hard-coded number here would request
# more batches per epoch than the generators hold.)
# NOTE(review): newer Keras releases accept generators directly in
# `model.fit` and deprecate `fit_generator` - switch once the installed
# Keras version is confirmed.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=5,
    validation_data=val_generator,
    validation_steps=len(val_generator),
)
2

Found 1456 validated image filenames.
Found 365 validated image filenames.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5