Importing Libraries

In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import skimage
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
import pickle

Reading Dataset

In [None]:
# Load the training table and preview its first rows.
train_csv_path = 'train.csv'
df = pd.read_csv(train_csv_path)
df.head(n=5)

Labeling every image with a single integer class id

In [None]:
# Collapse the four indicator columns into one integer class id per image:
# take the column holding the row maximum, then translate its name to an id.
class_ids = {'healthy': 0, 'multiple_diseases': 1, 'rust': 2, 'scab': 3}
df['label'] = df[list(class_ids)].idxmax(axis=1).map(class_ids)
df.head()

Functions for feature extraction

In [None]:
def mean_brightness(img):
  """Return the mean of the V (value) channel of `img` after BGR -> HSV conversion."""
  value_channel = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)[:, :, 2]
  return value_channel.mean()

def std_brightness(img):
  """Return the standard deviation of the V (value) channel of `img` in HSV space."""
  channels = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
  # Channel order after the conversion is hue, saturation, value.
  return np.std(channels[2])

def mean_saturation(img):
  """Return the mean of the S (saturation) channel of `img` after BGR -> HSV conversion."""
  _, saturation, _ = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
  return saturation.mean()

def std_saturation(img):
  """Return the standard deviation of the S (saturation) channel of `img` in HSV space."""
  hsv_image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
  saturation = cv2.split(hsv_image)[1]
  return saturation.std()

def yellow_area_fraction(img):
  """Return the fraction of pixels whose HSV colour lies in the yellow window.

  Args:
    img: BGR image (it is passed through cv2.COLOR_BGR2HSV before thresholding).

  Returns:
    Number of in-range pixels divided by the total pixel count, as a float
    between 0.0 and 1.0.
  """
  # The bounds are (hue, saturation, value) triples, so the image has to be in
  # HSV space before thresholding. Applying them to the raw BGR array would
  # compare blue/green/red intensities against hue/saturation/value limits.
  hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
  lower_yellow = np.array([20, 100, 100])
  upper_yellow = np.array([40, 255, 255])

  # inRange yields a single-channel mask that is non-zero where all three
  # channels fall inside their bounds.
  yellow_mask = cv2.inRange(hsv, lower_yellow, upper_yellow)
  yellow_pixels = np.count_nonzero(yellow_mask)
  return yellow_pixels / yellow_mask.size

def lbp_hist(img):
  """Return a density-normalised histogram of uniform LBP codes for `img`.

  Args:
    img: BGR image; it is converted to grayscale before the texture analysis.

  Returns:
    1-D numpy array with one density value per histogram bin (9 bins).
  """
  # Imported explicitly: a bare `import skimage` is not guaranteed to expose the
  # `feature` submodule as an attribute on every scikit-image release.
  from skimage.feature import local_binary_pattern

  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  codes = local_binary_pattern(gray, P=8, R=1, method='uniform')
  # NOTE(review): edges 0..9 define 9 bins and numpy closes the last one
  # ([8, 9]), so codes 8 and 9 land in the same bin. Kept as-is so the feature
  # vector length stays compatible with already-trained models; switching to
  # np.arange(0, 11) would give every code its own bin.
  hist, _ = np.histogram(codes.ravel(), bins=np.arange(0, 10), density=True)
  return hist

def number_of_blobs(img):
  """Count connected yellow regions in `img` and measure their average area.

  Args:
    img: BGR image (it is passed through cv2.COLOR_BGR2HSV before thresholding).

  Returns:
    Two-element list `[num_blobs, avg_blob_size]`; the average is 0 when no
    yellow region is found.
  """
  # The bounds are (hue, saturation, value) triples, so threshold the HSV
  # version of the image rather than the raw BGR pixels.
  hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
  lower_yellow = np.array([20, 100, 100])
  upper_yellow = np.array([40, 255, 255])

  yellow_mask = cv2.inRange(hsv, lower_yellow, upper_yellow)
  _, _, stats, _ = cv2.connectedComponentsWithStats(yellow_mask, connectivity=8)
  # Row 0 of `stats` belongs to the background label, so it is skipped.
  blob_areas = stats[1:, cv2.CC_STAT_AREA]
  num_blobs = len(blob_areas)
  # Guard the mean: averaging an empty array would produce NaN.
  avg_blob_size = np.mean(blob_areas) if num_blobs > 0 else 0
  return [num_blobs, avg_blob_size]

def dark_area_fraction(img, threshold=50):
  """Return the share of pixels whose HSV value channel is strictly below `threshold`."""
  value = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)[..., 2]
  # Boolean mask: True wherever the pixel counts as dark.
  is_dark = value < threshold
  return is_dark.sum() / is_dark.size


def extract_features(img_path, image_dir='images', size=(500, 500)):
  """Load one image and return its hand-crafted feature vector.

  Args:
    img_path: Image identifier without extension; ".jpg" is appended.
    image_dir: Directory that holds the image files.
    size: (width, height) the image is resized to before feature extraction.

  Returns:
    1-D numpy array holding, in order: mean and std of brightness, mean and std
    of saturation, yellow area fraction, the LBP histogram values, the blob
    count and average blob size, and the dark area fraction.

  Raises:
    FileNotFoundError: If the image file is missing or cannot be decoded.
  """
  full_path = os.path.join(image_dir, img_path) + ".jpg"
  img = cv2.imread(full_path)
  if img is None:
    # cv2.imread reports a missing/unreadable file by returning None instead of
    # raising; fail here with the path rather than obscurely inside cv2.resize.
    raise FileNotFoundError(f"Could not read image: {full_path}")
  img = cv2.resize(img, size)

  features = [
    mean_brightness(img),
    std_brightness(img),
    mean_saturation(img),
    std_saturation(img),
    yellow_area_fraction(img),
  ]
  features.extend(lbp_hist(img))
  features.extend(number_of_blobs(img))
  features.append(dark_area_fraction(img))
  return np.array(features)

In [None]:
# Number of rows in the training table.
df.shape[0]

Creating feature array

In [None]:
# Build the training feature matrix: one feature vector per image id.
x_features = np.array(
    [extract_features(image_id) for image_id in tqdm(df['image_id'])]
)
x_features.shape


In [None]:
# Inputs are the extracted features; targets are the integer label column.
x, y = x_features, df['label'].to_numpy()

Train Test Split

In [None]:
# Hold out 20% of the samples for evaluation. Stratifying on the labels keeps
# the class proportions the same in both splits, so no class ends up over- or
# under-represented in the evaluation set by chance.
# Note: stratification requires at least two samples of every class.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

Model Training

In [None]:
# Fit a 100-tree random forest; the fixed seed keeps runs repeatable.
forest_params = {'n_estimators': 100, 'random_state': 42}
classifier = RandomForestClassifier(**forest_params)
classifier.fit(x_train, y_train)

Evaluating on the held-out split

In [None]:
# Score the model on the held-out split and print per-class metrics.
predict = classifier.predict(x_test)
report = classification_report(y_test, predict)
print(report)

In [None]:
# Persist the fitted model to disk.
with open('classifier.pkl', mode='wb') as model_file:
    pickle.dump(classifier, model_file)


Predicting on test.csv

In [None]:
# Load the table of test image ids and preview its first rows.
test_csv_path = 'test.csv'
test_df = pd.read_csv(test_csv_path)
test_df.head(n=5)

In [None]:
# Build the feature matrix for the test images: one feature vector per image id.
# Note that this rebinds `x_features`, which previously held the training features.
x_features = np.array(
    [extract_features(image_id) for image_id in tqdm(test_df['image_id'])]
)
x_features.shape

In [None]:
# Predict an integer class id for every test image.
result = classifier.predict(X=x_features)

In [None]:
# Turn the predicted class ids into one 0/1 indicator column per class.
# The column is named 'healthy' (lower-case) so it matches the column name used
# in the training table; it was previously written as 'Healthy'.
result_df = test_df.copy()
class_columns = {'healthy': 0, 'multiple_diseases': 1, 'rust': 2, 'scab': 3}
predicted_ids = np.asarray(result)
for column, class_id in class_columns.items():
    result_df[column] = (predicted_ids == class_id).astype(int)

In [None]:
# Preview the first rows of the submission table.
result_df.head(n=5)

In [None]:
# Write the submission file without the DataFrame index column.
submission_path = 'final_submission.csv'
result_df.to_csv(submission_path, index=False)