# Installs

In [1]:
# Install the third-party packages imported below into the running kernel's environment.
# NOTE(review): no versions are pinned, so a fresh run may resolve to different releases — consider pinning.
%pip install opencv-python
%pip install tqdm
%pip install mlxtend
%pip install xgboost
%pip install lightgbm
%pip install opendatasets
%pip install tensorflow
%pip install keras

## Imports

In [21]:
import cv2
import glob
import os
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import string
from mlxtend.plotting import plot_decision_regions
from mpl_toolkits.mplot3d import Axes3D

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler as SS
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.svm import SVC,LinearSVC
from xgboost import XGBClassifier as XGB
from lightgbm import LGBMClassifier as LGB

from sklearn.model_selection import train_test_split as tts
from sklearn.model_selection import cross_val_score as cvs
from sklearn.utils.multiclass import unique_labels
from sklearn import metrics

import opendatasets as od
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

# Loading the data

In [3]:
# Root folder of the dataset; its 'Training' sub-folder is listed to count the classes.
data_dir = '../input/fruits/fruits-360_dataset/fruits-360'
print(os.listdir(data_dir))

# Every entry of the Training folder is treated as one class.
classes = os.listdir(os.path.join(data_dir, 'Training'))
print(len(classes))

In [4]:
def getYourFruits(fruits,img_dim, split, print_n=False):
    """Load and resize every ``*.jpg`` image of the given classes from one split.

    Args:
        fruits: Sequence of class folder names found under ``data_dir/<split>/``.
            The position of a name in this sequence becomes its integer label.
        img_dim: Side length in pixels; each image is resized to ``(img_dim, img_dim)``.
        split: Name of the split sub-folder of ``data_dir`` (the notebook passes
            'Training' and 'Test').
        print_n: When true, print how many images were loaded for each class.

    Returns:
        ``(images, labels)`` as NumPy arrays: the resized images in RGB channel
        order and, for each image, the index of its class in ``fruits``.
    """
    images = []
    labels = []
    split_dir = os.path.join(data_dir, split)
    for label, fruit in enumerate(fruits):
        loaded = 0
        # glob order depends on the filesystem; sorting keeps the sample order reproducible.
        for image_path in sorted(glob.glob(os.path.join(split_dir, fruit, "*.jpg"))):
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            if image is None:
                # cv2.imread signals an unreadable/corrupt file by returning None instead of
                # raising; skip it rather than failing later inside cv2.resize.
                print("Skipping unreadable image:", image_path)
                continue
            image = cv2.resize(image, (img_dim, img_dim))
            # OpenCV decodes to BGR; swap the channels to get RGB.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            images.append(image)
            labels.append(label)
            loaded += 1
        if print_n:
            print("There are " , loaded , " " , split.upper(), " images of " , fruit.upper())
    return np.array(images), np.array(labels)

def getAllFruits():
    """Return the names of the class folders under ``data_dir/Training``.

    The names are sorted so that the class-to-label mapping (a class's label is
    its index in this list) is identical on every run and on every platform.

    Returns:
        Sorted list of folder names; non-directory entries are ignored.
    """
    training_pattern = os.path.join(data_dir, "Training", "*")
    return sorted(
        # basename works with either path separator, unlike splitting on "/".
        os.path.basename(fruit_path)
        for fruit_path in glob.glob(training_pattern)
        if os.path.isdir(fruit_path)
    )

In [5]:
# Collect every class name from the Training folder; a name's position in this
# list is what getYourFruits uses as the integer label.
fruits_arr = getAllFruits()
# Number of classes found (displayed as the cell output).
len(fruits_arr)

In [6]:
# Side length the images are resized to; autoencoder() builds its Input as (20, 20, 3),
# so this value has to stay in sync with it.
IMG_DIM = 20

X_train, y_train = getYourFruits(fruits_arr, IMG_DIM, 'Training')
X_test, y_test = getYourFruits(fruits_arr, IMG_DIM, 'Test')

In [7]:
#Scale Data Images
# Each image is flattened to one row of pixel values before standardisation.
scaler = SS()
X_train_scaled = scaler.fit_transform(X_train.reshape(len(X_train), -1))
# Apply the statistics learned on the training split. Refitting on the test split
# would scale the two splits differently and leak test information into preprocessing.
X_test_scaled = scaler.transform(X_test.reshape(len(X_test), -1))

# Autoencoder

In [25]:
from keras.layers import Conv2D, Conv2DTranspose, UpSampling2D, MaxPool2D, Flatten, BatchNormalization
from keras.layers import Conv1D, MaxPool1D, CuDNNLSTM, Reshape,MaxPooling2D
from keras.layers import Input, Dense, Dropout, Activation, Add, Concatenate
from keras import regularizers
from keras.models import Model, Sequential
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.optimizers import SGD, Adam, RMSprop, Adadelta
import keras.backend as K
from keras.metrics import mean_squared_error
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils

from sklearn.utils import class_weight
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.pipeline import Pipeline

In [10]:
def create_block(input, chs):
    """Stack two Conv2D -> ReLU -> BatchNormalization stages on top of ``input``.

    Args:
        input: Tensor the first convolution is applied to.
        chs: Number of filters used by both 3x3, same-padded convolutions.

    Returns:
        The output tensor of the second BatchNormalization layer.
    """
    features = input
    for _ in range(2):
        convolved = Conv2D(chs, 3, padding="same")(features)
        activated = Activation("relu")(convolved)
        features = BatchNormalization()(activated)
    return features

In [11]:
def autoencoder(input_shape=(20, 20, 3)):
    """Build a convolutional autoencoder and an encoder that shares its layers.

    Args:
        input_shape: ``(height, width, channels)`` of the input images. Height and
            width must be multiples of 4: the encoder halves them twice with
            MaxPool2D and the decoder doubles them twice with UpSampling2D, so any
            other size would not be reconstructed at its original resolution.
            Defaults to ``(20, 20, 3)``, the shape used by this notebook.

    Returns:
        ``(encoder, autoencoder)``: two Keras models built on the same input tensor.
        ``encoder`` outputs the 128-channel "middle" block, ``autoencoder`` outputs
        the reconstruction with the same number of channels as the input.

    Raises:
        ValueError: If height or width is not a multiple of 4.
    """
    height, width, channels = input_shape
    if height % 4 or width % 4:
        raise ValueError(
            f"input height and width must be multiples of 4, got {height}x{width}"
        )

    # Named `inputs` so the builtin input() is not shadowed.
    inputs = Input(tuple(input_shape))

    # Encoder
    block1 = create_block(inputs, 32)
    x = MaxPool2D(2)(block1)
    block2 = create_block(x, 64)

    # Middle (bottleneck used as the feature extractor output)
    x = MaxPool2D(2)(block2)
    middle = create_block(x, 128)

    # Decoder
    block3 = create_block(middle, 64)
    up1 = UpSampling2D((2, 2))(block3)
    block4 = create_block(up1, 32)
    up2 = UpSampling2D((2, 2))(block4)

    # Output: a 1x1 convolution maps back to the input's channel count.
    x = Conv2D(channels, 1)(up2)
    output = Activation("relu")(x)
    return Model(inputs, middle), Model(inputs, output)

In [12]:
def loss_function(y_true, y_pred):
    """Reconstruction loss used to train the autoencoder.

    Applies Keras' ``mean_squared_error`` to the two tensors and then sums the
    result over axes 1 and 2.
    NOTE(review): presumably the inputs are (batch, height, width, channels), so the
    value is a per-sample sum of per-pixel MSEs — confirm against the model output.

    Args:
        y_true: Target images.
        y_pred: Reconstructed images.

    Returns:
        Tensor holding the summed error.
    """
    pixel_errors = mean_squared_error(y_true, y_pred)
    summed_error = K.sum(pixel_errors, axis=(1, 2))
    return summed_error

In [13]:
# autoencoder() returns (encoder, full model); both are built on the same layers.
encoder_ae, model_ae = autoencoder()
model_ae.compile(
    optimizer='adam',
    loss=loss_function,
    # NOTE(review): 'accuracy' is of doubtful value for image reconstruction; it is
    # kept because a later cell plots history.history['accuracy'].
    metrics=['accuracy'],
)
model_ae.summary()

In [14]:
# Rescale pixel values from 0..255 to 0..1 for the autoencoder.
# The images come out of the loader with an integer dtype and the division turns
# them into floats, so the dtype check makes this cell safe to re-run: a second
# execution no longer divides the data by 255 again.
if np.issubdtype(X_train.dtype, np.integer):
    X_train = X_train / 255
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test / 255

In [15]:
# Train the autoencoder to reproduce its own input; the test images are passed
# as validation data.
history = model_ae.fit(
    X_train,
    X_train,
    batch_size=64,
    epochs=50,
    validation_data=(X_test, X_test),
)

In [16]:
# Training vs. validation loss per epoch.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history.history[curve])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'test'], loc='upper left')
plt.show()

In [17]:
# Training vs. validation value of the 'accuracy' metric per epoch.
fig, ax = plt.subplots()
for curve in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[curve])
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train', 'test'], loc='upper left')
plt.show()

In [18]:
# Run both splits through encoder_ae (the first model returned by autoencoder(),
# which outputs the 'middle' block) to obtain the features used by the classifier.
encoder_train=encoder_ae.predict(X_train)
encoder_test=encoder_ae.predict(X_test)

In [42]:
# Standardise the flattened encoder outputs (one row per image).
scaler1 = SS()
encoder_train_scaled = scaler1.fit_transform(encoder_train.reshape(len(encoder_train), -1))
# Apply the statistics learned on the training features. Refitting on the test
# features would scale the two splits differently and leak test information.
encoder_test_scaled = scaler1.transform(encoder_test.reshape(len(encoder_test), -1))

In [22]:
# One-hot encode the test labels (one column per label value present in y_test);
# used as the ground truth for the ROC AUC computations below.
y=pd.get_dummies(y_test)

In [23]:
# One-hot encode the training labels.
# NOTE(review): y_train1 is not referenced anywhere else in the visible notebook — confirm it is still needed.
y_train1=pd.get_dummies(y_train)

# Classifier

In [34]:
# Linear SVM trained on the standardised autoencoder features.
# A fixed random_state makes the fitted model reproducible between runs.
model = LinearSVC(random_state=0)
model.fit(encoder_train_scaled, y_train)

# Predict each split once; the original cell ran every predict() twice for the same result.
yptr = model.predict(encoder_train_scaled)
ypts = model.predict(encoder_test_scaled)

# Kept under their original names because later cells read them.
# NOTE(review): these are hard class labels, not scores; model.decision_function
# would be the usual input for a ROC AUC.
ypptr = yptr.copy()
yppts = ypts.copy()


In [35]:
from sklearn.metrics import accuracy_score as acs
from sklearn.metrics import f1_score as f1s
from sklearn.metrics import roc_auc_score as ras
from sklearn.metrics import log_loss as ll
from sklearn.metrics import classification_report as cr
from sklearn.metrics import RocCurveDisplay as rcd

In [36]:
# Accuracy and macro-averaged F1 of the linear SVM on each split.
print("SVM with linear kernel-")
for split_name, y_true, y_hat in (("Train", y_train, yptr), ("Test", y_test, ypts)):
    split_acc = acs(y_true, y_hat)
    split_f1 = f1s(y_true, y_hat, average='macro')
    print(f"{split_name}: acc: {split_acc} f1: {split_f1} ")

In [37]:
# One-hot encode the predicted labels and align the columns with the ground truth `y`.
# Without the reindex, a class that is never predicted (or predicted but absent from
# y_test) changes the column set and roc_auc_score fails or compares mismatched classes.
ypptd = (
    pd.get_dummies(yppts)
    .reindex(columns=y.columns, fill_value=0)
    .astype(int)
)
print("ROC for SVM with linear krnel with Autoencoder inputs :",ras(y,ypptd))

In [39]:
# k-nearest-neighbours baseline trained on the standardised raw pixels.
model1 = KNC(n_neighbors=5)
model1.fit(X_train_scaled, y_train)

# Predict each split once; the original cell ran every predict() twice, which is
# costly for KNN because each call searches the whole training set.
yptr1 = model1.predict(X_train_scaled)
ypts1 = model1.predict(X_test_scaled)

# Kept under their original names because later cells read them.
ypptr1 = yptr1.copy()
yppts1 = ypts1.copy()


In [41]:
# Accuracy and macro-averaged F1 of the KNN classifier on each split.
print("KNN with n_neighbors=5 -")
for split_name, y_true, y_hat in (("Train", y_train, yptr1), ("Test", y_test, ypts1)):
    split_acc = acs(y_true, y_hat)
    split_f1 = f1s(y_true, y_hat, average='macro')
    print(f"{split_name}: acc: {split_acc} f1: {split_f1}")

In [40]:
# One-hot encode the predicted labels and align the columns with the ground truth `y`.
# Without the reindex, a class that is never predicted (or predicted but absent from
# y_test) changes the column set and roc_auc_score fails or compares mismatched classes.
ypptd1 = (
    pd.get_dummies(yppts1)
    .reindex(columns=y.columns, fill_value=0)
    .astype(int)
)
# model1 was fitted on X_train_scaled (standardised raw pixels), not on the
# autoencoder features, so the label says so.
print("ROC for KNN with n_neighbors=5 with scaled raw-pixel inputs :",ras(y,ypptd1))