<a href="https://colab.research.google.com/github/Bziukiewicz/tests/blob/main/healthyrotten.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import kagglehub

# Fetch the most recent version of the dataset through kagglehub.
dataset_handle = "muhammad0subhan/fruit-and-vegetable-disease-healthy-vs-rotten"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

In [None]:
import shutil

# `path` holds the location returned by kagglehub.
# Copy the data to /content/dataset (existing files there are overwritten/merged
# because of dirs_exist_ok=True).
# NOTE(review): later cells read the data through the relative path './dataset/...';
# that matches this absolute target only when the working directory is /content — confirm.
shutil.copytree(path, "/content/dataset", dirs_exist_ok=True)


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import TensorBoard


In [None]:
# Remove any existing ./images tree; the following cell recreates it from scratch.
!rm -rf ./images

In [None]:
# Location of the raw dataset and of the train/valid/test tree built from it.
base_dir='./dataset/Fruit And Vegetable Diseases Dataset'
data_dir='./images'

# One directory per split ...
train_dir=os.path.join(data_dir,'train')
valid_dir=os.path.join(data_dir,'valid')
test_dir=os.path.join(data_dir,'test')

# ... and one sub-directory per class inside every split.
train_healthy_dir=os.path.join(train_dir,'healthyapple')
train_rotten_dir=os.path.join(train_dir,'rottenapple')
valid_healthy_dir=os.path.join(valid_dir,'healthyapple')
valid_rotten_dir=os.path.join(valid_dir,'rottenapple')
test_healthy_dir=os.path.join(test_dir,'healthyapple')
test_rotten_dir=os.path.join(test_dir,'rottenapple')

# makedirs(exist_ok=True) creates missing parents (data_dir and the split
# directories) and is a no-op for directories that already exist, so the cell
# can be re-run safely without separate existence checks.
for directory in (train_healthy_dir,train_rotten_dir,
                  valid_healthy_dir,valid_rotten_dir,
                  test_healthy_dir,test_rotten_dir):
  os.makedirs(directory, exist_ok=True)


In [None]:
IMAGE_EXTENSIONS=('.jpg','.jpeg','.png')

def _list_images(directory):
  """Return the sorted image file names found directly inside `directory`.

  A name counts as an image when its final extension (case-insensitive) is one
  of IMAGE_EXTENSIONS. os.path.splitext is used so that names without a dot are
  skipped instead of raising, and names with several dots are judged by their
  last extension. The result is sorted because os.listdir returns entries in
  arbitrary order and the train/valid/test split depends on list positions.
  """
  return sorted(
      fname for fname in os.listdir(directory)
      if os.path.splitext(fname)[1].lower() in IMAGE_EXTENSIONS
  )

healthy_fnames=_list_images(os.path.join(base_dir,'Apple__Healthy'))
rotten_fnames=_list_images(os.path.join(base_dir,'Apple__Rotten'))

In [None]:
# Balance the classes: both contribute as many files as the smaller one has.
size = min(len(healthy_fnames), len(rotten_fnames))

# 70% / 20% / 10% of that per-class count, each rounded down.
train_size, valid_size, test_size = (
    int(np.floor(size * fraction)) for fraction in (0.7, 0.2, 0.1)
)

# Cumulative boundaries of the three splits within a file list.
train_idx = train_size
valid_idx = train_size + valid_size
test_idx = train_size + valid_size + test_size

In [None]:
def _copy_split(fnames, src_dir, train_dst, valid_dst, test_dst):
  """Copy one class's files from `src_dir` into its train/valid/test folders.

  Positions are assigned with half-open ranges so every split receives exactly
  the number of files computed for it:
    [0, train_idx)          -> train_dst   (train_size files)
    [train_idx, valid_idx)  -> valid_dst   (valid_size files)
    [valid_idx, test_idx)   -> test_dst    (test_size files)
  Files at position test_idx and beyond are not copied.
  """
  for i, fname in enumerate(fnames[:test_idx]):
    if i<train_idx:
      dst_dir=train_dst
    elif i<valid_idx:
      dst_dir=valid_dst
    else:
      dst_dir=test_dst
    shutil.copyfile(os.path.join(src_dir, fname), os.path.join(dst_dir, fname))

_copy_split(healthy_fnames, os.path.join(base_dir,'Apple__Healthy'),
            train_healthy_dir, valid_healthy_dir, test_healthy_dir)
_copy_split(rotten_fnames, os.path.join(base_dir,'Apple__Rotten'),
            train_rotten_dir, valid_rotten_dir, test_rotten_dir)



In [None]:
# Sanity check: number of entries in the rotten-apple training directory.
len(os.listdir(train_rotten_dir))

In [None]:
# Random transformations applied to training images only.
augmentation_kwargs = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    brightness_range=[0.5,2.0],
    zoom_range=0.2,
    horizontal_flip=True,
)

# Both generators rescale pixel values by 1/255; only training data is augmented.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_kwargs)
valid_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by the directory readers of both splits.
flow_kwargs = dict(
    target_size=(150,150),
    batch_size=32,
    class_mode='binary',
)

train_generator = train_datagen.flow_from_directory(directory=train_dir, **flow_kwargs)
valid_generator = valid_datagen.flow_from_directory(directory=valid_dir, **flow_kwargs)


In [None]:
def display_augmented_images(directory, idx):
    """Plot a 3x4 grid of randomly augmented variants of a single image.

    The image is picked by position from the alphabetically sorted listing of
    `directory` (sorted so that a given `idx` always refers to the same file;
    os.listdir alone returns entries in arbitrary order). It is loaded at
    150x150 and passed through the module-level `train_datagen` twelve times.

    Args:
        directory: Folder containing the source images.
        idx: Position of the image within the sorted directory listing.

    Raises:
        IndexError: If `idx` is outside the range of files in `directory`.

    Returns:
        None. The figure is drawn on the current matplotlib backend.
    """
    fnames = sorted(os.listdir(directory))
    img_path = os.path.join(directory, fnames[idx])
    img = image.load_img(img_path, target_size=(150, 150))

    # The generator expects a batch axis: (height, width, channels) -> (1, height, width, channels).
    x = image.img_to_array(img)
    x = x.reshape((1, ) + x.shape)

    plt.figure(figsize=(16, 8))
    # flow() keeps yielding batches indefinitely; zipping with range(1, 13)
    # stops after the 12 subplots of the 3x4 grid have been filled.
    for position, batch in zip(range(1, 13), train_datagen.flow(x, batch_size=1)):
        plt.subplot(3, 4, position)
        plt.grid(False)
        plt.imshow(image.array_to_img(batch[0]))

In [None]:
#@title Training set, rotten apples:
idx = 17 #@param {type:'slider', min:0, max:409}
# idx is used as a position in the listing of train_rotten_dir; the slider's
# upper bound (409) is hard-coded, so it presumably has to stay below the number
# of files in that directory — TODO confirm after changing the split.
display_augmented_images(train_rotten_dir, idx)

In [None]:
# Four Conv2D + MaxPooling2D stages on 150x150 RGB input, then a dense head
# ending in a single sigmoid unit.
model = Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, 'relu'),
    layers.Dense(1, 'sigmoid'),
])
model.summary()

In [None]:
# RMSprop optimizer with binary cross-entropy loss; accuracy is tracked as a metric.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# train_size and valid_size are per-class file counts, while the generators
# read both classes, so train_size//batch_size would cover only about half of
# the training data per epoch and leave the generator exhausted mid-epoch.
# The step counts are therefore taken from the generators themselves.
batch_size=train_generator.batch_size
steps_per_epoch=len(train_generator)
validation_steps=len(valid_generator)

# steps_per_epoch / validation_steps are intentionally not passed to fit():
# the generators have a known length, so Keras runs one full pass over each of
# them per epoch. The variables above are kept for inspection.
history=model.fit(x=train_generator,
                  epochs=40,
                  validation_data=valid_generator)

  self._warn_if_super_not_called()


Epoch 1/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 3s/step - accuracy: 0.5196 - loss: 0.9695 - val_accuracy: 0.5000 - val_loss: 0.7214
Epoch 2/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 3s/step - accuracy: 0.5087 - loss: 0.7095 - val_accuracy: 0.3542 - val_loss: 0.6889
Epoch 3/40
[1m 2/12[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m15s[0m 2s/step - accuracy: 0.5000 - loss: 0.6951



[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 639ms/step - accuracy: 0.5521 - loss: 0.6945 - val_accuracy: 0.4792 - val_loss: 0.6882
Epoch 4/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 3s/step - accuracy: 0.4842 - loss: 0.6928 - val_accuracy: 0.5000 - val_loss: 0.6824
Epoch 5/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 3s/step - accuracy: 0.5718 - loss: 0.6954 - val_accuracy: 0.7500 - val_loss: 0.6627
Epoch 6/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 801ms/step - accuracy: 0.5046 - loss: 0.6958 - val_accuracy: 0.6875 - val_loss: 0.6760
Epoch 7/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 2s/step - accuracy: 0.5790 - loss: 0.6980 - val_accuracy: 0.7396 - val_loss: 0.6385
Epoch 8/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 3s/step - accuracy: 0.6497 - loss: 0.6641 - val_accuracy: 0.6354 - val_loss: 0.6757
Epoch 9/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━

In [None]:
def plot_hist(history):
    """Show training curves for a Keras History object as two Plotly figures.

    The first figure compares 'accuracy' with 'val_accuracy', the second 'loss'
    with 'val_loss'; both are plotted per epoch on a logarithmic y-axis.

    Args:
        history: Object exposing `.history` (dict of per-epoch metric lists)
            and `.epoch` (list of epoch numbers), as returned by model.fit.
    """
    hist = pd.DataFrame(history.history).assign(epoch=history.epoch)

    # (figure title, y-axis label, metric columns drawn in that figure)
    charts = (
        ('Accuracy vs. Val Accuracy', 'Accuracy', ('accuracy', 'val_accuracy')),
        ('Loss vs. Val Loss', 'Loss', ('loss', 'val_loss')),
    )

    for title, y_label, columns in charts:
        fig = go.Figure()
        for column in columns:
            fig.add_trace(go.Scatter(x=hist['epoch'], y=hist[column], name=column, mode='markers+lines'))
        # 'Epoki' (Polish for "epochs") is the x-axis label shown in the figure.
        fig.update_layout(width=1000, height=500, title=title, xaxis_title='Epoki', yaxis_title=y_label, yaxis_type='log')
        fig.show()

plot_hist(history)

In [None]:
# Test images are only rescaled, never augmented. shuffle=False keeps the
# generator's order fixed so predictions line up with test_generator.classes
# and test_generator.filenames.
test_datagen = ImageDataGenerator(rescale=1./255.)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=1,
    class_mode='binary',
    shuffle=False
)

# The second positional parameter of Model.predict is batch_size, not a step
# count, and batch_size must not be given for generator input. With no extra
# arguments predict() runs once over the whole generator.
y_prob = model.predict(test_generator)
# Flatten the (n_samples, 1) sigmoid outputs to a 1-D array of probabilities.
y_prob = y_prob.ravel()
y_prob

In [None]:
# Predicted probability per test image plus the hard label: 1 when the
# probability is above 0.5, otherwise 0.
predictions = pd.DataFrame({'y_prob': y_prob})
above_threshold = predictions['y_prob'] > 0.5
predictions['class'] = above_threshold.astype('int64')
predictions

In [None]:
# Class indices reported by the test generator; they align with y_prob only
# because the generator was created with shuffle=False.
y_true = test_generator.classes
y_true

In [None]:
# Hard 0/1 predictions as a plain NumPy array.
y_pred = predictions['class'].to_numpy()
y_pred

In [None]:
# Show the class-name -> integer-label mapping the generator assigned, to read
# the 0/1 values in y_true and y_pred.
test_generator.class_indices

In [None]:
# Confusion matrix of true labels against predicted labels.
cm = confusion_matrix(y_true=y_true, y_pred=y_pred)
cm

In [None]:
# Per-file table of true vs. predicted labels, indexed by the file names the
# test generator reports.
errors = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred}, index=test_generator.filenames)
errors.head()

In [None]:
# Flag misclassified rows: 1 where the prediction differs from the true label, else 0.
mismatch = errors['y_true'].ne(errors['y_pred'])
errors['is_incorrect'] = mismatch.astype('int64')
errors

In [None]:
# File names of the misclassified test images.
errors.index[errors['is_incorrect'] == 1]

In [None]:
# Display one misclassified test image. The file is taken from the `errors`
# table rather than hard-coded, because which files end up in the test split
# and which of them are misclassified can differ from run to run.
misclassified = errors.index[errors['is_incorrect'] == 1]

if len(misclassified) == 0:
    print('No misclassified test images to display.')
else:
    # The index holds the generator's file names, which are paths relative to test_dir.
    img_path = os.path.join(test_dir, misclassified[0])

    img = image.load_img(img_path)

    plt.figure(figsize=(10, 10))
    plt.imshow(img)
    plt.title(misclassified[0])
    plt.grid(False)
    plt.axis(False)