In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Load the dataset 

In [None]:
import pathlib
import tensorflow as tf
# Root folder of the image dataset; later cells glob '*/*.jpg' under it and pass it to
# image_dataset_from_directory, so it is expected to contain one sub-folder per class.
data_dir = pathlib.Path('../input/pcbexperiment/dataset/tobeaugmented')

In [None]:
# Count the .jpg files located exactly one folder below data_dir.
# Tally the matches lazily instead of building a throw-away list first.
image_count = sum(1 for _ in data_dir.glob('*/*.jpg'))
print(image_count)

In [None]:
import os

defected_dir = '../input/pcbexperiment/dataset/tobeaugmented/defected'
# The first triple yielded by os.walk() describes defected_dir itself: (path, sub-dirs, files).
# Note: despite its name, defected_count holds the list of file names, not a number.
_defected_root, _defected_subdirs, defected_count = next(os.walk(defected_dir))
dc = len(defected_count)
print("Images for defected PCB : ", dc)

In [None]:
import os

# Count the entries in the non-defect class folder.
# The listing is stored under a descriptive name instead of `list`: rebinding `list` hides the
# builtin for the rest of the kernel session, so any cell that later calls list(...) (including
# a re-run of an earlier cell) would fail with "TypeError: 'list' object is not callable".
# os.listdir returns every directory entry, so sub-folders (if any) are counted as well.
non_defect_entries = os.listdir('../input/pcbexperiment/dataset/tobeaugmented/non-defect')
nc = len(non_defect_entries)
print("Image count for non-defect PCB :",nc)

# Is the dataset imbalanced?
We can check whether the dataset is imbalanced across the different classes by plotting a bar graph with the matplotlib library, using the following code.

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of images per class, with the exact count written at the end of each bar.
x = ["Defected PCB","non-defect PCB"]
y = [dc,nc]
fig, ax = plt.subplots()
ax.barh(x, y)
for bar_row, bar_count in enumerate(y):
    # Bars sit at integer y-positions 0, 1, ...; place the label at the bar's right edge.
    ax.text(bar_count, bar_row, str(bar_count))

Here, we see that the number of images for defected PCB/1 is much smaller than for non-defect PCB/0. Hence the dataset is imbalanced, and the dominating class can cause unfair (biased) results.

In [None]:
import os
non_defect_dir = '../input/pcbexperiment/dataset/tobeaugmented/non-defect'
# os.walk() yields (path, sub-dirs, files); the first triple belongs to non_defect_dir itself,
# so its third element is the list of plain files directly inside that folder.
_walk_root, _walk_subdirs, onlyfiles = next(os.walk(non_defect_dir))
print(len(onlyfiles))

In [None]:
import numpy as np
import os
import PIL
import PIL.Image
import tensorflow as tf
import tensorflow_datasets as tfds

In [None]:
from PIL import Image
#...
# Open one sample image of the "defected" class and show it as the cell's output.
defected_path='../input/pcbexperiment/dataset/tobeaugmented/defected/20200707_085703.jpg'
img = Image.open(defected_path)
# Display the handle that was just opened rather than opening the same file a second time;
# this also removes the dependency on a separate `import PIL` from another cell.
img

In [None]:
from PIL import Image
#...
# Open one sample image of the "non-defect" class and show it as the cell's output.
nondefectpath='../input/pcbexperiment/dataset/tobeaugmented/non-defect/20200630_114408(1).jpg'
img = Image.open(nondefectpath)
# Display the handle that was just opened rather than opening the same file a second time;
# this also removes the dependency on a separate `import PIL` from another cell.
img

In [None]:
# Number of images per batch; passed as batch_size= to the dataset loaders below.
batch_size = 32
# Target (height, width) in pixels; passed as image_size= to the dataset loaders below.
img_height = 180
img_width = 180

In [None]:
# Options for the 80/20 split. validation_split and seed must match the values used for the
# "validation" subset so that the two subsets are carved from the same shuffled file list.
train_split_kwargs = dict(
  validation_split=0.2,
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size,
)

# Training portion of the images found under data_dir.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
  data_dir, subset="training", **train_split_kwargs)

In [None]:
# Options for the 80/20 split. validation_split and seed must match the values used for the
# "training" subset so that the two subsets are carved from the same shuffled file list.
val_split_kwargs = dict(
  validation_split=0.2,
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size,
)

# Held-out validation portion of the images found under data_dir.
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
  data_dir, subset="validation", **val_split_kwargs)

In [None]:
# Class labels as exposed by the dataset object returned from image_dataset_from_directory.
# NOTE(review): train_ds is re-bound to a cached/prefetched pipeline in a later cell; presumably
# that transformed dataset no longer exposes .class_names, so this cell has to run first — verify.
class_names = train_ds.class_names
print(class_names)

In [None]:
import matplotlib.pyplot as plt

# Preview up to nine images from the first training batch in a 3x3 grid,
# each titled with its class name.
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
  # Do not index past the end of the batch: a small dataset (or a smaller batch_size)
  # can yield a first batch with fewer than nine images.
  preview_count = min(9, int(images.shape[0]))
  for i in range(preview_count):
    ax = plt.subplot(3, 3, i + 1)
    # Pixel values arrive as floats; cast to uint8 so imshow renders them as 0-255 colours.
    plt.imshow(images[i].numpy().astype("uint8"))
    plt.title(class_names[labels[i]])
    plt.axis("off")

In [None]:
# Inspect only the first batch: print the shape of its image tensor and of its label tensor.
for image_batch, labels_batch in train_ds.take(1):
  print(image_batch.shape, labels_batch.shape, sep="\n")

# Standardize the data
The RGB channel values are in the [0, 255] range. This is not ideal for a neural network; in general you should seek to make your input values small. Here, we will standardize values to be in the [0, 1] range by using a Rescaling layer.

In [None]:
from tensorflow.keras import layers

# Stand-alone layer that multiplies its input by 1/255.
# NOTE(review): this object is not referenced by any other visible cell — the model below builds
# its own Rescaling(1./255) layer — so it looks unused; confirm before removing.
normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Apply the same cache -> prefetch pipeline to both splits.
# The tuple on the right is read before either name is re-bound.
train_ds, val_ds = [
  split.cache().prefetch(buffer_size=AUTOTUNE)
  for split in (train_ds, val_ds)
]

In [None]:
num_classes = 2

# Three identical feature-extraction stages: Conv2D(32 filters, 3x3, ReLU) followed by max pooling.
# Fresh layer objects are created on every pass of the outer loop.
conv_stages = [
  stage_layer
  for _ in range(3)
  for stage_layer in (layers.Conv2D(32, 3, activation='relu'), layers.MaxPooling2D())
]

model = tf.keras.Sequential([
  # Scale pixel values by 1/255 inside the model itself.
  layers.experimental.preprocessing.Rescaling(1./255),
  *conv_stages,
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  # No activation here: the final layer emits one raw score per class.
  layers.Dense(num_classes),
])

In [None]:
# The loss is built with from_logits=True, i.e. it is given un-normalised scores
# and integer class labels.
logit_loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(optimizer='Nadam', loss=logit_loss, metrics=['accuracy'])

In [None]:
# Train for three epochs, scoring on val_ds after each one; the returned object is kept
# so the per-epoch metrics can be plotted afterwards.
history = model.fit(train_ds, epochs=3, validation_data=val_ds)

In [None]:
# Score the trained model on the validation dataset. The result is unpacked into two values:
# the loss and the single metric ('accuracy') the model was compiled with.
loss, accuracy = model.evaluate(val_ds)

In [None]:
# Show which metric series were recorded during training.
print(history.history.keys())
# Summarize history for accuracy: one curve for the training data, one for the validation data.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
# The second curve is 'val_accuracy', measured on the validation_data passed to fit();
# label it as validation rather than test, since no separate test set is used here.
plt.legend(['train', 'validation'], loc='upper left')
plt.show()