In [2]:
# 1: Identifying Problem
# Binary classification task to classify images into "stain" and "defect-free" categories.

# 2: Data Understanding

## Data Collection
from google.colab import drive
drive.mount('/content/drive')

import os
import cv2
import numpy as np

dataset_dir = "/content/drive/My Drive/miniproject_daminda_sir/dataset/images"
categories = ["stain", "defect_free"]
image_size = (150, 150)

annotation_dir = os.path.join(dataset_dir, "annotation")
if os.path.exists(annotation_dir):
    for file in os.listdir(annotation_dir):
        os.remove(os.path.join(annotation_dir, file))
    os.rmdir(annotation_dir)
    print("Annotation folder and files removed successfully.")

## Data Exploration
images = []
labels = []

for category in categories:
    category_path = os.path.join(dataset_dir, category)
    label = categories.index(category)
    for img_filename in os.listdir(category_path):
        try:
            img = cv2.imread(os.path.join(category_path, img_filename))
            img = cv2.resize(img, image_size)
            images.append(img)
            labels.append(label)
        except Exception as e:
            print("Error loading image:", e)

images = np.array(images)
labels = np.array(labels)

num_stain_images = np.sum(labels == 0)
num_defect_free_images = np.sum(labels == 1)
image_dimensions 
= images.shape[1:]

print("Number of stain images:", num_stain_images)
print("Number of defect-free images:", num_defect_free_images)
print("Image dimensions:", image_dimensions)

# Descriptive Analysis
mean_pixel_value = np.mean(images)
std_pixel_value = np.std(images)

print("Mean Pixel Value:", mean_pixel_value)
print("Standard Deviation of Pixel Value:", std_pixel_value)

class_distribution = {category: np.sum(labels == categories.index(category)) for category in categories}
print("Class Distribution:", class_distribution)

# 3: Data Preprocessing

## Cleaning (Already done by removing the "annotation" folder)

## Transformation
images = images / 255.0

# 4: Data Mining

## Predictive Analysis

from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Build CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=image_dimensions),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test), verbose=1)

# 5: Evaluation and Interpretation

## Results
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Number of stain images: 398
Number of defect-free images: 68
Image dimensions: (150, 150, 3)
Mean Pixel Value: 166.10313991416308
Standard Deviation of Pixel Value: 31.830945249282617
Class Distribution: {'stain': 398, 'defect_free': 68}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.7978723645210266


In [3]:
import tensorflow as tf

# Save Keras model
model.save("model_unquant.h5")

# Convert to TFLite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
model_tflite = converter.convert()

# Save TFLite model to file
with open("model_unquant.tflite", "wb") as f:
    f.write(model_tflite)


  saving_api.save_model(
