Data preprossing


In [16]:
import os
import numpy as np
import cv2
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define paths
dataset_path = r'C:\Users\dhars\Downloads\Body fluid cell Dataset\Body fluid cell\Dataset'  # Path where your dataset is located
output_path = r'C:\Users\dhars\Downloads\Body fluid cell Dataset\Body fluid cell\Preprossed_data'  # Path where you want to save preprocessed data

# Desired image size (reduce this to fit in memory)
desired_size = (128, 128)

# Create output directories
os.makedirs(output_path, exist_ok=True)

# Function to load images and labels from a given directory
def load_data(data_dir, class_label):
    images = []
    labels = []
    print(f"Loading data from {data_dir}...")
    for img_name in os.listdir(data_dir):
        img_path = os.path.join(data_dir, img_name)
        if img_path.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
            img = cv2.imread(img_path)
            if img is None:
                print(f"Failed to load image: {img_path}")
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, desired_size)  # Resize image
            images.append(img)
            labels.append(class_label)
            print(f"Loaded image: {img_path}")  # Debug information
        else:
            print(f"Ignoring non-image file: {img_path}")
    return np.array(images), np.array(labels)

# Define a single class label for all images (change if you have multiple classes)
class_label = 'body_fluid_cell'

# Load training data
train_dir = os.path.join(dataset_path, 'train')
X_train, y_train = load_data(train_dir, class_label)
print(f"Loaded training data: {X_train.shape}, {y_train.shape}")

# Load validation data
val_dir = os.path.join(dataset_path, 'valid')
X_val, y_val = load_data(val_dir, class_label)
print(f"Loaded validation data: {X_val.shape}, {y_val.shape}")

# Load test data
test_dir = os.path.join(dataset_path, 'test')
X_test, y_test = load_data(test_dir, class_label)
print(f"Loaded test data: {X_test.shape}, {y_test.shape}")

# Ensure that the loaded data is not empty
assert X_train.size > 0, "X_train is empty"
assert y_train.size > 0, "y_train is empty"
assert X_val.size > 0, "X_val is empty"
assert y_val.size > 0, "y_val is empty"
assert X_test.size > 0, "X_test is empty"
assert y_test.size > 0, "y_test is empty"

# Normalize images
X_train = X_train.astype('float32') / 255.0
X_val = X_val.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# Encode labels
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_val = label_encoder.transform(y_val)
y_test = label_encoder.transform(y_test)

# Data augmentation (optional, not saving augmented data)
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    zoom_range=0.2
)

# Save preprocessed data
np.save(os.path.join(output_path, 'X_train.npy'), X_train)
np.save(os.path.join(output_path, 'y_train.npy'), y_train)
np.save(os.path.join(output_path, 'X_val.npy'), X_val)
np.save(os.path.join(output_path, 'y_val.npy'), y_val)
np.save(os.path.join(output_path, 'X_test.npy'), X_test)
np.save(os.path.join(output_path, 'y_test.npy'), y_test)

# Save label encoder
with open(os.path.join(output_path, 'label_encoder.npy'), 'wb') as f:
    np.save(f, label_encoder.classes_)

print("Preprocessed data saved successfully.")



In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# Load preprocessed data
output_path = r'C:\Users\dhars\Downloads\Body fluid cell Dataset\Body fluid cell\Preprossed_data'

def load_data(path, name):
    data = np.load(os.path.join(path, name))
    print(f"Loaded {name}: shape={data.shape}, size={data.size}")  # Debug information
    if data.size == 0:
        raise ValueError(f"Loaded data {name} is empty.")
    return data

X_train = load_data(output_path, 'X_train.npy')
y_train = load_data(output_path, 'y_train.npy')
X_val = load_data(output_path, 'X_val.npy')
y_val = load_data(output_path, 'y_val.npy')
X_test = load_data(output_path, 'X_test.npy')
y_test = load_data(output_path, 'y_test.npy')

# Ensure that the loaded data is not empty
assert X_train.size > 0, "X_train is empty"
assert y_train.size > 0, "y_train is empty"
assert X_val.size > 0, "X_val is empty"
assert y_val.size > 0, "y_val is empty"
assert X_test.size > 0, "X_test is empty"
assert y_test.size > 0, "y_test is empty"

# Convert labels to categorical
num_classes = len(np.unique(y_train))
print(f"Number of classes: {num_classes}")

y_train = to_categorical(y_train, num_classes)
y_val = to_categorical(y_val, num_classes)
y_test = to_categorical(y_test, num_classes)

# Define the model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=X_train.shape[1:]),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Define early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val), callbacks=[early_stopping])

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc:.2f}')

# Save the model
model_save_path = os.path.join(output_path, 'cell_classification_model.h5')
model.save(model_save_path)
print(f'Model saved to {model_save_path}')

Loaded X_train.npy: shape=(8656, 128, 128, 3), size=425459712
Loaded y_train.npy: shape=(8656,), size=8656
Loaded X_val.npy: shape=(495, 128, 128, 3), size=24330240
Loaded y_val.npy: shape=(495,), size=495
Loaded X_test.npy: shape=(247, 128, 128, 3), size=12140544
Loaded y_test.npy: shape=(247,), size=247
Number of classes: 1


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50


  return self.fn(y_true, y_pred, **self._fn_kwargs)


[1m210/271[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m13s[0m 228ms/step - accuracy: 1.0000 - loss: 0.0000e+00

KeyboardInterrupt: 

In [None]:
import numpy as np
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import os

# Load preprocessed data
output_path = r'C:\Users\dhars\Downloads\Body fluid cell Dataset\Body fluid cell\Preprossed_data'

X_test = np.load(os.path.join(output_path, 'X_test.npy'))
y_test = np.load(os.path.join(output_path, 'y_test.npy'))

# Load the trained model
model_path = os.path.join(output_path, 'cell_classification_model.h5')
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file not found: {model_path}")

model = tf.keras.models.load_model(model_path)

# Predict on the test data
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Classification report
print("Classification Report:")
print(classification_report(y_true, y_pred_classes))

# Confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred_classes)

# Plot confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()