In [1]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (1

In [19]:
from datasets import load_dataset

ds = load_dataset("youssefedweqd/Diabetic_Retinopathy_Detection")

In [20]:
print(ds)

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 25290
    })
    validation: Dataset({
        features: ['image', 'label'],
        num_rows: 2810
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 7026
    })
})


In [21]:
# Accessing the train split
train_data = ds['train']

# Accessing the test split
test_data = ds['test']

# Accessing the validation split (if available)
val_data = ds['validation']


In [22]:
# Print the first few rows of the train dataset
print(train_data[:5])


{'image': [<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x683 at 0x79E33C628D30>, <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x683 at 0x79E39C28C850>, <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x683 at 0x79E39C1ECFA0>, <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x683 at 0x79E33C62BEE0>, <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x683 at 0x79E33C629210>], 'label': [0, 0, 0, 0, 0]}


In [23]:

print(ds["train"][0])  # Print the first example from the train set


{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x683 at 0x79E33C628FD0>, 'label': 0}


In [24]:
import tensorflow as tf

def preprocess_data(example):
    # Normalize the image
    image = example['image'].convert('RGB')  # Ensure the image is RGB
    image = tf.image.resize(image, (224, 224))  # Resize the image
    image = tf.cast(image, tf.float32) / 255.0  # Normalize pixel values

    # Ensure the label is an integer
    label = tf.cast(example['label'], tf.int32)

    return image, label


In [25]:
import tensorflow as tf

# Convert the Hugging Face dataset to a TensorFlow dataset
tf_train_dataset = ds["train"].to_tf_dataset(
    columns=["image"],  # Columns of the dataset to use as features
    label_cols="label",  # Column to use as the label
    shuffle=True,  # Shuffle the data
    batch_size=32,  # Batch size
    collate_fn=lambda batch: {
        "image": tf.image.resize(tf.stack([item['image'] for item in batch]), (224, 224)) / 255.0,  # Process images
        "label": tf.convert_to_tensor([item['label'] for item in batch], dtype=tf.int32)  # Process labels
    }
)

tf_val_dataset = ds["validation"].to_tf_dataset(
    columns=["image"],
    label_cols="label",
    shuffle=False,
    batch_size=32,
    collate_fn=lambda batch: {
        "image": tf.image.resize(tf.stack([item['image'] for item in batch]), (224, 224)) / 255.0,
        "label": tf.convert_to_tensor([item['label'] for item in batch], dtype=tf.int32)
    }
)


#CNN Model

In [26]:
import tensorflow as tf

# Define a simple CNN model
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(shape=(224, 224, 3)),  # Input shape after resizing
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(5, activation='softmax')  # Assuming 5 classes, adjust as needed
])

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [27]:
# Check the first sample in the training dataset
print(ds["train"][0])


{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x683 at 0x79E39C1EFCA0>, 'label': 0}


In [30]:
# Convert the Hugging Face dataset to a TensorFlow dataset
tf_train_dataset = ds["train"].to_tf_dataset(
    columns=["image"],  # Columns of the dataset to use as features
    label_cols="label",  # Column to use as the label
    shuffle=True,  # Shuffle the data
    batch_size=32,  # Batch size
    collate_fn=lambda batch: {
        "image": tf.image.resize(tf.stack([tf.image.resize(item['image'], (224, 224)) for item in batch]), (224, 224)) / 255.0,  # Resize and normalize images
        "label": tf.convert_to_tensor([item['label'] for item in batch], dtype=tf.int32)  # Process labels
    }
)

tf_val_dataset = ds["validation"].to_tf_dataset(
    columns=["image"],
    label_cols="label",
    shuffle=False,
    batch_size=32,
    collate_fn=lambda batch: {
        "image": tf.image.resize(tf.stack([tf.image.resize(item['image'], (224, 224)) for item in batch]), (224, 224)) / 255.0,
        "label": tf.convert_to_tensor([item['label'] for item in batch], dtype=tf.int32)
    }
)


In [34]:

# Train the model
history = model.fit(tf_train_dataset, epochs=1, validation_data=tf_val_dataset)


[1m791/791[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3085s[0m 4s/step - accuracy: 0.7390 - loss: 0.8628 - val_accuracy: 0.7349 - val_loss: 0.8571


In [38]:

# Evaluate the model
test_loss, test_acc = model.evaluate(tf_val_dataset)
print(f"Test Accuracy: {test_acc * 100:.2f}%")

[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 1s/step - accuracy: 0.9567 - loss: 0.3971
Test Accuracy: 73.49%


In [41]:
import numpy as np
import tensorflow as tf
from PIL import Image
import requests
from io import BytesIO

# Function to load and preprocess an image
def preprocess_image(image_path):
    # Load the image from file or URL
    if image_path.startswith('http'):
        response = requests.get(image_path)
        img = Image.open(BytesIO(response.content))
    else:
        img = Image.open(image_path)

    # Convert the image to RGB, resize to (224, 224), and normalize
    img = img.convert('RGB')
    img = tf.image.resize(np.array(img), (224, 224))  # Resize image to match the model input
    img = img / 255.0  # Normalize the image
    img = np.expand_dims(img, axis=0)  # Add batch dimension (1 sample)

    return img

# Function to predict diabetic retinopathy or not
def predict_diabetes(image_path):
    # Preprocess the image
    img = preprocess_image(image_path)

    # Predict using the trained model
    predictions = model.predict(img)

    # Get the class with the highest probability
    predicted_class = np.argmax(predictions)

    # Define class labels (Adjust according to your dataset labels)
    labels = ['Class 0: No Diabetic Retinopathy',
              'Class 1: Mild Diabetic Retinopathy',
              'Class 2: Moderate Diabetic Retinopathy',
              'Class 3: Severe Diabetic Retinopathy',
              'Class 4: Proliferative Diabetic Retinopathy']

    # Output the result
    print(f"Predicted Class: {labels[predicted_class]}")
    return predicted_class, predictions[0][predicted_class]

# Example usage:
image_path = r"/content/drive/MyDrive/dia.jpg" # Path to your image or URL
predicted_class, confidence = predict_diabetes(image_path)
print(f"Confidence: {confidence * 100:.2f}%")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step
Predicted Class: Class 0: No Diabetic Retinopathy
Confidence: 72.13%
