<a href="https://colab.research.google.com/github/BRIAN12682/Automation-Projects/blob/main/NLEMalariaMicrscopyFYP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers



In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
# Updated folder paths
img_dir = '/content/drive/MyDrive/DATASETS/MalariaPI/images'
annotation_dir = '/content/drive/MyDrive/DATASETS/MalariaPI/annotation'


In [16]:
import os

# Path to your annotations directory
annotation_dir = '/content/drive/MyDrive/DATASETS/MalariaPI/annotation'

# List XML files and select the first one
xml_files = [f for f in os.listdir(annotation_dir) if f.endswith('.xml')]
first_xml_file = xml_files[0]

# Print the content of the first XML file
with open(os.path.join(annotation_dir, first_xml_file), 'r') as file:
    for i in range(10):  # Print the first 10 lines
        print(file.readline().strip())


<annotation>
<source>
<database>Makerere laboratory images database</database>
<annotation>Makerere medical school/Mulago Hospital</annotation>
<image></image>
<flickrid></flickrid>
</source>
<owner>
<flickrid></flickrid>
<name>unknown</name>


In [None]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms
import os
import xml.etree.ElementTree as ET
import torch

class MalariaDataset(Dataset):
    def __init__(self, img_dir, annotation_dir, transform=None):
        self.img_dir = img_dir
        self.annotation_dir = annotation_dir
        self.transform = transform
        self.annotations = self.load_annotations()

    def load_annotations(self):
        annotations = []
        for file in os.listdir(self.annotation_dir):
            if file.endswith('.xml'):
                tree = ET.parse(os.path.join(self.annotation_dir, file))
                root = tree.getroot()
                image_filename = file.replace('.xml', '.jpg')  # Assuming the image file matches the XML file name
                image_path = os.path.join(self.img_dir, image_filename)
                image = Image.open(image_path).convert('RGB')

                objects = []
                for obj in root.findall('object'):
                    objects.append({
                        'xmin': float(obj.find('bndbox/xmin').text),
                        'ymin': float(obj.find('bndbox/ymin').text),
                        'xmax': float(obj.find('bndbox/xmax').text),
                        'ymax': float(obj.find('bndbox/ymax').text),
                        'label': obj.find('name').text  # Adjust label as needed
                    })

                annotations.append({
                    'image': image,
                    'objects': objects,
                    'infected': 1 if objects else 0  # 1 if there are pathogens, otherwise 0
                })
        return annotations

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        annotation = self.annotations[idx]
        image = annotation['image']
        if self.transform:
            image = self.transform(image)

        boxes = [[obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']] for obj in annotation['objects']]
        labels = [0 for _ in annotation['objects']]  # Assuming '0' for pathogen

        target = {
            'boxes': torch.tensor(boxes),
            'labels': torch.tensor(labels),
            'infected': torch.tensor(annotation['infected'])
        }

        return image, target

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

img_dir = '/content/drive/MyDrive/DATASETS/MalariaPI/images'
annotation_dir = '/content/drive/MyDrive/DATASETS/MalariaPI/annotation'
dataset = MalariaDataset(img_dir, annotation_dir, transform=transform)


In [1]:
from torch.utils.data._utils.collate import default_collate

def custom_collate_fn(batch):
    # Separate data and targets
    data = [item[0] for item in batch]
    targets = [item[1] for item in batch]

    # Use default collate for data (images)
    data = default_collate(data)

    # Targets need custom handling:
    # Convert each attribute of targets to separate lists that can be processed individually
    boxes = [target['boxes'] for target in targets]
    labels = [target['labels'] for target in targets]
    infected = torch.stack([target['infected'] for target in targets])

    return data, {'boxes': boxes, 'labels': labels, 'infected': infected}


In [32]:
train_loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=custom_collate_fn)

In [None]:
import torch
from torchvision.models import vit_b_16
import torch.optim as optim
import torch.nn as nn

# Assuming the Vision Transformer model is adapted to output both bounding boxes and classification
model = vit_b_16(pretrained=True)
# Adjust the classifier to output bounding boxes + 1 binary output for classification
model.heads.head = nn.Linear(model.heads.head.in_features, 4 * len(dataset[0][1]['boxes']) + 1)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCEWithLogitsLoss()  # Change if using a different kind of target

# Number of epochs
epochs = 10
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for images, targets in train_loader:
        images = images.to(device)
        infected_labels = targets['infected'].to(device)

        # We need to handle variable-sized bounding boxes during the forward pass or calculate loss.
        # Assuming your model and loss function can handle this variable input directly.
        # If not, you might need to modify how you compute the loss.

        optimizer.zero_grad()
        outputs = model(images)
        # Let's assume outputs are properly aligned with how you handle targets; adjust as necessary
        predicted_infected = outputs[:, -1]  # Example, adjust index or method according to your model structure
        loss_infected = criterion(predicted_infected, infected_labels.float())
        # Add loss calculation for bounding boxes if necessary
        loss = loss_infected
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}')


  boxes = [torch.tensor(target['boxes']) for target in targets]
  labels = [torch.tensor(target['labels']) for target in targets]


In [5]:
import torch
from torchvision.models import vit_b_16, ViT_B_16_Weights


# Load the pretrained Vision Transformer model with specified weights
model = vit_b_16(weights=ViT_B_16_Weights.IMAGENET1K_V1)


Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:02<00:00, 135MB/s]


In [20]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os

class MalariaDataset(Dataset):
    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.transform = transform
        self.images = [f for f in os.listdir(folder_path) if f.endswith('.png')]  # Adjust based on your image file types

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = os.path.join(self.folder_path, self.images[idx])
        image = Image.open(img_name)
        if self.transform:
            image = self.transform(image)
        return image

# Define transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to the input size expected by ViT
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalization for pre-trained models
])

dataset = MalariaDataset(img_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)  # Adjust batch size based on your GPU memory


ValueError: num_samples should be a positive integer value, but got num_samples=0

In [18]:
batch_size = 32

train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


ValueError: num_samples should be a positive integer value, but got num_samples=0

# gpt-3

In [6]:
!pip install openai

Collecting openai
  Downloading openai-1.33.0-py3-none-any.whl (325 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m325.5/325.5 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, httpcore, httpx, openai
Successfully installed h11-0.14.0 httpcore-1.0.5 ht

In [None]:
import openai

def generate_explanation(annotation, openai_api_key):
    openai.api_key = openai_api_key

    prompt = f"Explain the significance of a malaria pathogen located at {annotation} in a microscopy image."
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=prompt,
        max_tokens=150
    )
    return response.choices[0].text

# Example usage
explanation = generate_explanation(annotations[0], 'your-openai-api-key')
