<a href="https://colab.research.google.com/github/ElijahWandimi/Datascience/blob/main/Image_Preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np

from PIL import Image
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.models import resnet18

from sklearn.manifold import TSNE

from google.colab import files
import warnings
warnings.filterwarnings("ignore")


#### downloading data

In [None]:
# Open Colab's upload widget; presumably used to upload kaggle.json,
# which the next cell copies into ~/.kaggle.
files.upload()

In [None]:
# Copy the uploaded kaggle.json into ~/.kaggle and restrict it to the owner.
# -p keeps the cell re-runnable when ~/.kaggle already exists.
! mkdir -p ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the sundarannamalai/hair-diseases dataset with the Kaggle CLI.
!kaggle datasets download -d sundarannamalai/hair-diseases

In [None]:
# Extract the downloaded archive into ./hair_diseases.
# -p / -o make the cell safe to re-run: no "File exists" error and no
# interactive overwrite prompt from unzip.
! mkdir -p hair_diseases
! unzip -o hair-diseases.zip -d hair_diseases


#### Analysis
Grayscale conversion, histogram equalization (HE), scaling, shaping, and feature extraction

In [None]:
train_root_path = '/content/hair_diseases/Hair Diseases - Final/train'

# One sub-directory per class. os.listdir returns entries in arbitrary order,
# so sort for a reproducible listing, and skip anything that is not a
# directory so a stray file in the root cannot break the listing below.
class_paths = [
    os.path.join(train_root_path, name)
    for name in sorted(os.listdir(train_root_path))
    if os.path.isdir(os.path.join(train_root_path, name))
]

# Flat list of every file path found inside the class directories.
train_imgs = [
    os.path.join(cls, f)
    for cls in class_paths
    for f in sorted(os.listdir(cls))
]

In [None]:

def display_image(img: torch.Tensor):
    """Plot an image tensor with matplotlib.

    The first dimension is moved to the end before plotting
    (assumes ``img`` is laid out as (C, H, W) -- TODO confirm with callers).
    """
    channels_last = img.permute(1, 2, 0)
    plt.imshow(channels_last.cpu())

def convert_to_gray(img: torch.Tensor):
    """Return a grayscale version of ``img``.

    ``RandomGrayscale`` is used with ``p=1.0`` so the conversion is always
    applied; with a lower probability the function would sometimes hand back
    the untouched colour image. The transform keeps the channel count of the
    input.
    """
    to_gray = transforms.RandomGrayscale(p=1.0)
    # Call the module itself rather than .forward so module hooks still run.
    return to_gray(img)


def HE_transform(img: torch.Tensor):
    """Return ``img`` with histogram equalization applied.

    ``RandomEqualize`` is used with ``p=1.0`` so equalization always happens;
    with a lower probability some calls would return the input unchanged.
    NOTE(review): torchvision documents that tensor inputs must be uint8 for
    equalization -- confirm callers never pass float tensors.
    """
    equalize = transforms.RandomEqualize(p=1.0)
    # Call the module itself rather than .forward so module hooks still run.
    return equalize(img)

def normalize_transform(img: torch.Tensor):
    """Standardise ``img`` with its own global mean and standard deviation.

    The tensor is cast to float first; a single mean and a single std are
    computed over all elements and passed to ``transforms.Normalize``.
    """
    as_float = img.float()
    global_mean = torch.mean(as_float)
    global_std = torch.std(as_float)
    return transforms.Normalize(global_mean, global_std).forward(as_float)

def scale_transform(img: torch.Tensor):
    """Resize ``img`` to 420 x 420 using ``transforms.Resize``."""
    target_size = (420, 420)
    return transforms.Resize(target_size).forward(img)

def transform_images(img: torch.Tensor):
    """Apply each demo transform to ``img`` independently.

    Returns a list with, in order: the histogram-equalization result, the
    grayscale result, the normalised result and the resized result. Every
    transform receives the original ``img``, not the previous output.
    """
    equalized = HE_transform(img)
    gray = convert_to_gray(img)
    normalized = normalize_transform(img)
    resized = scale_transform(img)
    return [equalized, gray, normalized, resized]


In [None]:
# Display the first collected image path as a quick sanity check.
train_imgs[0]

In [None]:
# Load one training image and preview each preprocessing step side by side.
example = torchvision.io.read_image(train_imgs[0])
tm_imgs = transform_images(example)

titles = ['Histogram Equalization', 'Grayscale', 'Normalised {brightnes and hue}', 'Scaled {420 x 420}']

fig, ax = plt.subplots(1, 4, figsize=(12, 10))
for axis, img, title in zip(ax, tm_imgs, titles):
    # Move the first dimension to the end before handing the tensor to imshow.
    shown = img.permute(1, 2, 0).cpu()
    if shown.is_floating_point():
        # imshow clips float RGB data to [0, 1]; the normalised image has
        # values outside that range, so rescale it for display only.
        lo, hi = shown.min(), shown.max()
        shown = (shown - lo) / (hi - lo) if hi > lo else torch.zeros_like(shown)
    axis.imshow(shown)
    axis.set_title(title)

In [None]:

# Training-time preprocessing: resize, randomly equalize (70% of samples),
# convert to a tensor, then normalise each of the three channels with
# mean 0.5 and std 0.5.
_train_steps = [
    transforms.Resize(size=(224, 224)),
    transforms.RandomEqualize(p=0.7),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
]
transform = transforms.Compose(_train_steps)

# Images are read from the class sub-folders of the training root and
# served in shuffled batches of 32.
dataset = ImageFolder(root=train_root_path, transform=transform)
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

#  CNN Model
class HairDiseaseCNN(nn.Module):
    """Pre-trained ResNet-18 backbone with a new linear classification head.

    Args:
        num_classes: Number of output classes of the new head.

    The attribute names ``resnet`` and ``fc`` are part of the saved
    ``state_dict`` keys, so they must stay stable for existing checkpoints.
    """

    def __init__(self, num_classes):
        super().__init__()
        # NOTE(review): newer torchvision releases prefer the ``weights=``
        # argument over ``pretrained=True`` -- confirm against the installed
        # version before changing it.
        backbone = resnet18(pretrained=True)  # Load a pre-trained ResNet model
        # Read the head's input width from the backbone instead of
        # hard-coding it, so the head always matches the backbone.
        feature_dim = backbone.fc.in_features
        # Keep every child module except the last one (the original fc layer).
        self.resnet = nn.Sequential(*list(backbone.children())[:-1])
        self.fc = nn.Linear(feature_dim, num_classes)  # Custom classification head

    def forward(self, x):
        """Return the output of the linear head for a batch ``x``."""
        x = self.resnet(x)
        # Collapse everything after the batch dimension into one vector.
        x = torch.flatten(x, 1)
        return self.fc(x)

# Train the CNN on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# One output per class folder discovered by the dataset.
num_classes = len(dataset.classes)
model = HairDiseaseCNN(num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, batch in enumerate(train_loader):
        # Move both the images and their labels to the training device.
        inputs, labels = (t.to(device) for t in batch)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean batch loss for this epoch.
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}")

print("Training finished!")

# Persist the trained weights so later cells can reload them.
torch.save(model.state_dict(), "hair_disease_cnn.pth")

In [None]:
# Deterministic preprocessing for extraction: the same resize and
# normalisation as training, but without the random equalization step.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Build a dataset that actually uses the transform above; reusing the
# training `dataset` would silently keep RandomEqualize in the pipeline.
# shuffle=False keeps the outputs in the dataset's own sample order.
ext_dataset = ImageFolder(train_root_path, transform=transform)
data_ext_loader = DataLoader(ext_dataset, batch_size=32, shuffle=False)

model_extractor = HairDiseaseCNN(num_classes).to(device)
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime.
model_extractor.load_state_dict(torch.load("hair_disease_cnn.pth", map_location=device))

model_extractor.eval()

#  Feature Extraction
# Note: the model's forward pass ends in its linear head, so each row
# collected here has one value per class.
all_features = []
with torch.no_grad():
    for inputs, _ in data_ext_loader:
        inputs = inputs.to(device)
        features = model_extractor(inputs)
        # Move each batch off the device right away so results do not
        # accumulate in GPU memory.
        all_features.append(features.cpu())

# Concatenate features from all batches into a single tensor
features_tensor = torch.cat(all_features)

# Now 'features_tensor' contains the extracted features from the model
print(features_tensor.shape)

In [None]:
# Standardise every feature column to zero mean and unit variance.
features_np = features_tensor.cpu().numpy()
feature_mean = features_np.mean(axis=0)
feature_std = features_np.std(axis=0)
# A constant column has zero std; dividing by it would produce NaNs,
# so leave such columns merely centred.
feature_std[feature_std == 0] = 1.0
features_np = (features_np - feature_mean) / feature_std

# Perform t-SNE on the features
tsne = TSNE(n_components=2, random_state=42)
embedded_features = tsne.fit_transform(features_np)

# Read the labels straight from the dataset's index instead of iterating
# the dataset, which would decode and transform every image again.
class_labels = np.array(dataset.targets)

# Create a scatter plot for each class
plt.figure(figsize=(10, 8))
for class_idx in np.unique(class_labels):
    class_mask = (class_labels == class_idx)
    plt.scatter(embedded_features[class_mask, 0], embedded_features[class_mask, 1], label=f"Class {class_idx}", alpha=0.7)

plt.xlabel("t-SNE Component 1")
plt.ylabel("t-SNE Component 2")
plt.legend()
plt.title("t-SNE Visualization of Extracted Features")
plt.show()