Import Packages 

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
%config InlineBackend.figure_format="retina"
import numpy as np
from PIL import Image
import os
import cv2
import random
import torch
from torch import nn, optim
import math
from IPython import display
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
import torch.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset
import pdb
import ast

Create list of morph classes

In [None]:
classes = ()

Load dataset

In [99]:
base_color_tags = ["red", "orange", "yellow", "dark", "black", "lavender", "lav", "brown", "copper", "cream", "white", "creamy", "olive"]
base_color_labels = ["red", "yellow", "dark", "lavender", "cream", "olive"]

We want to sort the raw tags from scraping, the ones listed in `base_color_tags` to fit the tags we want to classify into: `base_color_labels`. For this `dark` covers `black`, `brown`; `red` covers `copper`; `lavender` covers `lav`; and `cream` covers `creamy` and `white`. 

In [100]:
gecko_traits = pd.read_csv("trait-image.csv")

# Clean data up a bit
index = 0
for item in gecko_traits["Tag"]:
    item = ast.literal_eval(item) # convert sting to list
    new_item = []
    for tag in item:
        tag = tag.lower() # convert to lowercase
        new_item.append(tag)
    gecko_traits.loc[index, "Tag"] = new_item
    index += 1

gecko_traits

Unnamed: 0,Tag,Image Name
0,[phantom],2123697.jpeg
1,"[pinstripe, dark]",2702167.jpeg
2,"[dalmatian, super dalmatian, ink spot, red spo...",2702191.jpeg
3,[buckskin],2571629.jpeg
4,"[dalmatian, harlequin, yellow]",2077235.jpeg
...,...,...
5920,"[pin-dashed, red base]",1616388.jpeg
5921,[harlequin],2398217.jpeg
5922,"[brindle, dalmatian]",2673233.jpeg
5923,"[flame, tri-color]",2662688.jpeg


To Train the base color classification part of the model, we need to make a dataset that tags them by color:

In [128]:
new = []

for index, row in gecko_traits.iterrows():
    tags = row["Tag"]
    image_name = row["Image Name"]
    color = []
    traits = []
    addit = False

    for tag in tags:
        if tag in base_color_tags: # get base color traits
            color.append(tag)
            addit = True
        else: # get pattern traits
            traits.append(tag)

    if addit: # add to new dataframe for base color training
        new.append({"Color": color, "Traits": traits, "Image Name": image_name})

geckos_by_color = pd.DataFrame(new)
geckos_by_color

Unnamed: 0,Color,Traits,Image Name
0,[dark],[pinstripe],2702167.jpeg
1,[red],"[dalmatian, super dalmatian, ink spot, red spot]",2702191.jpeg
2,[yellow],"[dalmatian, harlequin]",2077235.jpeg
3,"[cream, black, dark]",[extreme harlequin],1563067.jpeg
4,[red],[harlequin],2702154.jpeg
...,...,...,...
2952,"[orange, dark]",[partial pinstripe],2083303.jpeg
2953,[orange],"[brindle, harlequin, portholes]",1967252.jpeg
2954,"[cream, yellow]",[harlequin],1880987.jpeg
2955,[red],"[dalmatian, portholes, bi-color]",1880952.jpeg


Normalize Color labels as described above. 

In [129]:
redundants = ["black", "lav", "brown", "orange", "copper", "white", "creamy"]
index = 0

for item in geckos_by_color["Color"]:
    tags_ind = 0
    tags = geckos_by_color.loc[index, "Color"]
    for tag in item:
        if tag in redundants:
            if tag in ["black", "brown"]:
                tags[tags_ind] = "dark"
            elif tag in ["lav"]:
                tags[tags_ind] = "lavender"
            elif tag in ["copper", "orange"]:
                tags[tags_ind] = "red"
            elif tag in ["white", "creamy"]:
                tags[tags_ind] = "cream"
        tags_ind += 1
    geckos_by_color.loc[index, "Color"] = list(np.unique(np.array(tags)))
    index += 1

geckos_by_color

Unnamed: 0,Color,Traits,Image Name
0,[dark],[pinstripe],2702167.jpeg
1,[red],"[dalmatian, super dalmatian, ink spot, red spot]",2702191.jpeg
2,[yellow],"[dalmatian, harlequin]",2077235.jpeg
3,"[cream, dark]",[extreme harlequin],1563067.jpeg
4,[red],[harlequin],2702154.jpeg
...,...,...,...
2952,"[dark, red]",[partial pinstripe],2083303.jpeg
2953,[red],"[brindle, harlequin, portholes]",1967252.jpeg
2954,"[cream, yellow]",[harlequin],1880987.jpeg
2955,[red],"[dalmatian, portholes, bi-color]",1880952.jpeg


Translate to one-hot encoding

In [143]:
color_lists = geckos_by_color.explode("Color")
one_hotted = pd.get_dummies(color_lists, columns=["Color"], prefix='', prefix_sep='')
one_hot_color = one_hotted.groupby(level=0).sum()
one_hot_color["Image Name"] = geckos_by_color["Image Name"].values

one_hot_color

Unnamed: 0,Traits,Image Name,cream,dark,lavender,olive,red,yellow
0,[pinstripe],2702167.jpeg,0,1,0,0,0,0
1,"[dalmatian, super dalmatian, ink spot, red spot]",2702191.jpeg,0,0,0,0,1,0
2,"[dalmatian, harlequin]",2077235.jpeg,0,0,0,0,0,1
3,"[extreme harlequin, extreme harlequin]",1563067.jpeg,1,1,0,0,0,0
4,[harlequin],2702154.jpeg,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...
2952,"[partial pinstripe, partial pinstripe]",2083303.jpeg,0,1,0,0,1,0
2953,"[brindle, harlequin, portholes]",1967252.jpeg,0,0,0,0,1,0
2954,"[harlequin, harlequin]",1880987.jpeg,1,0,0,0,0,1
2955,"[dalmatian, portholes, bi-color]",1880952.jpeg,0,0,0,0,1,0


Create a method to turn dataframe into dataloader

In [None]:
class DataFromFrame(df):
    def __init__(self, df, trgt_cols=base_color_labels):
        self.df = df
        self.trgt_cols = trgt_cols
        self.len = len(self.df)
        self.data = self.df.drop(trgt_cols, axis=1)
        self.targets = self.df[trgt_cols]

    def Translate(self, ):
        self.data = self.data.values
        self.targets = self.targets.values
        


Image size generalization

In [None]:
def Load_Folder_Images(folder_path):
    images = []
    for filename in os.listdir(folder_path):
        img = Image.open(os.path.join(folder_path, filename))
        img = img.resize((64, 64))
        img = np.array(img)
        images.append(img)
    return np.array(images)

def Resize_Images(images, size):
    resized_images = []
    for img in images:
        img = Image.fromarray(img)
        img = img.resize((size, size))
        img = np.array(img)
        resized_images.append(img)
    return np.array(resized_images)

def convert_and_save_images_to_greyscale(images, path_save):
    greyscale_images = []
    if not os.path.exists(path_save):
        os.makedirs(path_save)
    for filename, img in images:
        grey_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        greyscale_images.append(grey_img)
        save_path = os.path.join(path_save, filename)
        cv2.imwrite(save_path, grey_img)
    return np.array(greyscale_images)

In [None]:
def load_images(data_dir, img_size):
    images = []
    labels = []
    class_names = sorted(os.listdir(data_dir))  # Get class names from subfolders
    
    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_dir):
            continue
        
        for img_file in os.listdir(class_dir):
            img_path = os.path.join(class_dir, img_file)
            # Only process valid image files
            if not img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                print(f"Skipping non-image file: {img_file}")
                continue
            try:
                # Load the image, resize, and convert to grayscale
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                img = cv2.resize(img, (img_size, img_size))
                images.append(img)
                labels.append(label)
            except Exception as e:
                print(f"Failed to process {img_path}: {e}")
    
    images = np.array(images)
    labels = np.array(labels)
    return images, labels, class_names

# Set parameters
train_dir = "Train"  # Path to the training dataset
test_dir = "Test"    # Path to the testing dataset
img_size = 64        # Resize images to 64x64 pixels

In [None]:
# load data
images = Load_Folder_Images(path)

In [None]:
greys = convert_images_to_greyscale(images, path_save)

In [1]:
Gecko_Traits = pd.read_csv("trait-image.csv")

NameError: name 'pd' is not defined

Isolate gecko from background

we need to isolate the gecko from the background first to generalize how the model will be seeing the images later on, we do not want the morphology to be affected by how the background looks. So, a quick review of sights like morph market show that there are three main conditions we need to deal with, a white background, a black background, and mst commonly a background involving natural elements like grass or woodchips. 

In [None]:
class Gecko_Classifier(nn.module):
    def __init__(self):
        super(Gecko_Classifier, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=2)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=2)
        self.pool - nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout(0.15)
        self.dropout2 = nn.Dropout(0.25)
        self.dropout3 = nn.Dropout(0.4)
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x): 
        x = self.pool(F.relu(self.conv1(x)))
        x = self.dropout1(x)
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout2(x)
        x = x.view(x.size(0), -1)
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout3(x)
        x = nn.functional.softmax(self.fc2(x))
        return x
    
base_colour_labels = ["Red", "Yellow", "Dark", "Lavender", "Brown", "Cream", "Olive"]

class Gecko_Colour(nn.module):
    def __init__(self):
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=2)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=2)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.pool2 = nn.pyramid_pool2d(2, 2)
        self.dropout1 = nn.Dropout(0.15)
        self.dropout2 = nn.Dropout(0.25)
        self.dropout3 = nn.Dropout(0.4)
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.dropout1(x)
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.dropout2(x)
        x = x.view(x.size(0), -1)
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout3(x)
        x = nn.functional.softmax(self.fc2(x))
        return x
    
class Gecko_lineage(nn.module):
    def __initi__(self):
        
        pass
    def forward(self, x)
        return x
    
def train_model():
    pass

In [None]:
def Train_Color_Model(model, train_loader, criterion, optimizer, epochs=10):
    pass

In [None]:
# Load training and testing data
X_train, y_train, class_names = load_images(train_dir, img_size)
X_test, y_test, _ = load_images(test_dir, img_size)  # Use the same class names

# Normalize images
X_train = X_train / 255.0  # Normalize pixel values to [0, 1]
X_test = X_test / 255.0

# Add a channel dimension for grayscale images
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

# 2. Data Augmentation
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),  # Randomly flip images horizontally
    layers.RandomRotation(0.05),      # Rotate images by ±10%
    layers.RandomZoom(0.05)           # Randomly zoom into the image
])

Greyscale Morphology

In [None]:
# run on single morph data set
# run on multi morph data set