In [1]:
import pyfeats
from typing import *
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os
from utils import *

In [6]:
# Feature extractor shared by the model/training/evaluation cells below.
# Index 10 selects one configuration out of utils.param_list.
llf = LowLevelFeatureExtractor(**param_list[10])

# Root directory of the image data set. Set the SKIN_DATA_ROOT environment
# variable to point at a different location; the default is the WSL path that
# was previously hard-coded here.
# Windows-native equivalent of the default:
# root_folder = "C:\\Users\\trong\\Documents\\skin_data"
root_folder = os.environ.get("SKIN_DATA_ROOT", "/mnt/c/Users/trong/Documents/skin_data")

In [4]:
# Preprocessing pipeline applied to every image handed to the datasets.
_preprocessing_steps = [
    transforms.Resize((128, 128)),                # force a fixed 128x128 size
    transforms.Grayscale(num_output_channels=1),  # collapse to one channel
    ToNumpy(),  # project helper; presumably yields a NumPy array, not a tensor — confirm in utils
]
transform = transforms.Compose(_preprocessing_steps)

In [8]:
# Image + metadata datasets; both splits share the same preprocessing and
# loader settings, only the CSV file and the shuffle flag differ.
loader_options = dict(batch_size=32, num_workers=4)

train_dataset = CSVImageMetadataDataset(
    csv_file='./data/linux/vaynen_train_linux.csv',
    root_dir=root_folder,
    transform=transform,
)
train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_options)

test_dataset = CSVImageMetadataDataset(
    csv_file='./data/linux/vaynen_test_linux.csv',
    root_dir=root_folder,
    transform=transform,
)
test_dataloader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Sanity-check one batch. The recorded output shows, in order:
#   images   -> torch.Size([32, 128, 128])
#   metadata -> torch.Size([32, 6])
#   labels   -> torch.Size([32])
images, metadata, labels = next(iter(train_dataloader))
for batch_part in (images, metadata, labels):
    print(batch_part.shape)

torch.Size([32, 128, 128])
torch.Size([32, 6])
torch.Size([32])


In [7]:
# Datasets built from pre-computed feature CSVs (no image transform is passed).
# NOTE: this rebinds train_dataset/train_dataloader/test_dataset/test_dataloader
# from the image-based cell.
loader_options = dict(batch_size=32, num_workers=4)

train_dataset = CSVMetadataDataset(
    csv_file="./data/fos/vaynen_train_fos.csv",
    root_dir=root_folder,
)
train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_options)

test_dataset = CSVMetadataDataset(
    csv_file="./data/fos/vaynen_test_fos.csv",
    root_dir=root_folder,
)
test_dataloader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Sanity-check one batch. The recorded output shows, in order:
#   images   -> torch.Size([32, 0])   (empty image slot)
#   metadata -> torch.Size([32, 22])
#   labels   -> torch.Size([32])
images, metadata, labels = next(iter(train_dataloader))
for batch_part in (images, metadata, labels):
    print(batch_part.shape)

torch.Size([32, 0])
torch.Size([32, 22])
torch.Size([32])


In [4]:
# Size the network's input layer from the active feature extractor.
n_inputs = llf.get_features_size()
model = SimpleNeuralNetwork(inputs=n_inputs)

# Print the layer summary
print(model)

SimpleNeuralNetwork(
  (fc1): Linear(in_features=22, out_features=64, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=64, out_features=3, bias=True)
  (softmax): Softmax(dim=1)
  (swish): SwishActivation()
  (batchNorm): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (sigmoid): Sigmoid()
)


In [5]:
# Train on the current train_dataloader; the feature-set label is taken from
# the extractor's function name. Author's note: 10 - 15 epochs is already enough.
train_model(
    model,
    train_dataloader,
    epochs=30,
    llf=llf,
    features_set=llf.function.__name__,
)

Training model using cuda


Epoch 30, Loss: 0.015433703656120701: 100%|██████████| 30/30 [00:58<00:00,  1.97s/it]

Training on fos complete!





In [6]:
# Evaluate on the held-out loader; the call is the cell's last expression, so
# its return value is what the notebook displays.
evaluate_model(
    model,
    test_dataloader,
    llf=llf,
    features_set=llf.function.__name__,
)

'Accuracy: 71.7094017094017% On fos'

In [27]:
# One sample image used to probe the feature extractors by hand.
# NOTE(review): this absolute path repeats the value of `root_folder`; consider
# deriving it from `root_folder` so the data location lives in one place.
image_path = "/mnt/c/Users/trong/Documents/skin_data/train/12/image_uuid_000e1f2d-7442-4cdb-89d0-b8350ecdebbb_created_by_haianh.31297_MTwJXvl.png"

# Open through a context manager so the file handle is released as soon as the
# preprocessing pipeline has consumed the image.
# Assumes `Image` is PIL.Image made available by `from utils import *` — confirm.
with Image.open(image_path) as img:
    p = transform(img)

In [30]:
# Swap the active extractor for the configuration at index 22 and run it on the
# sample image `p`.
# NOTE(review): another cell's recorded output reports len(param_list) == 21,
# which would put index 22 out of range — the cells were run out of order;
# confirm the intended index.
extractor_kwargs = param_list[22]
llf = LowLevelFeatureExtractor(**extractor_kwargs)

# Show which feature function this configuration wraps
print(llf.function.__name__)

llf.process_single_image(np.array(p))

hog_features


array([0.14839619, 0.11355118, 0.20146944, ..., 0.08740602, 0.01744466,
       0.06479176])

In [10]:
import cv2

# Check what type cv2.resize hands back for the first image of the batch.
first_image = np.array(images[0])
resized = cv2.resize(first_image, (256, 256))
type(resized)

numpy.ndarray

In [4]:
# Number of entries in param_list (each entry is a kwargs dict for
# LowLevelFeatureExtractor).
len(param_list)

21

In [23]:
# Smoke test: push a random 100x100 8-bit image through the active extractor
# and show the length of the resulting feature vector.
p = np.random.randint(0, 256, size=(100, 100)).astype('uint8')
feature_vector = llf.process_single_image(np.array(p))
feature_vector.shape


(54,)

In [None]:
# List the feature sets from param_list that have no matching entry under
# ./data yet, together with their position in param_list.
data_folder = os.listdir("./data")
existing_entries = set(data_folder)  # constant-time membership checks

# enumerate() yields the true position of each entry; list.index() would rescan
# the list on every hit and return the first *equal* entry, which is wrong if
# two configurations compare equal.
for idx, param in enumerate(param_list):
    name = param['function'].__name__
    if name not in existing_entries:
        print(f"{name} id: {idx}")

In [1]:
import pandas as pd
# Empty results table: per-class accuracies plus the overall accuracy.
result_columns = ['class 0 acc', 'class 1 acc', 'overall acc']
df = pd.DataFrame(columns=result_columns)
print(df)

Empty DataFrame
Columns: [class 0 acc, class 1 acc, overall acc]
Index: []


In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from utils import param_list
import xgboost as xgb

# result_df = pd.read_csv("./multiple_result.csv", index_col="features_name")

# Run mode switch. Presumably "md" = use metadata columns and "nmd" = skip
# them — confirm against how the CSVs were generated.
md = "nmd"

# Index of the first CSV column used as a feature: 2 in "md" mode, 8 in every
# other mode.
_first_feature_column = {"md": 2}
features_mark = _first_feature_column.get(md, 8)

def process_confusion_matrix(label, pred, labels=None):
    """Print and return the per-class accuracy (in percent) of a prediction.

    For every row ``i`` of the confusion matrix the value is
    ``cm[i, i] / cm[i].sum() * 100``, i.e. the share of samples whose true
    class is row ``i`` that were predicted as that class.

    Parameters
    ----------
    label : array-like
        Ground-truth class labels.
    pred : array-like
        Predicted class labels, aligned with ``label``.
    labels : array-like, optional
        Explicit, ordered class list forwarded to ``confusion_matrix``. Pass it
        to get a fixed number of rows even when a class is missing from both
        ``label`` and ``pred``. ``None`` (the default) keeps the previous
        behaviour of letting ``confusion_matrix`` infer the classes.

    Returns
    -------
    list
        One percentage per confusion-matrix row, in row order. A row with no
        true samples yields ``nan``.
    """
    cm = confusion_matrix(label, pred, labels=labels)
    per_class = []

    for i, row in enumerate(cm):
        support = row.sum()
        # A class that never occurs in `label` has no defined accuracy; report
        # NaN explicitly instead of triggering a 0/0 runtime warning.
        class_accuracy = row[i] / support * 100 if support else float("nan")
        per_class.append(class_accuracy)
        print(f"Class {i} Accuracy: {class_accuracy:.2f}%")

    return per_class

# Train and evaluate one XGBoost classifier per feature set in param_list and
# collect [per-class accuracies..., overall accuracy] for each of them.
rows = []

for param in param_list:

    features_name = param['function'].__name__

    # Load training data from CSV
    train_file_path = f"./data/{features_name}/vaynen_train_{features_name}_new.csv"  # Change this to your actual file path
    train_data = pd.read_csv(train_file_path)

    # Column 1 (the second column) holds the label; features start at column
    # `features_mark`.
    y_train = train_data.iloc[:, 1]  # Labels
    X_train = train_data.iloc[:, features_mark:]  # Features

    # Load test data from another CSV
    test_file_path = f"./data/{features_name}/vaynen_test_{features_name}_new.csv"  # Change this to your actual file path
    test_data = pd.read_csv(test_file_path)

    y_test = test_data.iloc[:, 1]  # Labels
    X_test = test_data.iloc[:, features_mark:]  # Features

    # NOTE: this rebinds the notebook-level name `model`.
    # NOTE(review): `use_label_encoder` is deprecated in recent XGBoost
    # releases — confirm against the installed version before dropping it.
    model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')
    model.fit(X_train, y_train)

    # Predictions
    y_pred = model.predict(X_test)

    # Evaluation metrics
    accuracy = accuracy_score(y_test, y_pred)
    class_acc = process_confusion_matrix(y_test, y_pred)

    # Overall accuracy goes last, matching the column order of `df`
    class_acc.append(accuracy*100)

    rows.append(class_acc)

# Build the results frame once from the collected rows, reusing the column
# layout of the existing `df`. Re-running this cell now replaces the previous
# results instead of appending duplicate rows to them.
df = pd.DataFrame(rows, columns=df.columns)

In [None]:
# Label each result row with its feature-set name.
# Author's note for the table shown below: "Yes metadata result".
feature_names = [entry['function'].__name__ for entry in param_list]
df.index = feature_names
df

Unnamed: 0,class 0 acc,class 1 acc,overall acc
fos,76.027397,40.730337,62.659574
glcm_features,76.19863,46.629213,65.0
glds_features,76.712329,47.47191,65.638298
ngtdm_features,73.80137,42.696629,62.021277
sfm_features,74.143836,44.101124,62.765957
lte_measures,76.541096,41.573034,63.297872
fdta,73.116438,49.438202,64.148936
glrlm_features,76.19863,48.314607,65.638298
fps,72.60274,46.629213,62.765957
shape_parameters,71.746575,47.752809,62.659574


In [None]:
# Label each result row with its feature-set name.
# Author's note for the table shown below: "No metadata result".
feature_names = [entry['function'].__name__ for entry in param_list]
df.index = feature_names
df

Unnamed: 0,class 0 acc,class 1 acc,overall acc
fos,72.773973,29.494382,56.382979
glcm_features,73.80137,33.146067,58.404255
glds_features,78.767123,28.932584,59.893617
ngtdm_features,74.143836,28.932584,57.021277
sfm_features,74.143836,32.022472,58.191489
lte_measures,77.568493,27.52809,58.617021
fdta,78.59589,30.05618,60.212766
glrlm_features,71.061644,31.460674,56.06383
fps,79.452055,28.089888,60.0
shape_parameters,76.541096,26.685393,57.659574
