In [None]:
from datasets.fungidataset import build_dataset
from sklearn.linear_model import LogisticRegression as Lin
from sklearn.ensemble import AdaBoostClassifier , RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, BaggingClassifier, StackingClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import classification_report
from torchvision import models 
import torch
import torch.nn as nn
import numpy as np
import sklearn
from tqdm import tqdm
IMAGEDIR = "/Users/koksziszdave/Downloads/fungi_images"
LABELDIR = "/Users/koksziszdave/Downloads/fungi_train_metadata.csv"

args = {
    "image_dir": IMAGEDIR,
    "labels_path": LABELDIR,
    "pre_load": False,
    "batch_size": 32
}

train_loader, valid_loader = build_dataset(args)

num_semcls = 100
def extract_features(model, dataloader, device):
    """
    Extract features using a pre-trained CNN.
    """
    model.eval()
    features, labels, poisonous = [], [], []

    with torch.no_grad():
        for batch in tqdm(dataloader):
            if(batch["image"].shape[0]!=32):
                continue
            images, lbls, poi= batch["image"].to(device), batch["target_sem_cls"].to(device) , batch["target_poisonous"].to(device)
            
            images=images.permute(0,3,1,2)
            output = model(images)
            features.append(output.cpu().numpy())
            labels.append(lbls.cpu().numpy())
            poisonous.append(poi.cpu().numpy())
            

    features = np.vstack(features)
    labels = np.vstack(labels)
    poisonous = np.vstack(poisonous)

    return features, labels, poisonous

def train_encodedmodel():
     # Use a pre-trained ResNet for feature extraction
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    resnet = models.resnet50(pretrained=True)
    resnet.fc = nn.Identity()  # Remove the classification head
    resnet = resnet.to(device)

    # Extract features
    print("Extracting features from the training set...")
    X_train, y_train ,y_train_pois = extract_features(resnet, train_loader, device)
    print("Extracting features from the validation set...")
    X_val, y_val ,y_val_pois = extract_features(resnet, val_loader, device)
    y_val_pois=y_val_pois.astype(int)
    
    y_train_pois=y_train_pois.astype(int)

    print(X_train.shape, y_train.shape, y_train_pois.shape)
    print(X_val.shape, y_val.shape, y_val_pois.shape)
    #Save to numpy file
    np.save("X_train.npy",X_train)
    np.save("y_train.npy",y_train)
    np.save("y_train_pois.npy",y_train_pois)
    np.save("X_val.npy",X_val)
    np.save("y_val.npy",y_val)
    np.save("y_val_pois.npy",y_val_pois)
    # Load pre-extracted features and labels
    X_train = np.load("X_train.npy")
    y_train = np.load("y_train.npy")
    y_train_pois = np.load("y_train_pois.npy")
    X_val = np.load("X_val.npy")
    y_val = np.load("y_val.npy")
    y_val_pois = np.load("y_val_pois.npy")

    print("Features extracted successfully!")
    y_train_poisonous = y_train_pois.reshape(-1) # Shape: (num_samples,)
    y_val_poisonous = y_val_pois.reshape(-1)    # Shape: (num_samples,)

    # Convert one-hot classes to integers
    y_train_classes = np.argmax(y_train, axis=1)  # Shape: (num_samples,)
    y_val_classes = np.argmax(y_val, axis=1)      # Shape: (num_samples,)

    # Address class imbalance in the poisonousness model
    class_weights = sklearn.utils.class_weight.compute_class_weight(
        class_weight='balanced',
        classes=np.unique(y_train_poisonous),
        y=y_train_poisonous
    )
    class_weights_dict = {i: weight for i, weight in enumerate(class_weights)}
    from sklearn.ensemble import GradientBoostingClassifier

    # Pass the computed class weights to the classifier
    poison_model = GradientBoostingClassifier(
    n_estimators=100,
    max_depth=5,
    subsample=1.0,
    random_state=42
    )

    # Manually apply sample weights during fitting
    sample_weights = np.array([class_weights_dict[label] for label in y_train_poisonous])
    poison_model.fit(X_train, y_train_poisonous, sample_weight=sample_weights)
    
    poison_preds = poison_model.predict(X_val)

    print("Classification Report for Poisonousness:")
    print(classification_report(y_val_poisonous, poison_preds))

    # Additional metrics
    from sklearn.metrics import confusion_matrix
    print("Confusion Matrix for Poisonousness:")
    print(confusion_matrix(y_val_poisonous, poison_preds))
train_encodedmodel()

Features extracted successfully!
Classification Report for Classes:
Classification Report for Poisonousness:
              precision    recall  f1-score   support

           0       0.95      0.89      0.92      4722
           1       0.31      0.54      0.39       430

    accuracy                           0.86      5152
   macro avg       0.63      0.71      0.66      5152
weighted avg       0.90      0.86      0.88      5152

Confusion Matrix for Poisonousness:
[[4197  525]
 [ 198  232]]


In [None]:
from datasets.fungidataset import build_dataset
from sklearn.linear_model import LogisticRegression as Lin
from sklearn.ensemble import AdaBoostClassifier , RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, BaggingClassifier, StackingClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import classification_report
from torchvision import models 
import torch
import torch.nn as nn
import numpy as np
import sklearn
from tqdm import tqdm
IMAGEDIR = "/Users/koksziszdave/Downloads/fungi_images"
LABELDIR = "/Users/koksziszdave/Downloads/fungi_train_metadata.csv"

args = {
    "image_dir": IMAGEDIR,
    "labels_path": LABELDIR,
    "pre_load": False,
    "batch_size": 32
}

train_loader, valid_loader = build_dataset(args)

num_semcls = 100
def extract_features(model, dataloader, device):
    """
    Extract features using a pre-trained CNN.
    """
    model.eval()
    features, labels, poisonous = [], [], []

    with torch.no_grad():
        for batch in tqdm(dataloader):
            if(batch["image"].shape[0]!=32):
                continue
            images, lbls, poi= batch["image"].to(device), batch["target_sem_cls"].to(device) , batch["target_poisonous"].to(device)
            
            images=images.permute(0,3,1,2)
            output = model(images)
            features.append(output.cpu().numpy())
            labels.append(lbls.cpu().numpy())
            poisonous.append(poi.cpu().numpy())
            

    features = np.vstack(features)
    labels = np.vstack(labels)
    poisonous = np.vstack(poisonous)

    return features, labels, poisonous

def train_encodedmodel():
     # Use a pre-trained ResNet for feature extraction
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    resnet = models.resnet50(pretrained=True)
    resnet.fc = nn.Identity()  # Remove the classification head
    resnet = resnet.to(device)

    # Extract features
    print("Extracting features from the training set...")
    X_train, y_train ,y_train_pois = extract_features(resnet, train_loader, device)
    print("Extracting features from the validation set...")
    X_val, y_val ,y_val_pois = extract_features(resnet, val_loader, device)
    y_val_pois=y_val_pois.astype(int)
    
    y_train_pois=y_train_pois.astype(int)

    print(X_train.shape, y_train.shape, y_train_pois.shape)
    print(X_val.shape, y_val.shape, y_val_pois.shape)
    #Save to numpy file
    np.save("X_train.npy",X_train)
    np.save("y_train.npy",y_train)
    np.save("y_train_pois.npy",y_train_pois)
    np.save("X_val.npy",X_val)
    np.save("y_val.npy",y_val)
    np.save("y_val_pois.npy",y_val_pois)
    # Load pre-extracted features and labels
    X_train = np.load("X_train.npy")
    y_train = np.load("y_train.npy")
    y_train_pois = np.load("y_train_pois.npy")
    X_val = np.load("X_val.npy")
    y_val = np.load("y_val.npy")
    y_val_pois = np.load("y_val_pois.npy")

    print("Features extracted successfully!")
    y_train_poisonous = y_train_pois.reshape(-1) # Shape: (num_samples,)
    y_val_poisonous = y_val_pois.reshape(-1)    # Shape: (num_samples,)

    # Convert one-hot classes to integers
    y_train_classes = np.argmax(y_train, axis=1)  # Shape: (num_samples,)
    y_val_classes = np.argmax(y_val, axis=1)      # Shape: (num_samples,)

    # Address class imbalance in the poisonousness model
    class_weights = sklearn.utils.class_weight.compute_class_weight(
        class_weight='balanced',
        classes=np.unique(y_train_poisonous),
        y=y_train_poisonous
    )
    class_weights_dict = {i: weight for i, weight in enumerate(class_weights)}
    from sklearn.ensemble import GradientBoostingClassifier

    # Pass the computed class weights to the classifier
    poison_model = GradientBoostingClassifier(
    n_estimators=100,
    max_depth=5,
    subsample=1.0,
    random_state=42
    )

    # Manually apply sample weights during fitting
    sample_weights = np.array([class_weights_dict[label] for label in y_train_poisonous])
    poison_model.fit(X_train, y_train_poisonous, sample_weight=sample_weights)
    
    poison_preds = poison_model.predict(X_val)

    print("Classification Report for Poisonousness:")
    print(classification_report(y_val_poisonous, poison_preds))

    # Additional metrics
    from sklearn.metrics import confusion_matrix
    print("Confusion Matrix for Poisonousness:")
    print(confusion_matrix(y_val_poisonous, poison_preds))
train_encodedmodel()

In [4]:
from datasets.fungidataset import build_dataset
from sklearn.linear_model import LogisticRegression as Lin
from sklearn.ensemble import AdaBoostClassifier , RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, BaggingClassifier, StackingClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import classification_report
from torchvision import models 
import torch
import torch.nn as nn
import numpy as np
import sklearn
from tqdm import tqdm
IMAGEDIR = "/Users/koksziszdave/Downloads/fungi_images"
LABELDIR = "/Users/koksziszdave/Downloads/fungi_train_metadata.csv"

args = {
    "image_dir": IMAGEDIR,
    "labels_path": LABELDIR,
    "pre_load": False,
    "batch_size": 32
}

train_loader, valid_loader = build_dataset(args)

num_semcls = 100
def extract_features(model, dataloader, device):
    """
    Extract features using a pre-trained CNN.
    """
    model.eval()
    features, labels, poisonous = [], [], []

    with torch.no_grad():
        for batch in tqdm(dataloader):
            if(batch["image"].shape[0]!=32):
                continue
            images, lbls, poi= batch["image"].to(device), batch["target_sem_cls"].to(device) , batch["target_poisonous"].to(device)
            
            images=images.permute(0,3,1,2)
            output = model(images)
            features.append(output.cpu().numpy())
            labels.append(lbls.cpu().numpy())
            poisonous.append(poi.cpu().numpy())
            

    features = np.vstack(features)
    labels = np.vstack(labels)
    poisonous = np.vstack(poisonous)

    return features, labels, poisonous

def train_encodedmodel():
    # Load pre-extracted features and labels
    X_train = np.load("X_train.npy")
    y_train = np.load("y_train.npy")
    X_val = np.load("X_val.npy")
    y_val = np.load("y_val.npy")
    print("Features extracted successfully!")
    # Convert one-hot classes to integers
    y_train_classes = np.argmax(y_train, axis=1)  # Shape: (num_samples,)
    y_val_classes = np.argmax(y_val, axis=1)      # Shape: (num_samples,)

    # Pass the computed class weights to the classifier
    
    # Train the classification model
    print("Training the classification model...")
    class_model = OneVsRestClassifier(AdaBoostClassifier( n_estimators=100, random_state=42))


    class_model.fit(X_train, y_train_classes)



    # Evaluate the models
    class_preds = class_model.predict(X_val)



    # Generate classification reports
    print("Classification Report for Classes:")
    print(classification_report(y_val_classes, class_preds))

    # Additional metrics
    from sklearn.metrics import confusion_matrix
    print("Confusion Matrix for Poisonousness:")
    print(confusion_matrix(y_pred=class_preds, y_true=y_val_classes))
train_encodedmodel()

Features extracted successfully!
Training the classification model...
Classification Report for Classes:
              precision    recall  f1-score   support

           0       0.17      0.18      0.17        51
           1       0.44      0.36      0.40        66
           2       0.14      0.20      0.17        45
           3       0.26      0.16      0.20        51
           4       0.16      0.21      0.18        48
           5       0.79      0.57      0.66        46
           6       0.14      0.15      0.14        54
           7       0.41      0.33      0.36        49
           8       0.12      0.12      0.12        24
           9       0.27      0.21      0.23        63
          10       0.07      0.10      0.08        42
          11       0.15      0.19      0.17        52
          12       0.42      0.34      0.38        50
          13       0.55      0.58      0.56        57
          14       0.65      0.49      0.56        41
          15       0.64      0