In [1]:
import pandas as pd
import os
import random
import numpy as np
import torch
import catboost as cb
from sklearn.metrics import mean_squared_error, mean_absolute_error, log_loss
import matplotlib.pyplot as plt
import seaborn as sns
from common_stuff import device
from torch import nn
from timm import create_model
from torch.utils.data import DataLoader

from common_stuff import ConvNeXt, EfficientNet, PictureDataset
from common_stuff import transform

from sklearn.model_selection import train_test_split

INFO:albumentations.check_version:A new version of Albumentations is available: 1.4.16 (you have 1.4.11). Upgrade using: pip install --upgrade albumentations


In [2]:
seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

df = pd.read_csv("../../post2ctr_dataset.csv")
train_df, test_df = train_test_split(df, test_size=0.2, random_state=seed)

train_dataset = PictureDataset(train_df, transform=transform)
test_dataset = PictureDataset(test_df, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

In [8]:
model_paths = [
    # "../../convnext_MSE.pth", "../../convnext_BCE.pth", "../../convnext_BCEWeighted_2.pth", "../../convnext_BCEWeighted_4.pth", #"../../convnext_BCEWeighted_3.pth",
    "../../effnet_MSE.pth", "../../effnet_BCE.pth", "../../effnet_BCEWeighted_2.pth", "../../effnet_BCEWeighted_3.pth", "../../effnet_BCEWeighted_4.pth",
    # "../../convnext_bert_recsys.pth"
]

def load_convnext_model(model_path):
    model = ConvNeXt()
    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    model.eval()
    return model

def load_effnet_model(model_path):
    model = EfficientNet()
    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    model.eval()
    return model

def load_catboost_model(model_path):
    pass

def evaluate_model(model, model_type, test_loader=None, test_data=None):
    if model_type == "torch":
        predictions, targets = [], []
        model.eval()
        with torch.no_grad():
            for batch in test_loader:
                inputs = batch["img"].to(device)
                target = batch["target"].to(device, dtype=torch.float).squeeze()
                outputs = model(inputs).squeeze().cpu().numpy()

                predictions.extend(outputs)
                targets.extend(target.cpu().numpy())
                
    elif model_type == "catboost":
        predictions = model.predict(test_data["X"])
        targets = test_data["y"]

    mse = mean_squared_error(targets, predictions)
    mae = mean_absolute_error(targets, predictions)
    logloss = log_loss(np.clip(targets, 1e-15, 1-1e-15), np.clip(predictions, 1e-15, 1-1e-15))

    plt.figure()
    sns.histplot(predictions, bins=50, color="blue", kde=True)
    plt.title("Distribution of Predicted Probabilities")
    plt.xlabel("Predicted Probability")
    plt.ylabel("Frequency")
    plt.tight_layout()
    
    image_path = f"distribution_{model_type}.png"
    plt.savefig(image_path)
    plt.close()

    return mse, mae, logloss, image_path

In [9]:
results = []
for model_path in model_paths:
    if model_path.endswith(".pth"):
        model = None
        if "convnext" in model_path: 
            model = load_convnext_model(model_path)
            name = "ConvNeXt"
        if "effnet" in model_path:
            model = load_effnet_model(model_path)
            name = "EfficientNet"
        mse, mae, logloss, image_path = evaluate_model(model, "torch", test_loader=test_loader)
        results.append([name, mse, mae, logloss, image_path])
        
    elif model_path.endswith(".cbm"):
        model = load_catboost_model(model_path)
        mse, mae, logloss, image_path = evaluate_model(model, "catboost", test_data=test_df)
        results.append(["CatBoost", mse, mae, logloss, image_path])

Using cache found in /root/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub


ValueError: Unknown label type: (array([0.00770551, 0.03501751, 0.00060459, ..., 0.00763942, 0.00827124,
       0.11441347], dtype=float32),)

In [None]:
columns = ["Model", "MSE", "MAE", "Logloss", "Distribution Plot"]
df_results = pd.DataFrame(results, columns=columns)

from IPython.display import display, Image

for i, row in df_results.iterrows():
    print(f"Model: {row['Model']}")
    print(f"MSE: {row['MSE']}")
    print(f"MAE: {row['MAE']}")
    print(f"Logloss: {row['Logloss']}")
    display(Image(filename=row['Distribution Plot']))