# Orange brix Analysis

In [1050]:
# from google.colab import drive
# drive.mount("/content/drive")
# %cd "/content/drive//MyDrive/Colab Notebooks/Master_homework/Advanced_programming_for_scientific_computing/"
# # Master_homework/

## Import libraries

In [1051]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from enum import Enum
from tqdm import tqdm

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score, mean_squared_error

import os

In [1052]:
# !pip install lightgbm
# !pip install xgboost

In [1053]:
plt.style.use("fivethirtyeight")

In [1054]:
GENERATE_PLOTS: bool = False

## Create Classes

In [1055]:
class Utilities:
    """Helper base class; ``Dataset`` and ``Model`` in this notebook inherit from it."""

    # Scalar types that mark a column as numeric. Python ``bool`` is a subclass
    # of ``int`` and is therefore excluded explicitly in the check below.
    _NUMERIC_TYPES = (
        int,
        float,
        np.int8,
        np.int16,
        np.int32,
        np.int64,
        np.float16,
        np.float32,
        np.float64,
    )

    def __init__(self):
        pass

    def get_categorical_numeric_cols(self, dataframe: pd.DataFrame):
        """Split the columns of ``dataframe`` into categorical and numeric ones.

        The decision is made from the type of the value in the FIRST ROW of
        each column: a Python ``bool`` or any value that is not one of
        ``_NUMERIC_TYPES`` makes the column categorical, everything else
        makes it numeric.

        Parameters
        ----------
        dataframe : pd.DataFrame
            Frame to inspect; must contain at least one row.

        Returns
        -------
        tuple[list, list]
            ``(categorical_cols, numeric_cols)``, each in the frame's column order.

        Raises
        ------
        Exception
            If ``dataframe`` has no rows.
        """
        if len(dataframe) == 0:
            raise Exception("No records found!")

        numeric_cols = []
        categorical_cols = []

        for col in dataframe.columns:
            # Positional access: ``dataframe[col][0]`` is a LABEL lookup and
            # raises KeyError whenever the index has no label 0 (for example
            # after filtering rows or with a non-default index).
            first_value = dataframe[col].iloc[0]
            is_numeric = not isinstance(first_value, bool) and isinstance(
                first_value, self._NUMERIC_TYPES
            )
            if is_numeric:
                numeric_cols.append(col)
            else:
                categorical_cols.append(col)

        return (categorical_cols, numeric_cols)

In [1056]:
from sklearn.preprocessing import (
    MaxAbsScaler,
    MinMaxScaler,
    Normalizer,
    RobustScaler,
    StandardScaler,
)


class ScalerType(Enum):
    """Feature-scaling options.

    Every member except ``Raw_`` has the corresponding scaler CLASS (not an
    instance) imported from ``sklearn.preprocessing`` as its value, so a scaler
    object is created by calling ``member.value()``. ``Raw_`` has the value
    ``None``.
    """

    # No scaler class attached.
    Raw_ = None
    MaxAbsScaler_ = MaxAbsScaler
    MinMaxScaler_ = MinMaxScaler
    Normalizer_ = Normalizer
    RobustScaler_ = RobustScaler
    StandardScaler_ = StandardScaler

In [1057]:
class Dataset(Utilities):
    """A named DataFrame together with its categorical / numeric column split.

    Parameters
    ----------
    name : str
        Identifier of the dataset.
    dataframe : pd.DataFrame
        Underlying data; stored by reference (not copied). Must have at least
        one row, otherwise column detection raises.
    scaler_type : ScalerType
        Scaling option recorded for this dataset; it is only stored here and
        handed on to derived datasets.
    """

    def __init__(
        self,
        name: str,
        dataframe: pd.DataFrame,
        scaler_type: ScalerType = ScalerType.Raw_,
    ):
        self.name: str = name
        self.dataframe: pd.DataFrame = dataframe
        self.scaler_type: ScalerType = scaler_type
        self.categorical_cols: list[str] = []
        self.numeric_cols: list[str] = []

        # Initial split uses the type-based rule inherited from Utilities.
        self.categorical_cols, self.numeric_cols = self.get_categorical_numeric_cols(
            self.dataframe
        )

    def get_name(self) -> str:
        """Return the dataset name."""
        return self.name

    def get_dataframe(self) -> pd.DataFrame:
        """Return the underlying DataFrame (the stored object, not a copy)."""
        return self.dataframe

    def detect_categorical_numeric_cols(self):
        """Recompute ``numeric_cols`` / ``categorical_cols`` in place.

        A column is numeric when the value in its first row can be converted
        with ``float()``, otherwise categorical. NOTE: this rule differs from
        ``get_categorical_numeric_cols`` used in ``__init__`` (for instance a
        boolean first value converts to float and is therefore numeric here).

        Raises
        ------
        Exception
            If the DataFrame has no rows.
        """
        if len(self.dataframe) == 0:
            raise Exception("No records found!")

        self.numeric_cols = []
        self.categorical_cols = []

        for col in self.dataframe.columns:
            try:
                # Positional access so a non-default index (no label 0) works.
                float(self.dataframe[col].iloc[0])
            except (TypeError, ValueError, OverflowError):
                # Only conversion failures mean "categorical"; anything else
                # (e.g. KeyboardInterrupt) must not be swallowed.
                self.categorical_cols.append(col)
            else:
                self.numeric_cols.append(col)

    def get_numeric_cols(self) -> list[str]:
        """Return the names of the columns currently classified as numeric."""
        return self.numeric_cols

    def get_numeric_dataframe(self) -> pd.DataFrame:
        """Return the sub-frame made of the numeric columns."""
        return self.dataframe[self.numeric_cols]

    def get_categorical_dataframe(self) -> pd.DataFrame:
        """Return the sub-frame made of the categorical columns."""
        return self.dataframe[self.categorical_cols]

    def get_scaler_type(self) -> ScalerType:
        """Return the scaling option recorded for this dataset."""
        return self.scaler_type

    def concat_dataframe(self, additional_dataframe: pd.DataFrame):
        """Append ``additional_dataframe`` column-wise and re-detect column kinds.

        The data is only appended when it has the same number of rows as the
        current DataFrame; otherwise the call does nothing and gives no signal.
        """
        if len(additional_dataframe) == len(self.dataframe):
            self.dataframe = pd.concat([self.dataframe, additional_dataframe], axis=1)
            self.detect_categorical_numeric_cols()

    def get_one_hot_vectorized_dataset(
        self, categorical_col: str, print_name: bool = True
    ):
        """Build a new ``Dataset`` with one-hot columns for ``categorical_col``.

        The dummy columns are placed in front of all existing columns (the
        original categorical column is kept). The new dataset is named
        ``<name>_ohv_<categorical_col>`` and inherits this dataset's scaler type.

        Parameters
        ----------
        categorical_col : str
            Column to encode; must be one of ``categorical_cols``.
        print_name : bool
            When true, print the name of the new dataset.

        Returns
        -------
        Dataset or None
            The new dataset, or ``None`` (after printing a message) when the
            column is numeric or does not exist.
        """
        if categorical_col in self.categorical_cols:
            dummies_dataframe = pd.get_dummies(self.dataframe[categorical_col])
            dummies_dataframe = pd.concat([dummies_dataframe, self.dataframe], axis=1)
            new_dataset_name = self.name + "_ohv_" + categorical_col
            if print_name:
                print(new_dataset_name)
            return Dataset(
                name=new_dataset_name,
                dataframe=dummies_dataframe,
                scaler_type=self.scaler_type,
            )

        if categorical_col in self.numeric_cols:
            print("Records for input column name are not categorical!")
        else:
            print("Input column name does not exists!")
        return None

In [1058]:
class ModelType(Enum):
    """Supported model families.

    The member NAME selects the estimator class and the member VALUE is the
    dict of keyword arguments passed to that estimator's constructor.
    Comments marked "tune" list the ranges intended for hyper-parameter search.
    """

    LINEAR_REGRESSION: dict = {}
    SVR: dict = {
        "kernel": "rbf",  # tune: linear, poly, rbf
        "degree": 3,
        "gamma": "scale",
        "coef0": 0,
        "tol": 1e-3,
        "C": 1.0,  # tune: 0.1 - 2, step 0.1
        "epsilon": 0.1,
    }
    DECISION_TREE: dict = {
        "criterion": "squared_error",
        "splitter": "best",
        "max_depth": None,  # tune: 1 - number of features
        "min_samples_split": 2,
        "min_samples_leaf": 1,
        "min_weight_fraction_leaf": 0.0,
        "max_features": None,  # tune: int 2 -> 5, or "sqrt" / "log2"; default None
        "random_state": 42,
        "max_leaf_nodes": None,
        "min_impurity_decrease": 0,
    }
    RANDOM_FOREST: dict = {
        "n_estimators": 100,  # tune: 100 - 1000, step 100
        "criterion": "squared_error",
        "max_depth": None,  # tune: 1 - number of features
        "min_samples_split": 2,
        "min_samples_leaf": 1,
        "min_weight_fraction_leaf": 0.0,
        "max_features": 1.0,  # tune: int 2 -> 5, or "sqrt" / "log2"
        "max_leaf_nodes": None,
        "min_impurity_decrease": 0.0,
        # NOTE(review): no "random_state" here, unlike DECISION_TREE, so
        # results vary between runs - confirm whether that is intended.
    }
    LIGHT_GBM: dict = {
        "boosting_type": "gbdt",
        "num_leaves": 31,
        "max_depth": -1,
        "learning_rate": 0.1,
        # Was misspelled "n_estimator", which is not the estimator-count
        # parameter; the value itself is unchanged.
        "n_estimators": 100,  # tune: 100 - 1000, step 100
        "verbosity": -1,
    }
    XGB: dict = {
        # "reg:linear" is the deprecated alias of squared-error regression.
        "objective": "reg:squarederror",
        "n_estimators": 10,  # tune: 100 - 1000, step 100
        "seed": 123,
        "verbosity": 0,
    }


In [1059]:
class Result:
    """One evaluation record, stored as a flat list in ``self.result``.

    The list order is
    ``[dataset_name, model_name, model_type, scaler_type, r2, mse]``.
    """

    def __init__(
        self,
        dataset_name: str,
        model_name: str,
        model_type: ModelType,
        scaler_type: str,
        r2: float,
        mse: float,
    ):
        # Identification fields first, then the two metrics.
        record = [dataset_name, model_name, model_type, scaler_type]
        record.extend((r2, mse))
        self.result = record

    def get_result(self):
        """Return the stored record list (the same list object, not a copy)."""
        record = self.result
        return record

In [1060]:
class ResultFrame:
    """Collects evaluation records in a DataFrame, one row per added result."""

    # Column order has to match the list stored in ``Result.result``.
    COLUMNS = ("dataset_name", "model_name", "model_type", "scaler_type", "r2", "mse")

    def __init__(self):
        self.result_frame: pd.DataFrame = pd.DataFrame()
        # Row label used for the next inserted result.
        self.count = 0

        self.start_up()

    def start_up(self):
        """Create the (empty) result columns."""
        for column in self.COLUMNS:
            self.result_frame[column] = []

    def add_result(self, new_result: Result):
        """Append ``new_result.result`` as a new row labelled with the running count."""
        self.result_frame.loc[self.count] = new_result.result
        self.count += 1

    def display_result(self):
        """Sort the stored frame best-first and display it.

        Rows are ordered by highest ``r2``; ties are broken by LOWEST ``mse``
        (previously both keys were sorted descending, which put the worse
        model first among equal ``r2``). The sort is applied in place, so the
        stored frame stays sorted afterwards. ``display`` is the function
        provided by the notebook environment.
        """
        self.result_frame.sort_values(
            ["r2", "mse"], ascending=[False, True], inplace=True
        )
        display(self.result_frame)

    def save_result(self, filename, overwrite=False):
        """Write the result frame to an Excel file.

        Parameters
        ----------
        filename : str
            Target path; a relative path is resolved against the current
            working directory.
        overwrite : bool
            When false and the file already exists, nothing is written and an
            explanatory message is printed instead.
        """
        file_exists = os.path.isfile(os.path.join(os.getcwd(), filename))
        if file_exists and not overwrite:
            print("Error: CANNOT SAVE FILE.")
            print("A file with the same name already exists.")
            print(
                "Set overwrite to True to overwrite existed file or change file name!"
            )
            return

        self.result_frame.to_excel(filename)

In [1061]:
class Model(Utilities):
    """A regression model bound to the x / y columns of a ``Dataset``.

    Parameters
    ----------
    model_type : ModelType
        Model family; its name selects the estimator class and its value holds
        the constructor keyword arguments.
    name : str
        Free-form model name, reported in evaluation results.
    x_cols, y_cols : list[str]
        Feature and target column names taken from ``dataset``; neither list
        may be empty.
    dataset : Dataset
        Source dataset.
    scaler_type : ScalerType
        Scaler applied to the numeric feature columns (``Raw_`` = no scaling).

    Raises
    ------
    Exception
        If ``x_cols`` or ``y_cols`` is empty.
    """

    def __init__(
        self,
        model_type: ModelType,
        name: str,
        x_cols: list[str],
        y_cols: list[str],
        dataset: Dataset,
        scaler_type: ScalerType = ScalerType.Raw_,
    ):

        if len(x_cols) == 0 or len(y_cols) == 0:
            raise Exception("Data columns are not specified!")

        self.model_type: ModelType = model_type
        self.name: str = name
        self.scaler_type: ScalerType = scaler_type
        self.scaler = self.get_scaler(self.scaler_type)  # None for ScalerType.Raw_
        self.x_dataset: Dataset = None
        self.y_dataset: Dataset = None
        # Kept for backward compatibility; nothing in this class reads it.
        self.inner_model = None
        # Fitted estimator; stays None until fit() has been called.
        self.model = None

        self.preprocess_data(x_cols, y_cols, dataset)

    def preprocess_data(self, x_cols: list[str], y_cols: list[str], dataset: Dataset):
        """Extract the x / y columns into their own datasets.

        Building new ``Dataset`` objects re-detects categorical and numeric
        columns for each part. The x dataset is named ``<dataset>_x`` and the
        y dataset ``<dataset>_y``.
        """
        base_name = dataset.get_name()
        scaler_type = dataset.get_scaler_type()
        self.x_dataset = Dataset(
            name=base_name + "_x",
            dataframe=dataset.get_dataframe()[x_cols],
            scaler_type=scaler_type,
        )
        self.y_dataset = Dataset(
            # Was "_x": the target dataset carried the feature dataset's name.
            name=base_name + "_y",
            dataframe=dataset.get_dataframe()[y_cols],
            scaler_type=scaler_type,
        )

        self.process_x_dataset()

    def process_x_dataset(self):
        """Reorder the x columns to ``[numeric columns..., categorical columns...]``.

        The scaling code relies on this layout: the first
        ``len(numeric_cols)`` columns are scaled, the rest are cast to int.
        """
        new_name = self.x_dataset.get_name()
        numeric_dataframe = self.x_dataset.get_numeric_dataframe()
        categorical_dataframe = self.x_dataset.get_categorical_dataframe()
        scaler_type = self.x_dataset.get_scaler_type()
        self.x_dataset = Dataset(
            name=new_name,
            dataframe=pd.concat([numeric_dataframe, categorical_dataframe], axis=1),
            scaler_type=scaler_type,
        )

    def get_x_list_index_numeric_cols(self):
        """Return the positional indices of the numeric x columns (``0..k-1``)."""
        return list(range(len(self.x_dataset.get_numeric_cols())))

    def _scale_features(self, X, fit_scaler: bool):
        """Scale the numeric columns of the 2-D array ``X``.

        Without a scaler ``X`` is returned unchanged. Otherwise the leading
        numeric columns are transformed (after fitting the scaler on them when
        ``fit_scaler`` is true) and the remaining columns are cast to ``int``
        and appended unchanged.
        """
        if self.scaler is None:
            return X

        numeric_index = self.get_x_list_index_numeric_cols()
        X_numeric = X[:, numeric_index]
        X_categorical = X[:, len(numeric_index) :].astype(int)
        if fit_scaler:
            self.scaler.fit(X_numeric)
        scaled_numeric = self.scaler.transform(X_numeric)
        return np.concatenate([scaled_numeric, X_categorical], axis=1)

    def fit_scaler_and_scale_dataframe(self, X: np.ndarray) -> np.ndarray:
        """Fit the scaler on ``X`` and return the scaled array.

        ``X`` is indexed as a 2-D array (``X[:, cols]``), so it has to be a
        NumPy array rather than a DataFrame.
        """
        return self._scale_features(X, fit_scaler=True)

    def get_x_dataframe(self):
        """Return the feature DataFrame."""
        return self.x_dataset.get_dataframe()

    def get_y_dataframe(self):
        """Return the target DataFrame."""
        return self.y_dataset.get_dataframe()

    def get_scaler(self, scaler_type: ScalerType):
        """Instantiate the scaler for ``scaler_type``; ``None`` for ``Raw_``."""
        if scaler_type != ScalerType.Raw_:
            return scaler_type.value()
        return None

    def _build_estimator(self):
        """Create an unfitted estimator for ``self.model_type``.

        Raises
        ------
        ValueError
            If the model type's name has no registered estimator class.
        """
        estimator_classes = {
            "SVR": SVR,
            "LINEAR_REGRESSION": LinearRegression,
            "DECISION_TREE": DecisionTreeRegressor,
            "RANDOM_FOREST": RandomForestRegressor,
            "LIGHT_GBM": LGBMRegressor,
            "XGB": XGBRegressor,
        }
        model_family = self.model_type.name
        if model_family not in estimator_classes:
            raise ValueError("Unknown model family!")
        return estimator_classes[model_family](**self.model_type.value)

    def fit(self, X=None, Y=None):
        """Fit the scaler (if any) and the estimator.

        When ``X`` is ``None`` the model's own x / y datasets are used (and
        ``Y`` is ignored). ``Y`` is squeezed before being passed to the
        estimator. The fitted estimator is stored in ``self.model``.
        """
        if X is None:
            X = np.array(self.x_dataset.get_dataframe())
            Y = np.array(self.y_dataset.get_dataframe())

        Y = np.squeeze(Y)
        transformed_X = self.fit_scaler_and_scale_dataframe(X)
        self.model = self._build_estimator().fit(transformed_X, Y)

    def predict(self, X):
        """Predict targets for the 2-D array ``X`` using the already fitted scaler.

        Raises
        ------
        RuntimeError
            If ``fit`` has not been called yet.
        """
        if self.model is None:
            raise RuntimeError("Model is not fitted yet; call fit() first.")
        transformed_X = self._scale_features(X, fit_scaler=False)
        return self.model.predict(transformed_X)

    def evaluate(self, X=None, Y=None, method="LOOCV"):
        """Evaluate the model with leave-one-out cross-validation.

        The model is refitted once per sample (``KFold`` with as many splits
        as samples); the held-out predictions are collected and scored against
        ``Y``. As a side effect ``self.model`` ends up fitted on the last fold.

        Parameters
        ----------
        X, Y : array-like, optional
            Features / targets; when ``X`` is ``None`` the model's own
            datasets are used.
        method : str
            Only ``"LOOCV"`` is supported.

        Returns
        -------
        Result
            Dataset name (x dataset name without its ``_x`` suffix), model
            name, model type, scaler type, R^2 and MSE.

        Raises
        ------
        ValueError
            If ``method`` is not ``"LOOCV"`` (previously this surfaced as an
            unrelated ``UnboundLocalError``).
        """
        if method != "LOOCV":
            raise ValueError(
                f"Unsupported evaluation method {method!r}; only 'LOOCV' is available."
            )

        if X is None:
            X = np.array(self.x_dataset.get_dataframe())
            Y = np.array(self.y_dataset.get_dataframe())

        total = len(Y)
        preds = [0] * total
        kf = KFold(n_splits=total)

        with tqdm(total=total) as pbar:
            for train_index, valid_index in kf.split(X, Y):
                self.fit(X[train_index], Y[train_index])

                Y_valid_pred = self.predict(X[valid_index])
                # Put each prediction back at its sample's original position.
                for index, value in zip(valid_index, Y_valid_pred):
                    preds[index] = value
                pbar.update(1)

        r2 = r2_score(Y, preds)
        mse = mean_squared_error(Y, preds)

        # Compile result; [:-2] strips the "_x" suffix added in preprocess_data.
        dataset_name = self.x_dataset.get_name()[:-2]
        return Result(
            dataset_name=dataset_name,
            model_name=self.name,
            model_type=self.model_type,
            scaler_type=self.scaler_type,
            r2=r2,
            mse=mse,
        )

    def save(self, filename: str):
        """Placeholder; not implemented (does nothing)."""
        pass

    def load(self, filename: str):
        """Placeholder; not implemented (does nothing)."""
        pass

    def set_params(self, params: dict):
        """Placeholder; not implemented (does nothing)."""
        pass

## Create main variables

In [1062]:
list_dataset: dict[str, Dataset] = dict()
list_model: dict = dict()


## Create functions

In [1063]:
def list_all_dataset_names():
    """Return the keys view of the module-level ``list_dataset`` registry."""
    registered_names = list_dataset.keys()
    return registered_names

In [1064]:
def list_all_model_names():
    """Return the keys view of the module-level ``list_model`` registry."""
    registered_names = list_model.keys()
    return registered_names

In [1065]:
def plot_along_dataset(
    dataset: Dataset,
    nrow_subplot: int,
    ncol_subplot: int,
    figsize: tuple[int, int],
):
    """Plot every numeric column against the row position, one subplot each.

    Subplots are filled row by row in the order of ``dataset.numeric_cols``.

    Parameters
    ----------
    dataset : Dataset
        Dataset whose numeric columns are plotted.
    nrow_subplot, ncol_subplot : int
        Shape of the subplot grid.
    figsize : tuple[int, int]
        Figure size passed to ``plt.subplots``.
    """
    numeric_df = dataset.get_dataframe()[dataset.numeric_cols]
    x = np.arange(len(numeric_df))
    number_of_columns = numeric_df.shape[1]

    # squeeze=False keeps ``axes`` 2-D even for a single row or column;
    # otherwise indexing the grid fails for 1xN / Nx1 / 1x1 layouts.
    figs, axes = plt.subplots(
        nrow_subplot, ncol_subplot, figsize=figsize, squeeze=False
    )
    for index, ax in enumerate(axes.flat):
        if index >= number_of_columns:
            # Grid has more cells than numeric columns: hide the spare axes
            # instead of raising IndexError.
            ax.set_visible(False)
            continue
        ax.plot(x, numeric_df.iloc[:, index])
        ax.set_title(f"{numeric_df.columns[index]} along dataframe", size=15)
        ax.xaxis.set_tick_params(labelsize=12)
        ax.xaxis.set_ticks(np.arange(0, len(numeric_df), 10))
        ax.yaxis.set_tick_params(labelsize=12)

In [1066]:
def plot_hist_kde_box_all_columns(dataset: Dataset):
    """Draw a histogram, a KDE and a boxplot for every numeric column.

    The figure has one row per numeric column and three columns
    (histogram, KDE, boxplot).
    """
    number_of_numeric_col = len(dataset.numeric_cols)
    # squeeze=False keeps ``axes`` 2-D when there is only one numeric column.
    figs, axes = plt.subplots(
        number_of_numeric_col,
        3,
        figsize=(30, number_of_numeric_col * 10),
        squeeze=False,
    )

    numeric_dataframe = dataset.get_dataframe()[dataset.numeric_cols]
    plotters = (
        ("histogram", sns.histplot),
        ("KDE", sns.kdeplot),
        ("Boxplot", sns.boxplot),
    )

    for i, row_axes in enumerate(axes):
        column_name = numeric_dataframe.columns[i]
        column_values = numeric_dataframe.iloc[:, i]
        for ax, (label, plotter) in zip(row_axes, plotters):
            ax.set_title(f"{column_name} {label}", size=20)
            ax.xaxis.set_tick_params(labelsize=15)
            ax.yaxis.set_tick_params(labelsize=15)
            plotter(ax=ax, data=column_values)

In [1067]:
def plot_all_columns_group_by_a_categorical_col(dataset: Dataset, categorical_col: str):
    """Plot per-group KDEs and boxplots for every numeric column.

    The figure has one row per numeric column: the left panel overlays one KDE
    per distinct value of ``categorical_col``, the right panel shows boxplots
    grouped by the same column.

    Parameters
    ----------
    dataset : Dataset
        Dataset providing the data and the list of numeric columns.
    categorical_col : str
        Column whose distinct values define the groups.
    """
    dataframe = dataset.get_dataframe()
    numeric_dataframe = dataframe[dataset.numeric_cols]
    number_of_numeric_col = len(dataset.numeric_cols)
    # squeeze=False keeps ``axes`` 2-D when there is only one numeric column.
    figs, axes = plt.subplots(
        number_of_numeric_col,
        2,
        figsize=(20, number_of_numeric_col * 10),
        squeeze=False,
    )
    group_labels = dataframe[categorical_col]
    groups = group_labels.unique()

    for i, (ax_kde, ax_box) in enumerate(axes):
        column_name = numeric_dataframe.columns[i]

        ax_kde.set_title(f"{column_name} KDE", size=20)
        ax_kde.xaxis.set_tick_params(labelsize=15)
        ax_kde.yaxis.set_tick_params(labelsize=15)
        for group in groups:
            sns.kdeplot(
                ax=ax_kde,
                data=numeric_dataframe[group_labels == group].iloc[:, i],
                label=f"Type {group}",
            )
        ax_kde.legend()

        ax_box.set_title(f"{column_name} Boxplot", size=20)
        ax_box.xaxis.set_tick_params(labelsize=15)
        ax_box.yaxis.set_tick_params(labelsize=15)
        sns.boxplot(
            ax=ax_box,
            # Was hard-coded to "type", ignoring the requested grouping column.
            hue=categorical_col,
            y=column_name,
            data=dataframe,
        )

In [1068]:
def divide_dataframe_with_categorical_col(
    dataset: Dataset, categorical_col: str
) -> dict[str, pd.DataFrame]:
    """Split the dataset's DataFrame into one sub-frame per distinct value.

    Returns a dict mapping each distinct value of ``categorical_col`` (in
    order of first appearance) to the rows holding that value.
    """
    frame = dataset.get_dataframe()
    labels = frame[categorical_col]
    return {label: frame[labels == label] for label in labels.unique()}

In [1069]:
def calculate_p_values_between_partitions(
    dataframes, columns_to_compare, equal_var=False
):
    """Run a two-sample t-test for every ordered pair of partitions.

    Parameters
    ----------
    dataframes : dict
        Maps a partition key to its DataFrame.
    columns_to_compare : iterable
        Column names to test; a column is skipped for a pair when it is
        missing from either frame.
    equal_var : bool, default False
        Passed to ``scipy.stats.ttest_ind``. ``False`` gives Welch's t-test
        (no equal-variance assumption), which is the test this notebook
        states it uses; ``True`` gives the pooled-variance Student's t-test,
        i.e. what this function computed before.

    Returns
    -------
    dict
        ``{"<col>_<key1>-<col>_<key2>": p_value}``. Both orderings of each
        pair are present, so every comparison appears twice.
    """
    p_values = {}
    for key1, df1 in dataframes.items():
        for key2, df2 in dataframes.items():
            if key1 == key2:
                continue
            for col in columns_to_compare:
                if col in df1 and col in df2:
                    _, p_val = stats.ttest_ind(df1[col], df2[col], equal_var=equal_var)
                    p_values[f"{col}_{key1}-{col}_{key2}"] = p_val
    return p_values

In [1070]:
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error
from typing import Optional, Callable


def summary_linear_model(x: list, y, dataset, transformer: Optional[Callable] = None):
    """Fit an OLS model of ``dataset[y]`` on ``dataset[x]`` and report it.

    A constant term is added to the regressors with ``sm.add_constant``. The
    summary values are printed and also returned.

    Parameters
    ----------
    x : list
        Names of the explanatory columns.
    y
        Name of the response column.
    dataset
        Object indexed with ``dataset[x]`` / ``dataset[y]`` (presumably a
        DataFrame - confirm against callers).
    transformer : callable, optional
        Accepted but not used by this function.

    Returns
    -------
    dict
        Keys ``"Intercept"`` (first fitted parameter), ``"Slope"`` (remaining
        parameters), ``"P_values"`` (p-values excluding the first),
        ``"R_squared"`` and ``"MSE"`` (in-sample mean squared error).
    """
    design = sm.add_constant(dataset[x])
    target = dataset[y]
    fitted = sm.OLS(target, design).fit()

    summary = {
        "Intercept": fitted.params.iloc[0],
        "Slope": fitted.params.iloc[1:],
        "P_values": fitted.pvalues[1:],
        "R_squared": fitted.rsquared,
        "MSE": mean_squared_error(target, fitted.predict()),
    }

    # Print each entry as "<label>", newline, value, followed by a blank line.
    for label, value in summary.items():
        print(f"{label}\n", value, end="\n\n")

    return summary

In [1071]:
# intercept_a = model_no_scale["Slope"]["A"] + model_no_scale["Intercept"]
# intercept_b = model_no_scale["Slope"]["B"] + model_no_scale["Intercept"]
# intercept_c = model_no_scale["Slope"]["C"] + model_no_scale["Intercept"]

# x = np.arange(model_dataset["mass"].min(), model_dataset["mass"].max())
# ya = x * model_no_scale["Slope"]["mass"] + intercept_a
# yb = x * model_no_scale["Slope"]["mass"] + intercept_b
# yc = x * model_no_scale["Slope"]["mass"] + intercept_c
# plt.plot(x, ya, linewidth=2, label="type_A")
# plt.plot(x, yb, linewidth=2, label="type_B")
# plt.plot(x, yc, linewidth=2, label="type_C")
# plt.legend()

## Import data

In [1072]:
!ls

cropped.rar
main_orange.ipynb
orange-Copy1.ipynb
orange.ipynb
orange_anh_hieu-Copy1.ipynb
orange_anh_hieu.ipynb
orange_best.ipynb
orange_data.xlsx
resnet34.xlsx
result_2.xlsx
result_3.xlsx
result_4.xlsx
result_vit_base_patch16_224.dino_best_054777.xlsx
result_vit_base_patch16_224.dino_best_05526.xlsx
result_vit_base_patch16_224.dino_best_54_1.xlsx
result_XGB_get_10_new.xlsx
TIMM.ipynb
vit_base_patch14_dinov2.lvd142m_get_30.xlsx
vit_base_patch16_224.dino_get_10.xlsx
vit_small_patch14_dinov2.lvd142m_get_10.xlsx
vit_small_patch14_dinov2.lvd142m_get_20.xlsx
vit_small_patch14_dinov2.lvd142m_get_30.xlsx
vit_small_patch14_dinov2.lvd142m_get_5.xlsx
vit_small_patch14_dinov2.lvd142m_get_97.xlsx
vit_small_patch14_dinov2.lvd142m_get_one.xlsx
vit_small_patch14_dinov2.lvd142m_mean.xlsx


In [1073]:
raw_df = pd.read_excel("orange_data.xlsx", sheet_name=0)

In [1074]:
raw_df.head(10)

Unnamed: 0,Name,p,C NaOH,Khối lượng(g),Đường kính ngang(mm),Đường kính dọc(mm),Độ đường (Brix %),TA (%),Brix:TA,BrmTA
0,A1,1.9,0.4,255,257,262,10.9,4.867141,2.239508,6.032859
1,A2,1.2,0.4,325,288,278,9.8,3.073984,3.188045,6.726016
2,A3,2.3,0.4,185,235,237,11.0,5.891803,1.867001,5.108197
3,A4,1.4,0.4128,203,245,245,9.0,3.701077,2.431725,5.298923
4,A5,1.2,0.4,229,253,249,10.9,3.073984,3.545887,7.826016
5,A7,1.3,0.4128,357,305,290,10.7,3.436714,3.113439,7.263286
6,A8,2.0,0.4,309,275,285,11.2,5.123307,2.186088,6.076693
7,A9,1.9,0.4128,321,285,280,9.2,5.02289,1.831615,4.17711
8,A10,1.45,0.4082,245,255,259,9.5,3.790542,2.506238,5.709458
9,A11,2.0,0.4128,275,267,270,10.5,5.287252,1.985909,5.212748


In [1075]:
raw_df.columns

Index(['Name', 'p', 'C NaOH', 'Khối lượng(g)', 'Đường kính ngang(mm)',
       'Đường kính dọc(mm)', 'Độ đường (Brix %)', 'TA (%)', 'Brix:TA',
       'BrmTA'],
      dtype='object')

In [1076]:
raw_df.shape

(81, 10)

In [1077]:
new_column_names = {
    "Name": "name",
    "p": "v_naoh",
    "C NaOH": "c_naoh",
    "Khối lượng(g)": "mass",
    "Đường kính ngang(mm)": "h_diameter",
    "Đường kính dọc(mm)": "v_diameter",
    "Độ đường (Brix %)": "brix",
    "TA (%)": "ta",
    "Brix:TA": "brix_ta",
    "BrmTA": "brm_ta",
}

std_col_name_df = raw_df.rename(columns=new_column_names)

In [1078]:
std_col_name_df.head(10)

Unnamed: 0,name,v_naoh,c_naoh,mass,h_diameter,v_diameter,brix,ta,brix_ta,brm_ta
0,A1,1.9,0.4,255,257,262,10.9,4.867141,2.239508,6.032859
1,A2,1.2,0.4,325,288,278,9.8,3.073984,3.188045,6.726016
2,A3,2.3,0.4,185,235,237,11.0,5.891803,1.867001,5.108197
3,A4,1.4,0.4128,203,245,245,9.0,3.701077,2.431725,5.298923
4,A5,1.2,0.4,229,253,249,10.9,3.073984,3.545887,7.826016
5,A7,1.3,0.4128,357,305,290,10.7,3.436714,3.113439,7.263286
6,A8,2.0,0.4,309,275,285,11.2,5.123307,2.186088,6.076693
7,A9,1.9,0.4128,321,285,280,9.2,5.02289,1.831615,4.17711
8,A10,1.45,0.4082,245,255,259,9.5,3.790542,2.506238,5.709458
9,A11,2.0,0.4128,275,267,270,10.5,5.287252,1.985909,5.212748


In [1079]:
std_col_name_dataset = Dataset("std_col_name", std_col_name_df, ScalerType.Raw_)

In [1080]:
print(std_col_name_dataset.categorical_cols)
print(std_col_name_dataset.numeric_cols)
std_col_name_dataset.dataframe

['name']
['v_naoh', 'c_naoh', 'mass', 'h_diameter', 'v_diameter', 'brix', 'ta', 'brix_ta', 'brm_ta']


Unnamed: 0,name,v_naoh,c_naoh,mass,h_diameter,v_diameter,brix,ta,brix_ta,brm_ta
0,A1,1.90,0.4000,255,257,262,10.9,4.867141,2.239508,6.032859
1,A2,1.20,0.4000,325,288,278,9.8,3.073984,3.188045,6.726016
2,A3,2.30,0.4000,185,235,237,11.0,5.891803,1.867001,5.108197
3,A4,1.40,0.4128,203,245,245,9.0,3.701077,2.431725,5.298923
4,A5,1.20,0.4000,229,253,249,10.9,3.073984,3.545887,7.826016
...,...,...,...,...,...,...,...,...,...,...
76,C24,1.60,0.4016,177,225,216,12.5,4.115040,3.037638,8.384960
77,C25,1.60,0.4016,173,216,220,11.1,4.115040,2.697422,6.984960
78,C26,0.95,0.4016,126,195,197,12.8,2.443305,5.238806,10.356695
79,C28,1.70,0.4016,181,216,232,11.8,4.372230,2.698852,7.427770


In [1081]:
list_dataset["std_col_name"] = std_col_name_dataset

In [1082]:
list_dataset["std_col_name"].get_dataframe().head(10)

Unnamed: 0,name,v_naoh,c_naoh,mass,h_diameter,v_diameter,brix,ta,brix_ta,brm_ta
0,A1,1.9,0.4,255,257,262,10.9,4.867141,2.239508,6.032859
1,A2,1.2,0.4,325,288,278,9.8,3.073984,3.188045,6.726016
2,A3,2.3,0.4,185,235,237,11.0,5.891803,1.867001,5.108197
3,A4,1.4,0.4128,203,245,245,9.0,3.701077,2.431725,5.298923
4,A5,1.2,0.4,229,253,249,10.9,3.073984,3.545887,7.826016
5,A7,1.3,0.4128,357,305,290,10.7,3.436714,3.113439,7.263286
6,A8,2.0,0.4,309,275,285,11.2,5.123307,2.186088,6.076693
7,A9,1.9,0.4128,321,285,280,9.2,5.02289,1.831615,4.17711
8,A10,1.45,0.4082,245,255,259,9.5,3.790542,2.506238,5.709458
9,A11,2.0,0.4128,275,267,270,10.5,5.287252,1.985909,5.212748


In [1083]:
if GENERATE_PLOTS:
    plot_along_dataset(list_dataset["std_col_name"], 3, 3, (12, 12))

#### From the shape of the lines in the "mass", "h_diameter", "v_diameter" and "brix" graphs, we can see that the values fall into 3 levels along the dataset. We therefore assume that the dataset contains 3 types of data, which we can double-check against the names / labels in the dataset.

In [1084]:
list_dataset["std_col_name"].get_dataframe().columns

Index(['name', 'v_naoh', 'c_naoh', 'mass', 'h_diameter', 'v_diameter', 'brix',
       'ta', 'brix_ta', 'brm_ta'],
      dtype='object')

In [1085]:
print(list_dataset["std_col_name"].get_dataframe()["name"].to_list())

['A1', 'A2', 'A3', 'A4', 'A5', 'A7', 'A8', 'A9', 'A10', 'A11', 'A13', 'A14', 'A15', 'A16', 'A17', 'A18', 'A19', 'A20', 'A21', 'A24', 'A25', 'A26', 'A27', 'A28', 'A29', 'A30', 'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B9', 'B10', 'B11', 'B12', 'B13', 'B14', 'B15', 'B16', 'B17', 'B18', 'B19', 'B20', 'B21', 'B22', 'B23', 'B24', 'B25', 'B26', 'B27', 'B28', 'B29', 'B30', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C8', 'C9', 'C10', 'C11', 'C12', 'C14', 'C15', 'C16', 'C17', 'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C28', 'C29']


We count the number of samples of each type to check whether the group sizes match the level changes seen in the following graphs: "mass", "h_diameter", "v_diameter" and "brix".

In [1086]:
import re

# The sample type is the first capital letter found in the upper-cased name
# (e.g. "A1" -> "A").
sample_names = list_dataset["std_col_name"].get_dataframe()["name"].str.upper()
data_type = pd.DataFrame(
    [re.findall("[A-Z]", sample_name)[0] for sample_name in sample_names],
    columns=["type"],
)
# Helper column of ones so the grouped count below has a column to report.
data_type["count"] = 1
data_type.groupby(["type"]).count()["count"]

type
A    26
B    30
C    25
Name: count, dtype: int64

In [1087]:
list_dataset["std_col_name"].concat_dataframe(data_type["type"])

In [1088]:
list_dataset["std_col_name"].get_dataframe().head(10)

Unnamed: 0,name,v_naoh,c_naoh,mass,h_diameter,v_diameter,brix,ta,brix_ta,brm_ta,type
0,A1,1.9,0.4,255,257,262,10.9,4.867141,2.239508,6.032859,A
1,A2,1.2,0.4,325,288,278,9.8,3.073984,3.188045,6.726016,A
2,A3,2.3,0.4,185,235,237,11.0,5.891803,1.867001,5.108197,A
3,A4,1.4,0.4128,203,245,245,9.0,3.701077,2.431725,5.298923,A
4,A5,1.2,0.4,229,253,249,10.9,3.073984,3.545887,7.826016,A
5,A7,1.3,0.4128,357,305,290,10.7,3.436714,3.113439,7.263286,A
6,A8,2.0,0.4,309,275,285,11.2,5.123307,2.186088,6.076693,A
7,A9,1.9,0.4128,321,285,280,9.2,5.02289,1.831615,4.17711,A
8,A10,1.45,0.4082,245,255,259,9.5,3.790542,2.506238,5.709458,A
9,A11,2.0,0.4128,275,267,270,10.5,5.287252,1.985909,5.212748,A


## Analyze quantitative data

### Plots for all rows

In [1089]:
if GENERATE_PLOTS:
    plot_hist_kde_box_all_columns(list_dataset["std_col_name"])

In [1090]:
list_dataset["std_col_name"].get_dataframe().describe()

Unnamed: 0,v_naoh,c_naoh,mass,h_diameter,v_diameter,brix,ta,brix_ta,brm_ta
count,81.0,81.0,81.0,81.0,81.0,81.0,81.0,81.0,81.0
mean,1.477778,0.406857,222.197531,241.777778,241.358025,11.169259,3.849815,3.024935,7.319445
std,0.297069,0.007092,50.582215,23.652167,22.587225,1.362702,0.774895,0.78652,1.587514
min,0.8,0.4,126.0,195.0,197.0,7.4,2.05752,1.795715,4.076693
25%,1.3,0.4016,179.0,222.0,223.0,10.4,3.370111,2.512846,6.148368
50%,1.4,0.4048,228.0,242.0,242.0,11.1,3.701077,2.920633,7.332125
75%,1.6,0.4082,255.0,257.0,255.0,12.3,4.229802,3.371688,8.199505
max,2.3,0.423,357.0,305.0,290.0,14.0,5.891803,6.609899,11.54248


In [1091]:
if GENERATE_PLOTS:
    plot_all_columns_group_by_a_categorical_col(list_dataset["std_col_name"], "type")

#### From the plots above, we see that the "mass", "h_diameter", "v_diameter" and "brm_ta" plots show clear patterns corresponding to the types.

## Analyze qualitative data

In [1092]:
# partitions_dataframe = divide_dataframe_with_categorical_col(list_dataset["std_col_name"], "type")

### Use Welch's t-test to see whether the data could be divided into 3 parts A, B, and C.

In [1093]:
# for group in partitions_dataframe.keys():
#     print(f"{group}: {partitions_dataframe[group].shape}")

In [1094]:
# partitions_dataframe["A"].head(10)

In [1095]:
# columns_to_compare = ["mass", "h_diameter", "v_diameter"]

In [1096]:
# calculate_p_values_between_partitions(partitions_dataframe, columns_to_compare)

### All p-values are smaller than $\alpha = 0.05$
### => Conclusion from Welch's t-test: the dataset can be divided into 3 parts: A, B and C

## Matrix plots

### Pairplots

In [1097]:
if GENERATE_PLOTS:
    plt.figure(figsize=(16, 16))
    sns.pairplot(list_dataset["std_col_name"].get_dataframe())

#### Some potential features to explain brix are "mass", "h_diameter", "v_diameter", "brix_ta", "brm_ta".

### Heatmap

In [1098]:
if GENERATE_PLOTS:
    plt.figure(figsize=(12, 12))
    dataframe = list_dataset["std_col_name"].get_dataframe()
    numeric_cols = list_dataset["std_col_name"].numeric_cols
    dataset_corr = dataframe[numeric_cols].corr()
    sns.heatmap(dataset_corr, annot=True)

#### => Some features that could be used in the model to explain **brix**: **mass, h_diameter, v_diameter, brix_ta, brm_ta** (quantitative) and **type** (qualitative). However, **brix_ta** and **brm_ta** are directly related to **brix**, so they will not be considered.

## Build Models

### Create "ohv_std_col_name" dataframe: ["brix", "mass", "h_diameter", "v_diameter", one-hot-vector-of-type]

In [1099]:
ohv_dataset = list_dataset["std_col_name"].get_one_hot_vectorized_dataset("type")
list_dataset["std_col_name_ohv_type"] = ohv_dataset
list_dataset["std_col_name_ohv_type"].get_dataframe().head(10)

std_col_name_ohv_type


Unnamed: 0,A,B,C,name,v_naoh,c_naoh,mass,h_diameter,v_diameter,brix,ta,brix_ta,brm_ta,type
0,True,False,False,A1,1.9,0.4,255,257,262,10.9,4.867141,2.239508,6.032859,A
1,True,False,False,A2,1.2,0.4,325,288,278,9.8,3.073984,3.188045,6.726016,A
2,True,False,False,A3,2.3,0.4,185,235,237,11.0,5.891803,1.867001,5.108197,A
3,True,False,False,A4,1.4,0.4128,203,245,245,9.0,3.701077,2.431725,5.298923,A
4,True,False,False,A5,1.2,0.4,229,253,249,10.9,3.073984,3.545887,7.826016,A
5,True,False,False,A7,1.3,0.4128,357,305,290,10.7,3.436714,3.113439,7.263286,A
6,True,False,False,A8,2.0,0.4,309,275,285,11.2,5.123307,2.186088,6.076693,A
7,True,False,False,A9,1.9,0.4128,321,285,280,9.2,5.02289,1.831615,4.17711,A
8,True,False,False,A10,1.45,0.4082,245,255,259,9.5,3.790542,2.506238,5.709458,A
9,True,False,False,A11,2.0,0.4128,275,267,270,10.5,5.287252,1.985909,5.212748,A


In [1100]:
y_cols = ["brix"]
y_variable = list_dataset["std_col_name_ohv_type"].get_dataframe()[y_cols]
y_variable.head(10)

Unnamed: 0,brix
0,10.9
1,9.8
2,11.0
3,9.0
4,10.9
5,10.7
6,11.2
7,9.2
8,9.5
9,10.5


In [1101]:
x_cols = ["mass", "h_diameter", "v_diameter", "C"]
x_variable = list_dataset["std_col_name_ohv_type"].get_dataframe()[x_cols]
x_variable.head(10)

Unnamed: 0,mass,h_diameter,v_diameter,C
0,255,257,262,False
1,325,288,278,False
2,185,235,237,False
3,203,245,245,False
4,229,253,249,False
5,357,305,290,False
6,309,275,285,False
7,321,285,280,False
8,245,255,259,False
9,275,267,270,False


### Linear regression models

#### Model 1: brix $ \sim \beta_0 $ + $ \beta_1 $ * type_a + $ \beta_2 $ * type_b + $ \beta_3 $ * type_c + $ \beta_4 $ * h.diameter + $ \beta_5 $ * v.diameter + $ \beta_6 $ * mass

working

In [1102]:
list_dataset.keys()

dict_keys(['std_col_name', 'std_col_name_ohv_type'])

In [1103]:
std_dataset_names = ["std_col_name_ohv_type"]

In [1104]:
!ls

cropped.rar
main_orange.ipynb
orange-Copy1.ipynb
orange.ipynb
orange_anh_hieu-Copy1.ipynb
orange_anh_hieu.ipynb
orange_best.ipynb
orange_data.xlsx
resnet34.xlsx
result_2.xlsx
result_3.xlsx
result_4.xlsx
result_vit_base_patch16_224.dino_best_054777.xlsx
result_vit_base_patch16_224.dino_best_05526.xlsx
result_vit_base_patch16_224.dino_best_54_1.xlsx
result_XGB_get_10_new.xlsx
TIMM.ipynb
vit_base_patch14_dinov2.lvd142m_get_30.xlsx
vit_base_patch16_224.dino_get_10.xlsx
vit_small_patch14_dinov2.lvd142m_get_10.xlsx
vit_small_patch14_dinov2.lvd142m_get_20.xlsx
vit_small_patch14_dinov2.lvd142m_get_30.xlsx
vit_small_patch14_dinov2.lvd142m_get_5.xlsx
vit_small_patch14_dinov2.lvd142m_get_97.xlsx
vit_small_patch14_dinov2.lvd142m_get_one.xlsx
vit_small_patch14_dinov2.lvd142m_mean.xlsx


In [1105]:
# Model names to process (presumably timm backbone identifiers - confirm);
# only the uncommented entries are active.
model_list = [
    # "vit_large_patch14_dinov2.lvd142m",
    "vit_base_patch16_224.dino",
    # "vit_small_patch14_dinov2.lvd142m",
    # "eva_large_patch14_196.in22k_ft_in22k_in1k",
    # "vit_base_patch14_dinov2.lvd142m",
    # "resnet34",
]

# Candidate hyper-parameter values, keyed by model family name and then by
# parameter name. Commented-out values record alternatives that are
# currently disabled.
hyper_params = {
    "SVR": {
        "kernel": ["rbf", "linear", "poly"],
        "C": [0.0001, 0.001, 0.01, 0.05, 0.1],
    },
    "RANDOM_FOREST": {
        "n_estimators": np.arange(100, 1100, 100),
        "max_depth": [None, 5, 10, 15, 20],
        "max_features": [10, 50, 100, 150, 200, "sqrt", "log2", None],
    },
    "LIGHT_GBM": {
        "max_depth": [-1],  # disabled: 5, 10, 15, 20
        "learning_rate": [0.015],  # disabled: 0.01, 0.05, 0.1
        "min_child_samples": [20],
        "n_estimators": np.arange(1900, 2000, 100),
        # "subsample": [1.0, 0.8, 0.6, 0.5],
        # "colsample_bytree": [0.1, 0.2, 0.4, 0.6, 0.8, 1],
        # "reg_alpha": [0, 0.01, 0.05, 0.1, 0.2],
        # "reg_lambda": [0, 0.01, 0.05, 0.1, 0.2],
    },
}



import copy
from sklearn.model_selection import ParameterGrid

# Hyper-parameter candidates per model family: every grid declared in
# `hyper_params` is expanded into an explicit list of settings so it can be
# iterated more than once.
# Only the families named in the tuple below are searched; SVR is disabled:
#   "SVR" : list(ParameterGrid(hyper_params["SVR"]))
param_search = {
    family: list(ParameterGrid(hyper_params[family]))
    for family in ("LIGHT_GBM",)
}

In [1106]:
# result_frame: ResultFrame = ResultFrame()
# secondary_model_list = [
#     # "vit_large_patch14_dinov2.lvd142m",
#     # "vit_small_patch14_dinov2.lvd142m",
#     # "eva_large_patch14_196.in22k_ft_in22k_in1k",
#     "vit_base_patch14_dinov2.lvd142m",
#     # 'resnet34'
#     # "vit_base_patch16_224.mae"
# ]
# # secondary_options = ["mean", "get_one", "get_97"] + [f"get_{k}" for k in [5, 10, 20, 30]] + [f"interval_{k}" for k in [5, 10, 20, 30]]
# secondary_options =  [f"get_{k}" for k in [30]]
# # options = ["mean", "get_one", "get_97"] + [f"get_{k}" for k in [5, 10, 20, 30]]
# options = ["get_10"]
# for model_name in model_list:
#   for option in options:
#     resnet_dataset = pd.read_excel(f"{model_name}_{option}.xlsx")
#     df_orig = list_dataset["std_col_name_ohv_type"].get_dataframe()
#     for snd_model in secondary_model_list:
#       for opt in secondary_options:
#         df_new = pd.merge(df_orig, resnet_dataset, on = "name")
#         df_snd = pd.read_excel(f"{snd_model}_{opt}.xlsx")
#         df_new = pd.merge(df_new, df_snd, on = "name")
#         snd_cols = list(df_snd.drop("name", axis = 1).columns)
#         dataset_orig_resnet =  Dataset("orig_resnet", df_new, ScalerType.Raw_)


#         list_dataset["orig_resnet"] = dataset_orig_resnet
#         orig_cols = ['mass', 'h_diameter', 'C']
#         resnet_cols = list(resnet_dataset.drop("name", axis = 1).columns)

#         x_cols = ['C'] + resnet_cols
#         feature_all = resnet_cols + snd_cols
#         feature_r2 = {}
#         for feature in resnet_cols + snd_cols:
#           feats_now = [feature]
#           feature_collection = {"orig_cols": orig_cols, "x_cols": x_cols, "resnet_cols": feats_now}
#           for dataset_name in std_dataset_names:
#               for model_type in ModelType:
#                 model_family = model_type.__str__().split(".")[-1]
#                 if model_family in ["DECISION_TREE", "XGB", "LINEAR_REGRESSION", "RANDOM_FOREST", "KNN"]:
#                   continue
#                 if model_family in param_search.keys():
#                   for param in param_search[model_family]:
#                     for key, value in param.items():
#                       model_type.value[key] = value
#                     for key, value in feature_collection.items():
#                         if key in ["orig_cols", "x_cols"]:
#                             continue
#                         name = f"{model_family}_{key}_{model_name}_{option}_{str(param)}_{snd_model}_{opt}"
#                         print(f"Training {name}")
#                         model = Model(
#                             model_type=model_type,
#                             name= name,
#                             x_cols=value,
#                             y_cols=y_cols,
#                             dataset=list_dataset["orig_resnet"],
#                             scaler_type=ScalerType.Raw_,
#                         )
#                         result = model.evaluate()
#                         r2  =    result.result[-2]
#                         mse =   result.result[-1]
#                         feature_r2[feature] = r2
#                         # if r2  > max_R2:
#                         #   max_R2 = r2
#                         #   selected_feats = feats_now
#                         print(f"R2: {r2} | MSE: {mse}")
#                         result_frame.add_result(result)


# result_frame.display_result()
# # print(max_R2)
# # print(selected_feats)

In [1107]:
# feature_r2_sorted={k: v for k, v in sorted(feature_r2.items(), key=lambda item: item[1], reverse = True)}


In [1108]:
# selected_feats = [feat for feat in feature_r2_sorted.keys() if feature_r2_sorted[feat]>0.0]

In [1109]:
# len(selected_feats)

In [1110]:
# print(selected_feats)

In [1111]:
selected_feats_backup = ['timm/vit_base_patch14_dinov2.lvd142m_701', 'timm/vit_base_patch14_dinov2.lvd142m_529', 'timm/vit_base_patch14_dinov2.lvd142m_740', 'timm/vit_base_patch16_224.dino_158', 'timm/vit_base_patch16_224.dino_74', 'timm/vit_base_patch14_dinov2.lvd142m_119', 'timm/vit_base_patch14_dinov2.lvd142m_661', 'timm/vit_base_patch14_dinov2.lvd142m_366', 'timm/vit_base_patch16_224.dino_355', 'timm/vit_base_patch14_dinov2.lvd142m_440', 'timm/vit_base_patch14_dinov2.lvd142m_22', 'timm/vit_base_patch16_224.dino_317', 'timm/vit_base_patch16_224.dino_178', 'timm/vit_base_patch14_dinov2.lvd142m_336', 'timm/vit_base_patch16_224.dino_706', 'timm/vit_base_patch14_dinov2.lvd142m_362', 'timm/vit_base_patch16_224.dino_42', 'timm/vit_base_patch14_dinov2.lvd142m_74', 'timm/vit_base_patch14_dinov2.lvd142m_59', 'timm/vit_base_patch14_dinov2.lvd142m_584', 'timm/vit_base_patch16_224.dino_221', 'timm/vit_base_patch14_dinov2.lvd142m_402', 'timm/vit_base_patch14_dinov2.lvd142m_710', 'timm/vit_base_patch16_224.dino_396', 'timm/vit_base_patch14_dinov2.lvd142m_497', 'timm/vit_base_patch14_dinov2.lvd142m_40', 'timm/vit_base_patch16_224.dino_108', 'timm/vit_base_patch14_dinov2.lvd142m_53', 'timm/vit_base_patch16_224.dino_457', 'timm/vit_base_patch14_dinov2.lvd142m_214', 'timm/vit_base_patch14_dinov2.lvd142m_355', 'timm/vit_base_patch14_dinov2.lvd142m_454', 'timm/vit_base_patch14_dinov2.lvd142m_227', 'timm/vit_base_patch16_224.dino_281', 'timm/vit_base_patch14_dinov2.lvd142m_106', 'timm/vit_base_patch14_dinov2.lvd142m_451', 'timm/vit_base_patch14_dinov2.lvd142m_671', 'timm/vit_base_patch14_dinov2.lvd142m_82', 'timm/vit_base_patch16_224.dino_318', 'timm/vit_base_patch14_dinov2.lvd142m_718', 'timm/vit_base_patch16_224.dino_759', 'timm/vit_base_patch16_224.dino_165', 'timm/vit_base_patch16_224.dino_591', 'timm/vit_base_patch14_dinov2.lvd142m_642', 'timm/vit_base_patch16_224.dino_332', 'timm/vit_base_patch14_dinov2.lvd142m_37', 'timm/vit_base_patch16_224.dino_434', 
'timm/vit_base_patch14_dinov2.lvd142m_326', 'timm/vit_base_patch14_dinov2.lvd142m_271', 'timm/vit_base_patch16_224.dino_532', 'timm/vit_base_patch16_224.dino_718', 'timm/vit_base_patch14_dinov2.lvd142m_511', 'timm/vit_base_patch14_dinov2.lvd142m_494', 'timm/vit_base_patch16_224.dino_574', 'timm/vit_base_patch14_dinov2.lvd142m_571', 'timm/vit_base_patch16_224.dino_250', 'timm/vit_base_patch16_224.dino_247', 'timm/vit_base_patch16_224.dino_122', 'timm/vit_base_patch14_dinov2.lvd142m_414', 'timm/vit_base_patch14_dinov2.lvd142m_704', 'timm/vit_base_patch16_224.dino_32', 'timm/vit_base_patch16_224.dino_146', 'timm/vit_base_patch16_224.dino_43', 'timm/vit_base_patch16_224.dino_245', 'timm/vit_base_patch16_224.dino_35', 'timm/vit_base_patch14_dinov2.lvd142m_580', 'timm/vit_base_patch14_dinov2.lvd142m_455', 'timm/vit_base_patch14_dinov2.lvd142m_10', 'timm/vit_base_patch14_dinov2.lvd142m_6', 'timm/vit_base_patch16_224.dino_397', 'timm/vit_base_patch16_224.dino_764', 'timm/vit_base_patch14_dinov2.lvd142m_90', 'timm/vit_base_patch14_dinov2.lvd142m_69', 'timm/vit_base_patch16_224.dino_765', 'timm/vit_base_patch14_dinov2.lvd142m_502', 'timm/vit_base_patch14_dinov2.lvd142m_156', 'timm/vit_base_patch16_224.dino_198', 'timm/vit_base_patch14_dinov2.lvd142m_79', 'timm/vit_base_patch14_dinov2.lvd142m_614', 'timm/vit_base_patch14_dinov2.lvd142m_223', 'timm/vit_base_patch14_dinov2.lvd142m_127', 'timm/vit_base_patch16_224.dino_17', 'timm/vit_base_patch16_224.dino_680', 'timm/vit_base_patch16_224.dino_307', 'timm/vit_base_patch16_224.dino_446', 'timm/vit_base_patch16_224.dino_48', 'timm/vit_base_patch14_dinov2.lvd142m_713', 'timm/vit_base_patch14_dinov2.lvd142m_762', 'timm/vit_base_patch14_dinov2.lvd142m_255', 'timm/vit_base_patch14_dinov2.lvd142m_433', 'timm/vit_base_patch14_dinov2.lvd142m_2', 'timm/vit_base_patch14_dinov2.lvd142m_353', 'timm/vit_base_patch16_224.dino_725', 'timm/vit_base_patch14_dinov2.lvd142m_432', 'timm/vit_base_patch16_224.dino_469', 
'timm/vit_base_patch14_dinov2.lvd142m_491', 'timm/vit_base_patch14_dinov2.lvd142m_335', 'timm/vit_base_patch16_224.dino_125', 'timm/vit_base_patch14_dinov2.lvd142m_268', 'timm/vit_base_patch16_224.dino_698', 'timm/vit_base_patch14_dinov2.lvd142m_728', 'timm/vit_base_patch16_224.dino_292', 'timm/vit_base_patch16_224.dino_758', 'timm/vit_base_patch16_224.dino_522', 'timm/vit_base_patch14_dinov2.lvd142m_339', 'timm/vit_base_patch16_224.dino_356', 'timm/vit_base_patch16_224.dino_147', 'timm/vit_base_patch14_dinov2.lvd142m_133', 'timm/vit_base_patch16_224.dino_573', 'timm/vit_base_patch14_dinov2.lvd142m_633', 'timm/vit_base_patch14_dinov2.lvd142m_113', 'timm/vit_base_patch14_dinov2.lvd142m_605', 'timm/vit_base_patch14_dinov2.lvd142m_244', 'timm/vit_base_patch14_dinov2.lvd142m_677', 'timm/vit_base_patch16_224.dino_239', 'timm/vit_base_patch14_dinov2.lvd142m_43', 'timm/vit_base_patch14_dinov2.lvd142m_525', 'timm/vit_base_patch14_dinov2.lvd142m_54', 'timm/vit_base_patch16_224.dino_297', 'timm/vit_base_patch16_224.dino_321', 'timm/vit_base_patch16_224.dino_121', 'timm/vit_base_patch16_224.dino_423', 'timm/vit_base_patch14_dinov2.lvd142m_456', 'timm/vit_base_patch14_dinov2.lvd142m_751', 'timm/vit_base_patch16_224.dino_539', 'timm/vit_base_patch14_dinov2.lvd142m_673', 'timm/vit_base_patch14_dinov2.lvd142m_392', 'timm/vit_base_patch14_dinov2.lvd142m_697', 'timm/vit_base_patch14_dinov2.lvd142m_577', 'timm/vit_base_patch14_dinov2.lvd142m_24', 'timm/vit_base_patch14_dinov2.lvd142m_311', 'timm/vit_base_patch14_dinov2.lvd142m_534', 'timm/vit_base_patch14_dinov2.lvd142m_546', 'timm/vit_base_patch14_dinov2.lvd142m_369', 'timm/vit_base_patch16_224.dino_749', 'timm/vit_base_patch16_224.dino_274', 'timm/vit_base_patch14_dinov2.lvd142m_14', 'timm/vit_base_patch14_dinov2.lvd142m_438', 'timm/vit_base_patch14_dinov2.lvd142m_589', 'timm/vit_base_patch16_224.dino_357', 'timm/vit_base_patch14_dinov2.lvd142m_567', 'timm/vit_base_patch16_224.dino_515', 'timm/vit_base_patch14_dinov2.lvd142m_695', 
'timm/vit_base_patch16_224.dino_362', 'timm/vit_base_patch14_dinov2.lvd142m_447', 'timm/vit_base_patch14_dinov2.lvd142m_85', 'timm/vit_base_patch14_dinov2.lvd142m_282', 'timm/vit_base_patch14_dinov2.lvd142m_198', 'timm/vit_base_patch14_dinov2.lvd142m_319', 'timm/vit_base_patch16_224.dino_180', 'timm/vit_base_patch16_224.dino_482', 'timm/vit_base_patch16_224.dino_324', 'timm/vit_base_patch16_224.dino_83', 'timm/vit_base_patch14_dinov2.lvd142m_98', 'timm/vit_base_patch14_dinov2.lvd142m_76', 'timm/vit_base_patch14_dinov2.lvd142m_55', 'timm/vit_base_patch16_224.dino_516', 'timm/vit_base_patch14_dinov2.lvd142m_530', 'timm/vit_base_patch14_dinov2.lvd142m_249', 'timm/vit_base_patch14_dinov2.lvd142m_603', 'timm/vit_base_patch14_dinov2.lvd142m_318', 'timm/vit_base_patch14_dinov2.lvd142m_532', 'timm/vit_base_patch16_224.dino_542', 'timm/vit_base_patch14_dinov2.lvd142m_749', 'timm/vit_base_patch14_dinov2.lvd142m_493', 'timm/vit_base_patch14_dinov2.lvd142m_276', 'timm/vit_base_patch16_224.dino_260', 'timm/vit_base_patch16_224.dino_200', 'timm/vit_base_patch16_224.dino_116', 'timm/vit_base_patch14_dinov2.lvd142m_668', 'timm/vit_base_patch16_224.dino_226', 'timm/vit_base_patch14_dinov2.lvd142m_398', 'timm/vit_base_patch14_dinov2.lvd142m_643', 'timm/vit_base_patch16_224.dino_521', 'timm/vit_base_patch16_224.dino_186', 'timm/vit_base_patch14_dinov2.lvd142m_514', 'timm/vit_base_patch14_dinov2.lvd142m_103', 'timm/vit_base_patch14_dinov2.lvd142m_555', 'timm/vit_base_patch16_224.dino_216', 'timm/vit_base_patch16_224.dino_614', 'timm/vit_base_patch14_dinov2.lvd142m_656', 'timm/vit_base_patch16_224.dino_588', 'timm/vit_base_patch16_224.dino_238', 'timm/vit_base_patch14_dinov2.lvd142m_628', 'timm/vit_base_patch16_224.dino_611', 'timm/vit_base_patch14_dinov2.lvd142m_31', 'timm/vit_base_patch14_dinov2.lvd142m_32', 'timm/vit_base_patch14_dinov2.lvd142m_596', 'timm/vit_base_patch16_224.dino_704', 'timm/vit_base_patch14_dinov2.lvd142m_709', 'timm/vit_base_patch16_224.dino_654', 
'timm/vit_base_patch14_dinov2.lvd142m_431', 'timm/vit_base_patch16_224.dino_284', 'timm/vit_base_patch16_224.dino_197', 'timm/vit_base_patch16_224.dino_364', 'timm/vit_base_patch16_224.dino_488', 'timm/vit_base_patch14_dinov2.lvd142m_599', 'timm/vit_base_patch16_224.dino_438', 'timm/vit_base_patch14_dinov2.lvd142m_641', 'timm/vit_base_patch14_dinov2.lvd142m_722', 'timm/vit_base_patch16_224.dino_592', 'timm/vit_base_patch16_224.dino_586', 'timm/vit_base_patch16_224.dino_505', 'timm/vit_base_patch16_224.dino_625', 'timm/vit_base_patch14_dinov2.lvd142m_707', 'timm/vit_base_patch14_dinov2.lvd142m_586', 'timm/vit_base_patch14_dinov2.lvd142m_211', 'timm/vit_base_patch16_224.dino_731', 'timm/vit_base_patch14_dinov2.lvd142m_763', 'timm/vit_base_patch16_224.dino_492', 'timm/vit_base_patch14_dinov2.lvd142m_94', 'timm/vit_base_patch16_224.dino_425', 'timm/vit_base_patch14_dinov2.lvd142m_760', 'timm/vit_base_patch16_224.dino_232', 'timm/vit_base_patch14_dinov2.lvd142m_239', 'timm/vit_base_patch14_dinov2.lvd142m_81', 'timm/vit_base_patch14_dinov2.lvd142m_419', 'timm/vit_base_patch14_dinov2.lvd142m_374', 'timm/vit_base_patch14_dinov2.lvd142m_278', 'timm/vit_base_patch14_dinov2.lvd142m_575', 'timm/vit_base_patch14_dinov2.lvd142m_767', 'timm/vit_base_patch14_dinov2.lvd142m_116', 'timm/vit_base_patch16_224.dino_447', 'timm/vit_base_patch16_224.dino_59', 'timm/vit_base_patch16_224.dino_287', 'timm/vit_base_patch16_224.dino_462', 'timm/vit_base_patch14_dinov2.lvd142m_675', 'timm/vit_base_patch16_224.dino_595', 'timm/vit_base_patch14_dinov2.lvd142m_134', 'timm/vit_base_patch16_224.dino_711', 'timm/vit_base_patch14_dinov2.lvd142m_563', 'timm/vit_base_patch16_224.dino_230', 'timm/vit_base_patch14_dinov2.lvd142m_243', 'timm/vit_base_patch14_dinov2.lvd142m_533', 'timm/vit_base_patch14_dinov2.lvd142m_557', 'timm/vit_base_patch16_224.dino_433', 'timm/vit_base_patch16_224.dino_599', 'timm/vit_base_patch14_dinov2.lvd142m_51', 'timm/vit_base_patch14_dinov2.lvd142m_348', 
'timm/vit_base_patch14_dinov2.lvd142m_543', 'timm/vit_base_patch16_224.dino_561', 'timm/vit_base_patch16_224.dino_589', 'timm/vit_base_patch16_224.dino_201', 'timm/vit_base_patch14_dinov2.lvd142m_367', 'timm/vit_base_patch14_dinov2.lvd142m_315', 'timm/vit_base_patch14_dinov2.lvd142m_124', 'timm/vit_base_patch16_224.dino_723', 'timm/vit_base_patch14_dinov2.lvd142m_620', 'timm/vit_base_patch14_dinov2.lvd142m_129', 'timm/vit_base_patch16_224.dino_342', 'timm/vit_base_patch16_224.dino_5', 'timm/vit_base_patch16_224.dino_52', 'timm/vit_base_patch16_224.dino_285', 'timm/vit_base_patch14_dinov2.lvd142m_644', 'timm/vit_base_patch14_dinov2.lvd142m_394', 'timm/vit_base_patch14_dinov2.lvd142m_344', 'timm/vit_base_patch14_dinov2.lvd142m_196', 'timm/vit_base_patch14_dinov2.lvd142m_95', 'timm/vit_base_patch14_dinov2.lvd142m_663', 'timm/vit_base_patch14_dinov2.lvd142m_364', 'timm/vit_base_patch14_dinov2.lvd142m_481', 'timm/vit_base_patch14_dinov2.lvd142m_281', 'timm/vit_base_patch14_dinov2.lvd142m_285', 'timm/vit_base_patch14_dinov2.lvd142m_653', 'timm/vit_base_patch14_dinov2.lvd142m_657', 'timm/vit_base_patch14_dinov2.lvd142m_420', 'timm/vit_base_patch14_dinov2.lvd142m_263', 'timm/vit_base_patch14_dinov2.lvd142m_277', 'timm/vit_base_patch16_224.dino_171', 'timm/vit_base_patch16_224.dino_240', 'timm/vit_base_patch14_dinov2.lvd142m_545', 'timm/vit_base_patch14_dinov2.lvd142m_689', 'timm/vit_base_patch14_dinov2.lvd142m_259', 'timm/vit_base_patch16_224.dino_495', 'timm/vit_base_patch16_224.dino_751', 'timm/vit_base_patch14_dinov2.lvd142m_715', 'timm/vit_base_patch14_dinov2.lvd142m_518', 'timm/vit_base_patch14_dinov2.lvd142m_298', 'timm/vit_base_patch14_dinov2.lvd142m_742', 'timm/vit_base_patch16_224.dino_526', 'timm/vit_base_patch14_dinov2.lvd142m_498', 'timm/vit_base_patch14_dinov2.lvd142m_175', 'timm/vit_base_patch14_dinov2.lvd142m_522', 'timm/vit_base_patch14_dinov2.lvd142m_594', 'timm/vit_base_patch14_dinov2.lvd142m_159', 'timm/vit_base_patch14_dinov2.lvd142m_662', 
'timm/vit_base_patch16_224.dino_309', 'timm/vit_base_patch16_224.dino_427', 'timm/vit_base_patch16_224.dino_740', 'timm/vit_base_patch14_dinov2.lvd142m_753', 'timm/vit_base_patch16_224.dino_632', 'timm/vit_base_patch16_224.dino_333', 'timm/vit_base_patch16_224.dino_499', 'timm/vit_base_patch14_dinov2.lvd142m_206', 'timm/vit_base_patch16_224.dino_409', 'timm/vit_base_patch14_dinov2.lvd142m_387', 'timm/vit_base_patch14_dinov2.lvd142m_602', 'timm/vit_base_patch16_224.dino_196', 'timm/vit_base_patch14_dinov2.lvd142m_102', 'timm/vit_base_patch16_224.dino_498', 'timm/vit_base_patch16_224.dino_156', 'timm/vit_base_patch16_224.dino_634', 'timm/vit_base_patch16_224.dino_252', 'timm/vit_base_patch14_dinov2.lvd142m_317', 'timm/vit_base_patch14_dinov2.lvd142m_138', 'timm/vit_base_patch14_dinov2.lvd142m_621', 'timm/vit_base_patch16_224.dino_514', 'timm/vit_base_patch14_dinov2.lvd142m_646', 'timm/vit_base_patch16_224.dino_47', 'timm/vit_base_patch16_224.dino_673', 'timm/vit_base_patch14_dinov2.lvd142m_300', 'timm/vit_base_patch14_dinov2.lvd142m_505', 'timm/vit_base_patch16_224.dino_510', 'timm/vit_base_patch14_dinov2.lvd142m_683', 'timm/vit_base_patch16_224.dino_151', 'timm/vit_base_patch14_dinov2.lvd142m_716', 'timm/vit_base_patch14_dinov2.lvd142m_137', 'timm/vit_base_patch14_dinov2.lvd142m_665', 'timm/vit_base_patch14_dinov2.lvd142m_287', 'timm/vit_base_patch14_dinov2.lvd142m_305', 'timm/vit_base_patch16_224.dino_534', 'timm/vit_base_patch16_224.dino_37', 'timm/vit_base_patch14_dinov2.lvd142m_150', 'timm/vit_base_patch16_224.dino_120', 'timm/vit_base_patch16_224.dino_288', 'timm/vit_base_patch16_224.dino_142', 'timm/vit_base_patch14_dinov2.lvd142m_726', 'timm/vit_base_patch16_224.dino_621', 'timm/vit_base_patch14_dinov2.lvd142m_635', 'timm/vit_base_patch16_224.dino_400', 'timm/vit_base_patch16_224.dino_645', 'timm/vit_base_patch14_dinov2.lvd142m_721', 'timm/vit_base_patch16_224.dino_184', 'timm/vit_base_patch16_224.dino_506', 'timm/vit_base_patch14_dinov2.lvd142m_690', 
'timm/vit_base_patch14_dinov2.lvd142m_521', 'timm/vit_base_patch14_dinov2.lvd142m_687', 'timm/vit_base_patch14_dinov2.lvd142m_407', 'timm/vit_base_patch14_dinov2.lvd142m_648', 'timm/vit_base_patch14_dinov2.lvd142m_469', 'timm/vit_base_patch14_dinov2.lvd142m_636', 'timm/vit_base_patch14_dinov2.lvd142m_181', 'timm/vit_base_patch14_dinov2.lvd142m_187', 'timm/vit_base_patch16_224.dino_487', 'timm/vit_base_patch14_dinov2.lvd142m_33', 'timm/vit_base_patch14_dinov2.lvd142m_736', 'timm/vit_base_patch16_224.dino_507', 'timm/vit_base_patch14_dinov2.lvd142m_725', 'timm/vit_base_patch14_dinov2.lvd142m_112', 'timm/vit_base_patch16_224.dino_310', 'timm/vit_base_patch16_224.dino_493', 'timm/vit_base_patch14_dinov2.lvd142m_286', 'timm/vit_base_patch14_dinov2.lvd142m_358', 'timm/vit_base_patch14_dinov2.lvd142m_12', 'timm/vit_base_patch14_dinov2.lvd142m_705', 'timm/vit_base_patch14_dinov2.lvd142m_11', 'timm/vit_base_patch16_224.dino_132', 'timm/vit_base_patch16_224.dino_113', 'timm/vit_base_patch14_dinov2.lvd142m_504', 'timm/vit_base_patch16_224.dino_28', 'timm/vit_base_patch14_dinov2.lvd142m_121', 'timm/vit_base_patch16_224.dino_702', 'timm/vit_base_patch14_dinov2.lvd142m_610', 'timm/vit_base_patch16_224.dino_647', 'timm/vit_base_patch16_224.dino_569', 'timm/vit_base_patch16_224.dino_661', 'timm/vit_base_patch14_dinov2.lvd142m_200', 'timm/vit_base_patch14_dinov2.lvd142m_216', 'timm/vit_base_patch16_224.dino_602', 'timm/vit_base_patch16_224.dino_117', 'timm/vit_base_patch14_dinov2.lvd142m_632', 'timm/vit_base_patch16_224.dino_566', 'timm/vit_base_patch16_224.dino_593', 'timm/vit_base_patch14_dinov2.lvd142m_417', 'timm/vit_base_patch14_dinov2.lvd142m_595', 'timm/vit_base_patch16_224.dino_381', 'timm/vit_base_patch16_224.dino_233', 'timm/vit_base_patch14_dinov2.lvd142m_698', 'timm/vit_base_patch14_dinov2.lvd142m_590', 'timm/vit_base_patch16_224.dino_741', 'timm/vit_base_patch14_dinov2.lvd142m_576', 'timm/vit_base_patch16_224.dino_708', 'timm/vit_base_patch14_dinov2.lvd142m_30', 
'timm/vit_base_patch14_dinov2.lvd142m_57', 'timm/vit_base_patch14_dinov2.lvd142m_279', 'timm/vit_base_patch14_dinov2.lvd142m_475', 'timm/vit_base_patch16_224.dino_689', 'timm/vit_base_patch16_224.dino_408', 'timm/vit_base_patch16_224.dino_270', 'timm/vit_base_patch16_224.dino_337', 'timm/vit_base_patch16_224.dino_609', 'timm/vit_base_patch14_dinov2.lvd142m_685', 'timm/vit_base_patch16_224.dino_34', 'timm/vit_base_patch14_dinov2.lvd142m_434', 'timm/vit_base_patch14_dinov2.lvd142m_409', 'timm/vit_base_patch14_dinov2.lvd142m_664', 'timm/vit_base_patch14_dinov2.lvd142m_412', 'timm/vit_base_patch16_224.dino_374', 'timm/vit_base_patch14_dinov2.lvd142m_115', 'timm/vit_base_patch14_dinov2.lvd142m_301', 'timm/vit_base_patch14_dinov2.lvd142m_42', 'timm/vit_base_patch14_dinov2.lvd142m_264', 'timm/vit_base_patch16_224.dino_343', 'timm/vit_base_patch14_dinov2.lvd142m_473', 'timm/vit_base_patch14_dinov2.lvd142m_388', 'timm/vit_base_patch16_224.dino_112', 'timm/vit_base_patch16_224.dino_394', 'timm/vit_base_patch16_224.dino_70', 'timm/vit_base_patch14_dinov2.lvd142m_735', 'timm/vit_base_patch16_224.dino_463', 'timm/vit_base_patch16_224.dino_536', 'timm/vit_base_patch14_dinov2.lvd142m_291', 'timm/vit_base_patch14_dinov2.lvd142m_72', 'timm/vit_base_patch16_224.dino_234', 'timm/vit_base_patch14_dinov2.lvd142m_442', 'timm/vit_base_patch14_dinov2.lvd142m_205', 'timm/vit_base_patch16_224.dino_166', 'timm/vit_base_patch14_dinov2.lvd142m_638', 'timm/vit_base_patch16_224.dino_581', 'timm/vit_base_patch16_224.dino_225', 'timm/vit_base_patch16_224.dino_84', 'timm/vit_base_patch14_dinov2.lvd142m_185', 'timm/vit_base_patch14_dinov2.lvd142m_295', 'timm/vit_base_patch14_dinov2.lvd142m_699', 'timm/vit_base_patch14_dinov2.lvd142m_470', 'timm/vit_base_patch16_224.dino_454', 'timm/vit_base_patch16_224.dino_544', 'timm/vit_base_patch16_224.dino_412', 'timm/vit_base_patch16_224.dino_80', 'timm/vit_base_patch14_dinov2.lvd142m_526', 'timm/vit_base_patch16_224.dino_143', 
'timm/vit_base_patch16_224.dino_388', 'timm/vit_base_patch14_dinov2.lvd142m_437', 'timm/vit_base_patch14_dinov2.lvd142m_246', 'timm/vit_base_patch16_224.dino_754', 'timm/vit_base_patch14_dinov2.lvd142m_548', 'timm/vit_base_patch14_dinov2.lvd142m_538', 'timm/vit_base_patch16_224.dino_719', 'timm/vit_base_patch14_dinov2.lvd142m_761', 'timm/vit_base_patch16_224.dino_4', 'timm/vit_base_patch16_224.dino_289', 'timm/vit_base_patch14_dinov2.lvd142m_578', 'timm/vit_base_patch14_dinov2.lvd142m_331', 'timm/vit_base_patch14_dinov2.lvd142m_416', 'timm/vit_base_patch14_dinov2.lvd142m_96', 'timm/vit_base_patch16_224.dino_497', 'timm/vit_base_patch14_dinov2.lvd142m_608', 'timm/vit_base_patch14_dinov2.lvd142m_194', 'timm/vit_base_patch16_224.dino_75', 'timm/vit_base_patch16_224.dino_257', 'timm/vit_base_patch14_dinov2.lvd142m_489', 'timm/vit_base_patch16_224.dino_379', 'timm/vit_base_patch16_224.dino_757', 'timm/vit_base_patch16_224.dino_420', 'timm/vit_base_patch14_dinov2.lvd142m_561', 'timm/vit_base_patch14_dinov2.lvd142m_560', 'timm/vit_base_patch14_dinov2.lvd142m_18', 'timm/vit_base_patch16_224.dino_552', 'timm/vit_base_patch14_dinov2.lvd142m_182', 'timm/vit_base_patch14_dinov2.lvd142m_515', 'timm/vit_base_patch14_dinov2.lvd142m_406', 'timm/vit_base_patch16_224.dino_179', 'timm/vit_base_patch14_dinov2.lvd142m_225', 'timm/vit_base_patch16_224.dino_228', 'timm/vit_base_patch16_224.dino_760', 'timm/vit_base_patch14_dinov2.lvd142m_296', 'timm/vit_base_patch16_224.dino_294', 'timm/vit_base_patch14_dinov2.lvd142m_77', 'timm/vit_base_patch14_dinov2.lvd142m_466', 'timm/vit_base_patch14_dinov2.lvd142m_108', 'timm/vit_base_patch16_224.dino_319', 'timm/vit_base_patch14_dinov2.lvd142m_288', 'timm/vit_base_patch16_224.dino_558', 'timm/vit_base_patch16_224.dino_220', 'timm/vit_base_patch16_224.dino_523', 'timm/vit_base_patch16_224.dino_100', 'timm/vit_base_patch16_224.dino_323', 'timm/vit_base_patch14_dinov2.lvd142m_501', 'timm/vit_base_patch14_dinov2.lvd142m_186', 
'timm/vit_base_patch16_224.dino_548', 'timm/vit_base_patch14_dinov2.lvd142m_78', 'timm/vit_base_patch14_dinov2.lvd142m_413', 'timm/vit_base_patch14_dinov2.lvd142m_415', 'timm/vit_base_patch14_dinov2.lvd142m_48', 'timm/vit_base_patch16_224.dino_370', 'timm/vit_base_patch14_dinov2.lvd142m_328', 'timm/vit_base_patch14_dinov2.lvd142m_84', 'timm/vit_base_patch16_224.dino_135', 'timm/vit_base_patch16_224.dino_190', 'timm/vit_base_patch14_dinov2.lvd142m_443', 'timm/vit_base_patch14_dinov2.lvd142m_618', 'timm/vit_base_patch16_224.dino_636', 'timm/vit_base_patch14_dinov2.lvd142m_631', 'timm/vit_base_patch16_224.dino_244', 'timm/vit_base_patch16_224.dino_314', 'timm/vit_base_patch14_dinov2.lvd142m_435', 'timm/vit_base_patch16_224.dino_568', 'timm/vit_base_patch16_224.dino_266', 'timm/vit_base_patch14_dinov2.lvd142m_321', 'timm/vit_base_patch14_dinov2.lvd142m_588', 'timm/vit_base_patch14_dinov2.lvd142m_467', 'timm/vit_base_patch14_dinov2.lvd142m_741', 'timm/vit_base_patch14_dinov2.lvd142m_130', 'timm/vit_base_patch16_224.dino_25', 'timm/vit_base_patch14_dinov2.lvd142m_322', 'timm/vit_base_patch16_224.dino_641', 'timm/vit_base_patch14_dinov2.lvd142m_485', 'timm/vit_base_patch16_224.dino_188', 'timm/vit_base_patch14_dinov2.lvd142m_123', 'timm/vit_base_patch16_224.dino_671', 'timm/vit_base_patch14_dinov2.lvd142m_581', 'timm/vit_base_patch14_dinov2.lvd142m_592', 'timm/vit_base_patch16_224.dino_531', 'timm/vit_base_patch14_dinov2.lvd142m_62', 'timm/vit_base_patch14_dinov2.lvd142m_241', 'timm/vit_base_patch14_dinov2.lvd142m_174', 'timm/vit_base_patch16_224.dino_242', 'timm/vit_base_patch14_dinov2.lvd142m_114', 'timm/vit_base_patch14_dinov2.lvd142m_445', 'timm/vit_base_patch14_dinov2.lvd142m_492', 'timm/vit_base_patch14_dinov2.lvd142m_375', 'timm/vit_base_patch14_dinov2.lvd142m_474', 'timm/vit_base_patch14_dinov2.lvd142m_444', 'timm/vit_base_patch16_224.dino_460', 'timm/vit_base_patch14_dinov2.lvd142m_15', 'timm/vit_base_patch16_224.dino_236', 
'timm/vit_base_patch14_dinov2.lvd142m_240', 'timm/vit_base_patch14_dinov2.lvd142m_556', 'timm/vit_base_patch16_224.dino_76', 'timm/vit_base_patch14_dinov2.lvd142m_559', 'timm/vit_base_patch16_224.dino_235', 'timm/vit_base_patch14_dinov2.lvd142m_330', 'timm/vit_base_patch14_dinov2.lvd142m_537', 'timm/vit_base_patch16_224.dino_426', 'timm/vit_base_patch14_dinov2.lvd142m_193', 'timm/vit_base_patch14_dinov2.lvd142m_396', 'timm/vit_base_patch16_224.dino_347', 'timm/vit_base_patch16_224.dino_639', 'timm/vit_base_patch14_dinov2.lvd142m_547', 'timm/vit_base_patch16_224.dino_369', 'timm/vit_base_patch16_224.dino_6', 'timm/vit_base_patch14_dinov2.lvd142m_191', 'timm/vit_base_patch16_224.dino_277', 'timm/vit_base_patch14_dinov2.lvd142m_379', 'timm/vit_base_patch14_dinov2.lvd142m_197', 'timm/vit_base_patch16_224.dino_404', 'timm/vit_base_patch14_dinov2.lvd142m_655', 'timm/vit_base_patch16_224.dino_716', 'timm/vit_base_patch14_dinov2.lvd142m_61', 'timm/vit_base_patch16_224.dino_752', 'timm/vit_base_patch16_224.dino_724', 'timm/vit_base_patch16_224.dino_604', 'timm/vit_base_patch14_dinov2.lvd142m_550', 'timm/vit_base_patch14_dinov2.lvd142m_356', 'timm/vit_base_patch16_224.dino_439', 'timm/vit_base_patch14_dinov2.lvd142m_44', 'timm/vit_base_patch14_dinov2.lvd142m_260', 'timm/vit_base_patch14_dinov2.lvd142m_424', 'timm/vit_base_patch16_224.dino_398', 'timm/vit_base_patch14_dinov2.lvd142m_626', 'timm/vit_base_patch14_dinov2.lvd142m_531', 'timm/vit_base_patch14_dinov2.lvd142m_496', 'timm/vit_base_patch14_dinov2.lvd142m_91', 'timm/vit_base_patch14_dinov2.lvd142m_9', 'timm/vit_base_patch14_dinov2.lvd142m_274', 'timm/vit_base_patch16_224.dino_368', 'timm/vit_base_patch16_224.dino_712', 'timm/vit_base_patch16_224.dino_756', 'timm/vit_base_patch14_dinov2.lvd142m_686', 'timm/vit_base_patch14_dinov2.lvd142m_163', 'timm/vit_base_patch14_dinov2.lvd142m_218', 'timm/vit_base_patch14_dinov2.lvd142m_160', 'timm/vit_base_patch14_dinov2.lvd142m_184', 'timm/vit_base_patch16_224.dino_118', 
'timm/vit_base_patch14_dinov2.lvd142m_503', 'timm/vit_base_patch14_dinov2.lvd142m_745', 'timm/vit_base_patch16_224.dino_642', 'timm/vit_base_patch14_dinov2.lvd142m_376', 'timm/vit_base_patch16_224.dino_13', 'timm/vit_base_patch14_dinov2.lvd142m_702', 'timm/vit_base_patch14_dinov2.lvd142m_270', 'timm/vit_base_patch16_224.dino_181', 'timm/vit_base_patch14_dinov2.lvd142m_458', 'timm/vit_base_patch14_dinov2.lvd142m_541', 'timm/vit_base_patch14_dinov2.lvd142m_215', 'timm/vit_base_patch16_224.dino_540', 'timm/vit_base_patch16_224.dino_386', 'timm/vit_base_patch14_dinov2.lvd142m_517', 'timm/vit_base_patch16_224.dino_601', 'timm/vit_base_patch14_dinov2.lvd142m_128', 'timm/vit_base_patch16_224.dino_742', 'timm/vit_base_patch14_dinov2.lvd142m_350', 'timm/vit_base_patch16_224.dino_222', 'timm/vit_base_patch14_dinov2.lvd142m_607', 'timm/vit_base_patch16_224.dino_346', 'timm/vit_base_patch16_224.dino_204', 'timm/vit_base_patch16_224.dino_326', 'timm/vit_base_patch14_dinov2.lvd142m_630', 'timm/vit_base_patch14_dinov2.lvd142m_5', 'timm/vit_base_patch14_dinov2.lvd142m_743', 'timm/vit_base_patch14_dinov2.lvd142m_117', 'timm/vit_base_patch16_224.dino_630', 'timm/vit_base_patch14_dinov2.lvd142m_361', 'timm/vit_base_patch14_dinov2.lvd142m_60', 'timm/vit_base_patch16_224.dino_308', 'timm/vit_base_patch14_dinov2.lvd142m_162', 'timm/vit_base_patch16_224.dino_378', 'timm/vit_base_patch14_dinov2.lvd142m_199', 'timm/vit_base_patch14_dinov2.lvd142m_583', 'timm/vit_base_patch14_dinov2.lvd142m_89', 'timm/vit_base_patch14_dinov2.lvd142m_269', 'timm/vit_base_patch14_dinov2.lvd142m_71', 'timm/vit_base_patch14_dinov2.lvd142m_645', 'timm/vit_base_patch16_224.dino_738', 'timm/vit_base_patch14_dinov2.lvd142m_523', 'timm/vit_base_patch16_224.dino_590', 'timm/vit_base_patch14_dinov2.lvd142m_204', 'timm/vit_base_patch14_dinov2.lvd142m_385', 'timm/vit_base_patch16_224.dino_164', 'timm/vit_base_patch16_224.dino_556', 'timm/vit_base_patch14_dinov2.lvd142m_233', 'timm/vit_base_patch14_dinov2.lvd142m_461', 
'timm/vit_base_patch16_224.dino_395', 'timm/vit_base_patch14_dinov2.lvd142m_617', 'timm/vit_base_patch14_dinov2.lvd142m_372', 'timm/vit_base_patch16_224.dino_61', 'timm/vit_base_patch16_224.dino_480', 'timm/vit_base_patch14_dinov2.lvd142m_637', 'timm/vit_base_patch14_dinov2.lvd142m_352', 'timm/vit_base_patch14_dinov2.lvd142m_217', 'timm/vit_base_patch16_224.dino_312', 'timm/vit_base_patch14_dinov2.lvd142m_253', 'timm/vit_base_patch14_dinov2.lvd142m_509', 'timm/vit_base_patch16_224.dino_272', 'timm/vit_base_patch14_dinov2.lvd142m_80', 'timm/vit_base_patch14_dinov2.lvd142m_427', 'timm/vit_base_patch16_224.dino_610', 'timm/vit_base_patch14_dinov2.lvd142m_535', 'timm/vit_base_patch16_224.dino_748', 'timm/vit_base_patch14_dinov2.lvd142m_482', 'timm/vit_base_patch14_dinov2.lvd142m_41', 'timm/vit_base_patch16_224.dino_652', 'timm/vit_base_patch14_dinov2.lvd142m_236', 'timm/vit_base_patch14_dinov2.lvd142m_423', 'timm/vit_base_patch16_224.dino_571', 'timm/vit_base_patch14_dinov2.lvd142m_421', 'timm/vit_base_patch14_dinov2.lvd142m_252', 'timm/vit_base_patch14_dinov2.lvd142m_568', 'timm/vit_base_patch16_224.dino_622', 'timm/vit_base_patch16_224.dino_194', 'timm/vit_base_patch14_dinov2.lvd142m_464', 'timm/vit_base_patch16_224.dino_137', 'timm/vit_base_patch16_224.dino_276', 'timm/vit_base_patch16_224.dino_688', 'timm/vit_base_patch16_224.dino_91', 'timm/vit_base_patch14_dinov2.lvd142m_135', 'timm/vit_base_patch16_224.dino_728', 'timm/vit_base_patch14_dinov2.lvd142m_507', 'timm/vit_base_patch16_224.dino_7', 'timm/vit_base_patch16_224.dino_219', 'timm/vit_base_patch14_dinov2.lvd142m_237', 'timm/vit_base_patch14_dinov2.lvd142m_34', 'timm/vit_base_patch16_224.dino_45', 'timm/vit_base_patch14_dinov2.lvd142m_720', 'timm/vit_base_patch14_dinov2.lvd142m_8', 'timm/vit_base_patch14_dinov2.lvd142m_230', 'timm/vit_base_patch16_224.dino_66', 'timm/vit_base_patch16_224.dino_46', 'timm/vit_base_patch14_dinov2.lvd142m_73', 'timm/vit_base_patch14_dinov2.lvd142m_384', 
'timm/vit_base_patch16_224.dino_455', 'timm/vit_base_patch16_224.dino_131', 'timm/vit_base_patch14_dinov2.lvd142m_551', 'timm/vit_base_patch16_224.dino_416', 'timm/vit_base_patch16_224.dino_577', 'timm/vit_base_patch16_224.dino_354', 'timm/vit_base_patch14_dinov2.lvd142m_397', 'timm/vit_base_patch16_224.dino_286', 'timm/vit_base_patch16_224.dino_669', 'timm/vit_base_patch16_224.dino_504', 'timm/vit_base_patch14_dinov2.lvd142m_145', 'timm/vit_base_patch16_224.dino_494', 'timm/vit_base_patch16_224.dino_763', 'timm/vit_base_patch16_224.dino_735', 'timm/vit_base_patch16_224.dino_367', 'timm/vit_base_patch16_224.dino_517', 'timm/vit_base_patch14_dinov2.lvd142m_365', 'timm/vit_base_patch14_dinov2.lvd142m_381', 'timm/vit_base_patch14_dinov2.lvd142m_667', 'timm/vit_base_patch16_224.dino_563', 'timm/vit_base_patch14_dinov2.lvd142m_97', 'timm/vit_base_patch16_224.dino_98', 'timm/vit_base_patch16_224.dino_217', 'timm/vit_base_patch14_dinov2.lvd142m_622', 'timm/vit_base_patch16_224.dino_353', 'timm/vit_base_patch14_dinov2.lvd142m_66', 'timm/vit_base_patch14_dinov2.lvd142m_393', 'timm/vit_base_patch14_dinov2.lvd142m_19', 'timm/vit_base_patch14_dinov2.lvd142m_224', 'timm/vit_base_patch16_224.dino_472', 'timm/vit_base_patch16_224.dino_162', 'timm/vit_base_patch14_dinov2.lvd142m_733', 'timm/vit_base_patch16_224.dino_295', 'timm/vit_base_patch16_224.dino_478', 'timm/vit_base_patch16_224.dino_212', 'timm/vit_base_patch14_dinov2.lvd142m_283', 'timm/vit_base_patch16_224.dino_743']


In [1112]:
# Experiment cell: train LIGHT_GBM models on the original measurements joined
# with two feature tables (a "primary" and a "secondary" one, each read from
# `{model}_{option}.xlsx`) and collect the R2 / MSE of every run.
#
# Relies on names defined in earlier cells: model_list, list_dataset,
# std_dataset_names, ModelType, param_search, selected_feats_backup, y_cols,
# Model, Dataset, ResultFrame and ScalerType.
result_frame: ResultFrame = ResultFrame()

# Secondary feature tables to join on top of the primary one.
secondary_model_list = [
    # "vit_large_patch14_dinov2.lvd142m",
    # "vit_small_patch14_dinov2.lvd142m",
    # "eva_large_patch14_196.in22k_ft_in22k_in1k",
    "vit_base_patch14_dinov2.lvd142m",
    # 'resnet34'
    # "vit_base_patch16_224.mae"
]
# secondary_options = ["mean", "get_one", "get_97"] + [f"get_{k}" for k in [5, 10, 20, 30]] + [f"interval_{k}" for k in [5, 10, 20, 30]]
secondary_options = [f"get_{k}" for k in [30]]
# options = ["mean", "get_one", "get_97"] + [f"get_{k}" for k in [5, 10, 20, 30]]
options = ["get_10"]

for model_name in model_list:
    for option in options:
        # resnet_dataset = pd.read_excel(f"legacy/{model_name}_{option}.xlsx")
        resnet_dataset = pd.read_excel(f"{model_name}_{option}.xlsx")
        df_orig = list_dataset["std_col_name_ohv_type"].get_dataframe()

        # The join of the original data with the primary feature table does not
        # depend on the secondary model/option, so compute it once per
        # (model_name, option) instead of once per inner iteration.
        df_primary = pd.merge(df_orig, resnet_dataset, on="name")

        for snd_model in secondary_model_list:
            for opt in secondary_options:
                df_snd = pd.read_excel(f"{snd_model}_{opt}.xlsx")
                # df_snd = pd.read_excel(f"legacy/{snd_model}_{opt}.xlsx")
                df_new = pd.merge(df_primary, df_snd, on="name")
                snd_cols = list(df_snd.drop("name", axis=1).columns)
                dataset_orig_resnet = Dataset("orig_resnet", df_new, ScalerType.Raw_)

                # Registered under a fixed key, so each iteration overwrites
                # the previous combined dataset.
                list_dataset["orig_resnet"] = dataset_orig_resnet
                orig_cols = ["mass", "v_diameter", "C"]
                resnet_cols = list(resnet_dataset.drop("name", axis=1).columns)

                # x_cols = ['B', 'C'] + resnet_cols
                x_cols = orig_cols + resnet_cols
                # Only the "resnet_cols" entry is trained below; "orig_cols"
                # and "x_cols" are kept for reference and skipped in the loop.
                feature_collection = {
                    "orig_cols": orig_cols,
                    "x_cols": x_cols,
                    "resnet_cols": selected_feats_backup + ["mass", "v_diameter", "C"],
                }

                # NOTE(review): dataset_name is never used in the loop body, so
                # every entry of std_dataset_names repeats the same training on
                # list_dataset["orig_resnet"] - confirm this is intended.
                for dataset_name in std_dataset_names:
                    for model_type in ModelType:
                        model_family = model_type.name
                        # Only LIGHT_GBM is evaluated in this experiment.
                        if model_family != "LIGHT_GBM":
                            continue
                        for param in param_search[model_family]:
                            # NOTE(review): this writes into the object stored
                            # as the enum member's value, so the overrides
                            # persist after this cell finishes.
                            for key, value in param.items():
                                model_type.value[key] = value
                            for key, value in feature_collection.items():
                                if key in ["orig_cols", "x_cols"]:
                                    continue
                                name = f"{model_family}_{key}_{model_name}_{option}_{str(param)}_{snd_model}_{opt}"
                                print(f"Training {name}")
                                model = Model(
                                    model_type=model_type,
                                    name=name,
                                    x_cols=value,
                                    y_cols=y_cols,
                                    dataset=list_dataset["orig_resnet"],
                                    scaler_type=ScalerType.Raw_,
                                )
                                result = model.evaluate()
                                # The last two entries of result.result are
                                # reported as R2 and MSE respectively.
                                r2 = result.result[-2]
                                mse = result.result[-1]
                                print(f"R2: {r2} | MSE: {mse}")
                                result_frame.add_result(result)


result_frame.display_result()
# result_frame.save_result(f"result_{model_name}_num_leaves_exp.xlsx")

Training LIGHT_GBM_resnet_cols_vit_base_patch16_224.dino_get_10_{'learning_rate': 0.015, 'max_depth': -1, 'min_child_samples': 20, 'n_estimators': 1900}_vit_base_patch14_dinov2.lvd142m_get_30


100%|██████████████████████████████████████████████████████████████████████████████████| 81/81 [01:15<00:00,  1.08it/s]

R2: 0.6064994987801384 | MSE: 0.7216923342050287





Unnamed: 0,dataset_name,model_name,model_type,scaler_type,r2,mse
0,orig_resnet,LIGHT_GBM_resnet_cols_vit_base_patch16_224.din...,ModelType.LIGHT_GBM,ScalerType.Raw_,0.606499,0.721692
