In [None]:
import numpy as np
import pandas as pd 
from xgboost import XGBRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor 
from sklearn.preprocessing import MinMaxScaler 

In [None]:
# --- File locations ----------------------------------------------------------
DATA_DIR = "./data"
MODEL = 'dinov2_vitl14'   # used to build the "<MODEL>_data" directory name below
ITERATION = 2             # suffix appended to the submission file name
TRAIN_DATA_PATH = f"{DATA_DIR}/train.csv"
TEST_DATA_PATH = f"{DATA_DIR}/test.csv"
TRAIN_EMBEDDINGS_PATH = f"{DATA_DIR}/{MODEL}_data/train_embeddings.csv"
TEST_EMBEDDINGS_PATH = f"{DATA_DIR}/{MODEL}_data/test_embeddings.csv"
SUBMISSION_PATH = f"{DATA_DIR}/{MODEL}_data/submission{ITERATION}.csv"

# NOTE(review): not referenced by any cell visible in this notebook.
TRAIN_DATA_RATIO = 0.8

# --- Positional column ranges (half-open, used with DataFrame.iloc) ----------
# Auxiliary feature columns of the train/test CSVs.
AUX_START = 1
# NOTE(review): the original line carried a trailing "#7", which looks like an
# alternative end index that was tried at some point — confirm before changing.
AUX_END = 164
AUX_CATEGORIES = AUX_END - AUX_START

# Label columns of the train CSV.
LABELS_START = 164
# Misspelled historical name, kept as an alias because other cells refer to it.
LABLES_START = LABELS_START
LABELS_END = 170
LABELS_CATEGORIES = LABELS_END - LABELS_START


In [None]:

def preprocessData(filename):
    """Read a CSV and split it into image ids, auxiliary features and labels.

    Parameters
    ----------
    filename
        Passed straight to ``pd.read_csv``. The file must contain an ``id``
        column.

    Returns
    -------
    imageIds : numpy.ndarray
        The ``id`` column as a 1-D array (one entry per row, even when the
        file has a single row).
    auxData : pandas.DataFrame
        Columns at positions ``AUX_START:AUX_END``.
    labels : pandas.DataFrame
        Columns at positions ``LABLES_START:LABELS_END``. Positional slicing
        does not raise when the file has fewer columns, so this frame is
        narrower (possibly zero columns) for files without label columns.
    """
    df = pd.read_csv(filename)

    # Select the Series directly instead of squeezing a (n, 1) array:
    # squeeze() turns a single-row file into a 0-d array that cannot be
    # iterated over by the callers.
    imageIds = df['id'].to_numpy()
    auxData = df.iloc[:, AUX_START:AUX_END]
    labels = df.iloc[:, LABLES_START:LABELS_END]

    return imageIds, auxData, labels

# NOTE(review): both scalers are constructed here but are never fitted or
# applied in any cell visible in this notebook.
dataScaler = MinMaxScaler((0,1))
labelScaler = MinMaxScaler((0,1))

train_ids, train_aux, train_labels = preprocessData(TRAIN_DATA_PATH)
test_ids, test_aux, test_labels = preprocessData(TEST_DATA_PATH)


def _alignEmbeddings(embeddings, ids):
    """Return the embedding rows ordered like ``ids`` as a 2-D array.

    Parameters
    ----------
    embeddings : pandas.DataFrame
        Frame with an ``img_id`` column. The leading column is dropped from
        the result, matching the previous ``values[0, 1:]`` slicing
        (assumes ``img_id`` is that leading column — TODO confirm).
    ids : array-like
        Image ids to look up, in the desired output order.

    Returns
    -------
    numpy.ndarray
        One row per entry of ``ids``.

    Raises
    ------
    KeyError
        If an id has no matching ``img_id`` row.
    """
    lookup = (
        embeddings
        .drop_duplicates(subset='img_id')   # keep the first row per id, as before
        .set_index('img_id', drop=False)    # index once instead of rescanning per id
        .iloc[:, 1:]                        # drop the leading column
    )
    return lookup.loc[np.atleast_1d(ids)].to_numpy()


train_embed = _alignEmbeddings(pd.read_csv(TRAIN_EMBEDDINGS_PATH), train_ids)
test_embed = _alignEmbeddings(pd.read_csv(TEST_EMBEDDINGS_PATH), test_ids)

# Final feature matrices: auxiliary columns followed by the image embedding.
train_data = np.concatenate((train_aux, train_embed), axis=1)
test_data = np.concatenate((test_aux, test_embed), axis=1)

In [None]:

# Hyper-parameters handed to the XGBoost regressor.
xgb_params = {
    'objective': 'reg:squarederror',
    'n_estimators': 100,
    'max_depth': 5,
    'learning_rate': 0.1,
}

# Wrap the XGBoost regressor so it can be fitted against all label columns.
model = MultiOutputRegressor(XGBRegressor(**xgb_params))
model.fit(train_data, train_labels)

# Rebinds `test_labels` to the model's predictions for the test features.
test_labels = model.predict(test_data)



In [None]:
import csv

# newline='' hands line-ending control to the csv module; without it the
# writer's "\r\n" terminator gets translated again on Windows, producing a
# blank line after every row.
with open(SUBMISSION_PATH, "w", newline='') as file:
    columns = ['id', 'X4', 'X11', 'X18', 'X26', 'X50', 'X3112']
    csvwriter = csv.writer(file)
    csvwriter.writerow(columns)

    # One row per test image: its id followed by the predicted values.
    csvwriter.writerows([i] + list(p) for i, p in zip(test_ids, test_labels))


In [None]:
# Echo the submission file path as this cell's output.
SUBMISSION_PATH