# Prepare source domain dataloaders

In [1]:
# Imports for this cell; `sys` is only needed to extend the module search path.
import sys
import pandas as pd
# Append ../shifts/weather/ to the module search path so modules located there
# can be imported. The entry is a relative path, so it depends on the
# directory the notebook is run from.
sys.path.append('../shifts/weather/')


# Read the three canonical partitions (train, dev_in, dev_out) in that order
# and bind each resulting frame to its own name.
df_train, df_dev_in, df_dev_out = map(pd.read_csv, (
    '/Users/nikglukhov/n.glukhov/canonical-paritioned-dataset/shifts_canonical_train.csv',
    '/Users/nikglukhov/n.glukhov/canonical-paritioned-dataset/shifts_canonical_dev_in.csv',
    '/Users/nikglukhov/n.glukhov/canonical-paritioned-dataset/shifts_canonical_dev_out.csv',
))

In [2]:
# Distinct values of the `climate` column in each partition, in order of
# first appearance (as returned by Series.unique()).
domains_train, domains_in, domains_out = (
    frame['climate'].unique()
    for frame in (df_train, df_dev_in, df_dev_out)
)

In [3]:
import torch

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset over the rows of a frame that belong to one climate.

    Each item is an ``(X, y)`` pair of tensors: ``X`` holds the feature
    columns of one row and ``y`` the matching target value.

    Args:
        df: pandas DataFrame with a ``climate`` column, the target column,
            and the feature columns located from position ``feature_start``
            onwards.
        climate: Value of ``df.climate`` that selects the rows to keep.
        feature_start: Positional index of the first feature column
            (keyword-only, defaults to 6).
        target: Name of the target column (keyword-only, defaults to
            ``'fact_temperature'``).

    Attributes:
        X_source_domain: Copy of the selected feature columns (DataFrame).
        y_source_domain: Copy of the selected target column (Series).
        climate: The climate value this dataset was built for.
    """

    def __init__(self, df, climate, *, feature_start=6, target='fact_temperature'):
        super().__init__()
        # Filter the frame once and reuse the result for features and target.
        rows = df[df.climate == climate]
        self.X_source_domain = rows.iloc[:, feature_start:].copy()
        self.y_source_domain = rows[target].copy()
        self.climate = climate

        # Keep array versions around so that __getitem__ indexes NumPy
        # directly instead of building a pandas Series for every sample.
        self._features = self.X_source_domain.to_numpy()
        self._targets = self.y_source_domain.to_numpy()

    def __len__(self):
        """Return the number of rows selected for this climate."""
        return len(self._targets)

    def __getitem__(self, index):
        """Return the ``(X, y)`` tensors for the row at position ``index``.

        ``index`` is positional within the selected rows, not a label of the
        original frame's index.
        """
        X = torch.tensor(self._features[index])
        y = torch.tensor(self._targets[index])
        return X, y

### Create source domain loaders

In [4]:
# One DataLoader per training climate, keyed by the climate value. Every
# loader wraps the rows of df_train for that climate and yields batches of
# `batch_size` samples.
batch_size = 64
source_domains_loaders = dict(
    (
        domain,
        torch.utils.data.DataLoader(Dataset(df_train, domain), batch_size=batch_size),
    )
    for domain in domains_train
)

### Load experts

In [5]:
import catboost

In [9]:
# Load one CatBoost regressor per training climate from disk and keep them in
# a dict keyed by climate value.
# NOTE(review): presumably each file was produced by an earlier training step
# that saved one expert per climate under this naming scheme — confirm.
dir_path = 'trained_experts'  # directory containing the saved models (relative path)
source_domains_experts = {}
for climate in domains_train:
    # Create an empty regressor, then populate it from the saved model file.
    model = catboost.CatBoostRegressor()
    # The file is expected at '<dir_path>/model_<climate>'.
    model.load_model(f'{dir_path}/model_{climate}')
    source_domains_experts[climate] = model