In [1]:
# Step up one directory so the relative "./config" path and the `src.ice_cube`
# imports used further down resolve (presumably from the project root).
# NOTE(review): not idempotent — re-running this cell moves up another level.
%cd ..

/home/jovyan/ice-cube


In [35]:
import os

import pandas as pd

In [3]:
import logging

# Root logger setup for the notebook: emit INFO and above, tagging every record
# with its timestamp, level and originating module. Records go to the default
# stream handler; pass `filename=...` to basicConfig to log to a file instead.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] [%(module)s] %(message)s",
    level=logging.INFO,
)

# Logger for this notebook's own messages.
log = logging.getLogger(__name__)

In [4]:
##### INSERT SOURCE CODE HERE FOR SUBMISSION #####

In [5]:
from omegaconf import OmegaConf

# Directory holding the OmegaConf YAML fragments (relative to the working dir).
omega_conf_dir = "./config"

# Load main.yaml first, then data.yaml, and merge them into one config `c`.
c_main, c_data = (
    OmegaConf.load(os.path.join(omega_conf_dir, file_name))
    for file_name in ("main.yaml", "data.yaml")
)
c = OmegaConf.merge(c_main, c_data)

# Switch off Weights & Biases for this notebook session.
c.wandb.enabled = False

In [58]:
from src.ice_cube.data_loader import make_test_dataloader, make_train_dataloader
from src.ice_cube.model import load_pretrained_model
from src.ice_cube.scoring import angular_dist_score
from src.ice_cube.submission import to_submission_df

## SQLiteDataset

In [7]:
# Build the training and validation dataloaders from the merged config `c`.
train_loader, valid_loader = make_train_dataloader(c)

In [8]:
# Build the test dataloader from the same config.
# NOTE(review): test_loader is not referenced again in the visible cells.
test_loader = make_test_dataloader(c)





In [10]:
# Check which dataset class backs the validation loader.
type(valid_loader.dataset)

graphnet.data.sqlite.sqlite_dataset.SQLiteDataset

In [51]:
# Pull (event_id, azimuth, zenith) rows from "meta_table" via the training dataset.
# NOTE(review): train_df.info() and valid_df.info() below report identical
# frames (200000 rows, same index range) while prediction on valid_loader
# yields 66000 rows — query_table appears to return the whole table rather
# than only this split's events; confirm.
train_label = train_loader.dataset.query_table(
    "meta_table", ["event_id", "azimuth", "zenith"]
)

In [29]:
# Pull (event_id, azimuth, zenith) rows from "meta_table" via the validation dataset.
# NOTE(review): the resulting frame has 200000 rows, the same as the training
# one, although only 66000 events are predicted from valid_loader — this looks
# like the full table rather than the validation subset; confirm.
valid_label = valid_loader.dataset.query_table(
    "meta_table", ["event_id", "azimuth", "zenith"]
)

In [52]:
# Turn the queried label rows into a frame keyed by event_id, leaving
# azimuth and zenith as the two data columns.
train_df = pd.DataFrame(train_label, columns=["event_id", "azimuth", "zenith"])
train_df = train_df.set_index("event_id")

In [42]:
# Turn the queried label rows into a frame keyed by event_id, leaving
# azimuth and zenith as the two data columns.
valid_df = pd.DataFrame(valid_label, columns=["event_id", "azimuth", "zenith"])
valid_df = valid_df.set_index("event_id")

In [53]:
# Summarise the training label frame (row count, index type, dtypes, nulls).
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 200000 entries, 24 to 3266196
Data columns (total 2 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   azimuth  200000 non-null  float64
 1   zenith   200000 non-null  float64
dtypes: float64(2)
memory usage: 4.6 MB


In [43]:
# Summarise the validation label frame (row count, index type, dtypes, nulls).
valid_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 200000 entries, 24 to 3266196
Data columns (total 2 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   azimuth  200000 non-null  float64
 1   zenith   200000 non-null  float64
dtypes: float64(2)
memory usage: 4.6 MB


In [46]:
# Load the pretrained model using the merged config and the validation loader.
# NOTE(review): why the loader is needed at load time is not visible here —
# see src.ice_cube.model.load_pretrained_model.
model = load_pretrained_model(c, valid_loader)

2023-02-22 03:50:07,940 [INFO] [instantiator] Created a temporary directory at /tmp/tmphv18e9qy
2023-02-22 03:50:07,941 [INFO] [instantiator] Writing /tmp/tmphv18e9qy/_remote_module_non_sriptable.py


In [47]:
# Run inference over the validation loader and collect the output as a
# DataFrame. The model's own prediction columns are requested, and the column
# named by c.settings.index_name is carried along so rows can be matched back
# to events.
# NOTE(review): the device is hard-coded to GPU 0.
results = model.predict_as_dataframe(
    gpus=[0],
    dataloader=valid_loader,
    prediction_columns=model.prediction_columns,
    additional_attributes=[c.settings.index_name],
)

2023-02-22 03:50:57,079 [INFO] [rank_zero] GPU available: True (cuda), used: True
2023-02-22 03:50:57,080 [INFO] [rank_zero] TPU available: False, using: 0 TPU cores
2023-02-22 03:50:57,081 [INFO] [rank_zero] IPU available: False, using: 0 IPUs
2023-02-22 03:50:57,081 [INFO] [rank_zero] HPU available: False, using: 0 HPUs
2023-02-22 03:50:57,083 [INFO] [rank_zero] GPU available: True (cuda), used: True
2023-02-22 03:50:57,084 [INFO] [rank_zero] TPU available: False, using: 0 TPU cores
2023-02-22 03:50:57,084 [INFO] [rank_zero] IPU available: False, using: 0 IPUs
2023-02-22 03:50:57,085 [INFO] [rank_zero] HPU available: False, using: 0 HPUs
2023-02-22 03:50:59,483 [INFO] [cuda] LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

In [55]:
# Summarise the prediction frame.
# NOTE(review): event_id is reported as float64 here, not an integer dtype.
results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 66000 entries, 0 to 65999
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   direction_x      66000 non-null  float64
 1   direction_y      66000 non-null  float64
 2   direction_z      66000 non-null  float64
 3   direction_kappa  66000 non-null  float64
 4   event_id         66000 non-null  float64
 5   zenith           66000 non-null  float64
 6   azimuth          66000 non-null  float64
dtypes: float64(7)
memory usage: 3.5 MB


In [49]:
# Convert the raw predictions into the submission layout; per the .info()
# output below this is an event_id-indexed frame with azimuth and zenith columns.
submission_df = to_submission_df(results)

In [50]:
# Summarise the submission frame (note the float-typed event_id index).
submission_df.info()

<class 'pandas.core.frame.DataFrame'>
Float64Index: 66000 entries, 1863447.0 to 1513735.0
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   azimuth  66000 non-null  float64
 1   zenith   66000 non-null  float64
dtypes: float64(2)
memory usage: 1.5 MB


In [56]:
# Pick the label rows for exactly the predicted events, in prediction order.
# NOTE(review): submission_df is indexed by float event ids while valid_df has
# an integer index (see the .info() outputs); the lookup relies on pandas
# matching e.g. 1863447.0 to 1863447. Consider casting event_id to int upstream.
part_valid_df = valid_df.loc[submission_df.index, :]

In [57]:
# Confirm the selected label frame has one row per predicted event.
part_valid_df.info()

<class 'pandas.core.frame.DataFrame'>
Float64Index: 66000 entries, 1863447.0 to 1513735.0
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   azimuth  66000 non-null  float64
 1   zenith   66000 non-null  float64
dtypes: float64(2)
memory usage: 1.5 MB


In [59]:
# Score predictions against labels. Label angles (part_valid_df) are passed
# first and predicted angles (submission_df) second; both frames share the
# same index order because part_valid_df was selected by submission_df.index.
# NOTE(review): confirm this matches angular_dist_score's parameter order.
score = angular_dist_score(
    part_valid_df["azimuth"],
    part_valid_df["zenith"],
    submission_df["azimuth"],
    submission_df["zenith"],
)

In [60]:
# Display the validation score computed above.
score

1.0166950435635094