In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell executes, so edits to project .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
import glob
import os
import sys

# Put the notebook's parent directory on the import path (presumably the
# repository root that holds the `owkin` package -- confirm against the layout).
# The membership guard keeps re-running this cell from adding duplicates.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

import copy
import pandas as pd
import numpy as np
import torch
from torch import nn

from sklearn.metrics import roc_auc_score

# Set before the `owkin` imports below; presumably they pull in wandb, whose
# console output this flag is meant to silence -- confirm.
os.environ.update(WANDB_SILENT="true")

from owkin.training import train, RocLoss
from owkin.dataset import build_dataset
from owkin.models.mono_models import MLP, SVM, LogisticRegression
from owkin.models.aggregators import SmoothMaxAggregator, MaxAggregator

## Data

In [3]:
# Dataset options: forwarded to `build_dataset` and embedded in the run name.
normalizer_type = "expo_lambda"
# NOTE: this is the *string* "None", not the `None` singleton.
val_center = "None"

In [4]:
# Build every split in one call. `build_dataset` returns nine objects, unpacked
# here one group per line: train, validation, then test.
(
    X_train, X_train_mean, y_train,
    X_val, X_val_mean, y_val,
    X_test, X_test_mean, df_test,
) = build_dataset(
    normalizer_type=normalizer_type,
    val_center=val_center,
)

## SVM

### MonoModel

In [5]:
# SVM hyperparameters; both are passed to `SVM(...)` and recorded in the run name.
C = 0.01
kernel = "sigmoid"

In [6]:
# Instantiate the project's SVM wrapper with the hyperparameters chosen above.
mono_model = SVM(
    C=C,
    kernel=kernel,
)

In [7]:
# Fit the wrapped scikit-learn estimator on the `X_train_mean` features.
# `fit` is left as the cell's last expression so the fitted estimator's repr
# is displayed.
mono_model.sk_model.fit(X=X_train_mean, y=y_train)

SVC(C=0.01, kernel='sigmoid', probability=True)

In [8]:
# Score the validation split and report ROC AUC; the score is reused further
# down to name the submission directory and file.
mono_val_pred = mono_model(X_val_mean)
mono_model_val_score = roc_auc_score(y_true=y_val, y_score=mono_val_pred)
mono_model_val_score

0.699604743083004

In [13]:
# Descriptive run name built from the model and its settings; it becomes part of
# the output directory so that different runs can be told apart on disk.
run_name_ = (
    f"{mono_model.name}_nt_{normalizer_type}_vc_{val_center}"
    f"_C_{C:.0e}_kernel_{kernel}"
)

# NOTE: despite its name, `y_test` holds the model's *predictions* on the test
# features, not ground-truth labels. The name is kept unchanged in case later
# cells refer to it.
y_test = mono_model(X_test_mean)

submission = pd.DataFrame(
    {"Sample ID": df_test["Sample ID"].values, "Target": y_test}
).sort_values("Sample ID")  # extra step to sort the sample IDs

# Sanity checks on the submission format.
assert submission["Target"].between(0, 1).all(), "`Target` values must be in [0, 1]"
assert submission.shape == (149, 2), "Your submission file must be of shape (149, 2)"
assert list(submission.columns) == [
    "Sample ID",
    "Target",
], "Your submission file must have columns `Sample ID` and `Target`"

# Save the submission as a CSV file. The validation AUC (4 decimals) tags both
# the directory and the file name.
val_score_tag = f"{mono_model_val_score:.4f}"
OUTPUT_PATH_DIR = Path(f"../data/test_outputs/{run_name_}_{val_score_tag}/")
# Create the directory (and any missing parents) in a single call; `exist_ok`
# makes re-running the cell safe without a separate existence check.
OUTPUT_PATH_DIR.mkdir(parents=True, exist_ok=True)
# `index=False` is the documented way to omit the row index from the CSV.
submission.to_csv(OUTPUT_PATH_DIR / f"{val_score_tag}.csv", index=False)
submission.head()

Unnamed: 0,Sample ID,Target
0,ID_003.npy,0.46047
1,ID_004.npy,0.61118
2,ID_008.npy,0.490601
3,ID_009.npy,0.479459
4,ID_010.npy,0.291159
