In [1]:
import pandas as pd
import torch
import optuna
from transformers import (
    RobertaTokenizer, 
    RobertaForSequenceClassification,
    TrainingArguments, 
    Trainer,
    DataCollatorWithPadding,
    EarlyStoppingCallback
)
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# 모델 및 토크나이저 로드
model_name = "DeepChem/ChemBERTa-77M-MLM"
tokenizer = RobertaTokenizer.from_pretrained(
    model_name,
    cache_dir='C:/huggingface_cache'
)
model = RobertaForSequenceClassification.from_pretrained(
    model_name, 
    num_labels=1,
    cache_dir='C:/huggingface_cache'
)



Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
# 데이터 로드
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
submission = pd.read_csv('sample_submission.csv')

print(f"훈련 데이터: {train_df.shape}")
print(f"테스트 데이터: {test_df.shape}")



훈련 데이터: (1681, 3)
테스트 데이터: (100, 2)


In [4]:
# 데이터셋 클래스 정의
class SMILESDataset(Dataset):
    def __init__(self, smiles_list, labels=None):
        self.smiles_list = smiles_list
        self.labels = labels

    def __len__(self):
        return len(self.smiles_list)

    def __getitem__(self, idx):
        smiles = str(self.smiles_list[idx])
        encoding = tokenizer(
            smiles,
            truncation=True,
            max_length=512,
            return_tensors='pt'
        )
        
        item = {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten()
        }
        
        if self.labels is not None:
            item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        
        return item

# 평가 함수들
def normalized_rmse(y_true, y_pred):
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    return rmse / (np.max(y_true) - np.min(y_true))

def pearson_correlation(y_true, y_pred):
    corr = np.corrcoef(y_true, y_pred)[0, 1]
    return np.clip(corr, 0, 1)

def competition_score(y_true, y_pred):
    nrmse = min(normalized_rmse(y_true, y_pred), 1)
    pearson = pearson_correlation(y_true, y_pred)
    return 0.5 * (1 - nrmse) + 0.5 * pearson

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = predictions.flatten()
    labels = labels.flatten()
    
    nrmse = normalized_rmse(labels, predictions)
    pearson = pearson_correlation(labels, predictions)
    comp_score = competition_score(labels, predictions)
    
    return {
        'nrmse': nrmse,
        'pearson': pearson,
        'competition_score': comp_score,
        'mse': mean_squared_error(labels, predictions)
    }



In [5]:
# 데이터 분할
X_train, X_val, y_train, y_val = train_test_split(
    train_df['Canonical_Smiles'].values,
    train_df['Inhibition'].values,
    test_size=0.2,
    random_state=42
)

# 데이터셋 생성
train_dataset = SMILESDataset(X_train, y_train)
val_dataset = SMILESDataset(X_val, y_val)
test_dataset = SMILESDataset(test_df['Canonical_Smiles'].values)

# 데이터 콜레이터
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)



In [6]:
# Optuna 목적 함수
def objective(trial):
    # 하이퍼파라미터 탐색 공간
    learning_rate = trial.suggest_float("learning_rate", 5e-6, 5e-4, log=True)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32])
    warmup_steps = trial.suggest_int("warmup_steps", 50, 300)
    weight_decay = trial.suggest_float("weight_decay", 0.001, 0.1, log=True)
    
    # 모델 로드
    model = RobertaForSequenceClassification.from_pretrained(
        model_name, 
        num_labels=1,
        cache_dir='C:/huggingface_cache'
    )
    
    # 훈련 설정
    training_args = TrainingArguments(
        output_dir=f'./optuna_trial_{trial.number}',
        num_train_epochs=30,  # 빠른 탐색을 위해 줄임
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        warmup_steps=warmup_steps,
        weight_decay=weight_decay,
        learning_rate=learning_rate,
        logging_steps=100,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="competition_score",
        greater_is_better=True,
        save_total_limit=1,
        report_to=None,
        fp16=False,
        dataloader_pin_memory=False
    )
    
    # Trainer 생성
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]  # 빠른 탐색
    )
    
    # 훈련
    trainer.train()
    
    # 최종 평가
    eval_results = trainer.evaluate()
    
    # 메모리 정리
    del model
    torch.cuda.empty_cache()
    
    return eval_results['eval_competition_score']

In [7]:
# Optuna 스터디 생성 및 실행
print("Optuna 하이퍼파라미터 최적화 시작...")
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42)
)

# 최적화 실행 (50회 시도)
study.optimize(objective, n_trials=50, timeout=7200)  # 2시간 제한

print("최적화 완료!")
print(f"최고 점수: {study.best_value:.4f}")
print(f"최적 하이퍼파라미터: {study.best_params}")

[I 2025-07-06 11:45:52,806] A new study created in memory with name: no-name-5ed7e64e-5fbe-4246-9c0d-8e694b0063eb


Optuna 하이퍼파라미터 최적화 시작...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1776.525,1623.592896,0.405446,0.175801,0.385178,1623.59314
2,1630.9258,1439.81543,0.38181,0.19359,0.40589,1439.81543
3,1374.657,1288.162842,0.361143,0.188377,0.413617,1288.162842
4,1322.7494,1171.611328,0.344418,0.201318,0.42845,1171.611328
5,1149.3881,1081.860718,0.330963,0.222468,0.445752,1081.86084
6,1087.5981,1010.719055,0.319896,0.234832,0.457468,1010.719055
7,1072.0793,954.806274,0.310922,0.242802,0.46594,954.806274
8,966.0625,908.967651,0.303367,0.246251,0.471442,908.967651
9,951.1537,872.809326,0.297272,0.260445,0.481586,872.809326
10,890.0811,843.40979,0.292222,0.319463,0.51362,843.409851


[I 2025-07-06 11:46:49,073] Trial 0 finished with value: 0.5136200510097674 and parameters: {'learning_rate': 2.8057582076672495e-05, 'batch_size': 8, 'warmup_steps': 89, 'weight_decay': 0.002051110418843397}. Best is trial 0 with value: 0.5136200510097674.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1786.7552,1734.040405,0.419009,0.05361,0.3173,1734.040405
2,1814.0258,1695.532227,0.414331,0.140226,0.362947,1695.532227
3,1702.1616,1650.27002,0.408763,0.125478,0.358357,1650.27002
4,1716.9025,1607.764648,0.403465,0.106715,0.351625,1607.764648
5,1615.57,1568.111084,0.398458,0.09745,0.349496,1568.111084
6,1628.9472,1530.745605,0.393682,0.091052,0.348685,1530.745605
7,1635.3492,1495.444702,0.389116,0.08631,0.348597,1495.44458


[I 2025-07-06 11:47:15,989] Trial 1 finished with value: 0.36294747391732696 and parameters: {'learning_rate': 6.533369619026635e-06, 'batch_size': 8, 'warmup_steps': 55, 'weight_decay': 0.08706020878304858}. Best is trial 0 with value: 0.5136200510097674.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1684.8839,1013.680664,0.320365,0.205763,0.442699,1013.680664
2,860.3822,725.47821,0.271023,0.325349,0.527163,725.478149
3,707.4032,696.739929,0.265601,0.289274,0.511836,696.739929
4,701.6015,677.560974,0.26192,0.210892,0.474486,677.560913
5,665.5202,639.407104,0.254439,0.328982,0.537272,639.407104
6,625.9364,620.994141,0.250748,0.33367,0.541461,620.994141
7,619.7043,653.182373,0.257165,0.315898,0.529367,653.182373
8,565.3079,639.137329,0.254385,0.306357,0.525986,639.137329
9,560.3054,698.74469,0.265983,0.352452,0.543235,698.74469
10,474.2617,632.374329,0.253035,0.330335,0.53865,632.374329


[I 2025-07-06 11:48:15,896] Trial 2 finished with value: 0.5542756736310166 and parameters: {'learning_rate': 0.00023112945005104147, 'batch_size': 8, 'warmup_steps': 126, 'weight_decay': 0.01120760621186057}. Best is trial 2 with value: 0.5542756736310166.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1725.299316,0.417952,0.131859,0.356953,1725.299316
2,1784.098900,1592.121704,0.401497,0.129721,0.364112,1592.121704
3,1720.836300,1468.803955,0.385635,0.133715,0.37404,1468.803955
4,1533.016100,1362.020264,0.371352,0.141847,0.385247,1362.020264
5,1374.430900,1271.694336,0.358827,0.12377,0.382471,1271.694336
6,1318.995300,1196.518311,0.34806,0.127921,0.38993,1196.518311
7,1318.995300,1134.078003,0.338856,0.101311,0.381227,1134.078003
8,1203.704100,1081.482788,0.330906,0.15284,0.410967,1081.482788
9,1117.946800,1036.869385,0.324008,0.131477,0.403734,1036.869385
10,1104.195100,998.664062,0.317983,0.147981,0.414999,998.664062


[I 2025-07-06 11:49:16,759] Trial 3 finished with value: 0.4895868710196091 and parameters: {'learning_rate': 3.654769917956452e-05, 'batch_size': 16, 'warmup_steps': 123, 'weight_decay': 0.005404103854647328}. Best is trial 2 with value: 0.5542756736310166.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1783.8409,1659.749023,0.409935,0.186364,0.388214,1659.749268
2,1627.6281,1398.393311,0.376278,0.183626,0.403674,1398.393433
3,1295.2274,1198.034912,0.34828,0.183378,0.417549,1198.034912
4,1218.1058,1059.033447,0.327453,0.199562,0.436055,1059.033447
5,1028.1055,961.047913,0.311937,0.21389,0.450976,961.047913
6,954.3988,889.727905,0.300139,0.219462,0.459662,889.727905
7,937.8797,836.630554,0.291046,0.162815,0.435885,836.630554
8,836.9214,797.370178,0.284135,0.203053,0.459459,797.370056
9,824.6455,769.952393,0.279207,0.210469,0.465631,769.952393
10,780.229,750.677917,0.27569,0.161838,0.443074,750.677979


[I 2025-07-06 11:50:38,256] Trial 4 finished with value: 0.5347119894914337 and parameters: {'learning_rate': 4.0842279473800804e-05, 'batch_size': 8, 'warmup_steps': 198, 'weight_decay': 0.001238513729886093}. Best is trial 2 with value: 0.5542756736310166.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1747.784546,0.420667,0.165169,0.372251,1747.784302
2,No log,1724.714722,0.417881,0.167719,0.374919,1724.714722
3,1809.300600,1663.14502,0.410355,0.11466,0.352153,1663.14502
4,1809.300600,1582.182617,0.400242,0.067899,0.333829,1582.182617
5,1658.575200,1485.907837,0.387874,0.06328,0.337703,1485.907837
6,1658.575200,1377.629883,0.373474,0.087135,0.35683,1377.629883
7,1658.575200,1267.086426,0.358177,0.140481,0.391152,1267.086426
8,1448.636700,1170.950073,0.344321,0.111975,0.383827,1170.950073
9,1448.636700,1094.464478,0.332886,0.143833,0.405474,1094.464355
10,1196.659000,1032.452026,0.323317,0.099274,0.387978,1032.451782


[I 2025-07-06 11:51:29,708] Trial 5 finished with value: 0.49622641524230116 and parameters: {'learning_rate': 8.204643365323964e-05, 'batch_size': 32, 'warmup_steps': 292, 'weight_decay': 0.041380401125610165}. Best is trial 2 with value: 0.5542756736310166.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1732.633911,0.41884,0.164389,0.372775,1732.633911
2,1786.519100,1655.591187,0.409422,0.08167,0.336124,1655.591187
3,1769.317000,1580.373169,0.400013,0.07343,0.336709,1580.373291
4,1642.364200,1514.229492,0.391553,0.134974,0.371711,1514.229492
5,1532.603700,1454.200806,0.383713,0.167351,0.391819,1454.200684
6,1516.585900,1399.645996,0.376447,0.182237,0.402895,1399.645874
7,1516.585900,1350.535645,0.369783,0.213514,0.421865,1350.535645
8,1421.310600,1306.502075,0.363705,0.200425,0.41836,1306.502075
9,1348.626300,1267.367065,0.358216,0.211543,0.426663,1267.367065
10,1353.156600,1232.494751,0.353254,0.23057,0.438658,1232.494751


[I 2025-07-06 11:52:42,673] Trial 6 finished with value: 0.47199742354884566 and parameters: {'learning_rate': 2.033281656757398e-05, 'batch_size': 16, 'warmup_steps': 80, 'weight_decay': 0.009780337016659405}. Best is trial 2 with value: 0.5542756736310166.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1790.3966,1742.491821,0.420029,0.128952,0.354462,1742.491821
2,1827.5113,1713.588989,0.416531,0.147757,0.365613,1713.588867
3,1722.5411,1674.090332,0.411703,0.132626,0.360462,1674.090332
4,1739.9102,1634.419922,0.406795,0.127648,0.360426,1634.420044
5,1641.8795,1597.379883,0.40216,0.14159,0.369715,1597.379883
6,1659.1592,1562.945435,0.397801,0.149737,0.375968,1562.945312
7,1667.7066,1530.55249,0.393657,0.157431,0.381887,1530.55249
8,1588.2556,1500.193237,0.389734,0.162538,0.386402,1500.193237
9,1613.8397,1471.605469,0.386002,0.164425,0.389211,1471.605469
10,1520.4987,1445.112915,0.382512,0.164055,0.390771,1445.113037


[I 2025-07-06 11:54:33,444] Trial 7 finished with value: 0.4071540134321787 and parameters: {'learning_rate': 5.857968696153527e-06, 'batch_size': 8, 'warmup_steps': 128, 'weight_decay': 0.010968217207529524}. Best is trial 2 with value: 0.5542756736310166.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1736.424194,0.419297,0.147182,0.363942,1736.424316
2,1788.719400,1633.125732,0.406634,0.143505,0.368435,1633.125977
3,1755.481100,1467.799805,0.385503,0.167463,0.39098,1467.799805
4,1530.103100,1286.814819,0.360954,0.15535,0.397198,1286.814697
5,1287.680200,1148.4823,0.341002,0.157987,0.408493,1148.4823
6,1175.275200,1045.374146,0.325335,0.149888,0.412277,1045.374146
7,1175.275200,965.940674,0.31273,0.141377,0.414324,965.940674
8,1034.684400,903.547119,0.302461,0.145535,0.421537,903.547119
9,933.670500,855.291992,0.294274,0.170985,0.438356,855.291931
10,902.748700,817.133179,0.287634,0.205906,0.459136,817.133179


[I 2025-07-06 11:55:44,023] Trial 8 finished with value: 0.5361349938286719 and parameters: {'learning_rate': 6.199983918423047e-05, 'batch_size': 16, 'warmup_steps': 285, 'weight_decay': 0.06161049539380966}. Best is trial 2 with value: 0.5542756736310166.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1731.3961,1376.577271,0.373331,0.112566,0.369617,1376.577393
2,1284.8026,1054.649414,0.326775,0.183711,0.428468,1054.649414
3,951.1377,882.494507,0.298917,0.150091,0.425587,882.494507
4,888.9295,791.712463,0.283125,0.150923,0.433899,791.712524
5,773.0528,742.913025,0.27426,0.227123,0.476431,742.913025
6,739.7881,718.402466,0.269698,0.325001,0.527651,718.402466
7,751.0489,707.157104,0.267579,0.202402,0.467412,707.157166
8,700.475,700.780396,0.26637,0.3168,0.525215,700.780396
9,712.5332,697.090576,0.265668,0.311932,0.523132,697.090576
10,699.072,670.759094,0.260602,0.254701,0.49705,670.759094


[I 2025-07-06 11:56:25,614] Trial 9 finished with value: 0.5276512176746114 and parameters: {'learning_rate': 7.848198194330569e-05, 'batch_size': 8, 'warmup_steps': 61, 'weight_decay': 0.004473636174621266}. Best is trial 2 with value: 0.5542756736310166.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1684.407837,0.412969,0.066782,0.326906,1684.407837
2,No log,1430.703491,0.3806,0.084813,0.352107,1430.703613
3,1676.771100,1131.373047,0.338452,0.085654,0.373601,1131.373169
4,1676.771100,891.211121,0.300389,0.014789,0.3572,891.211121
5,1030.235400,736.628723,0.273098,0.209723,0.468313,736.628723
6,1030.235400,695.620483,0.265387,0.273083,0.503848,695.620483
7,1030.235400,689.444458,0.264207,0.247884,0.491838,689.444458
8,720.780300,649.79657,0.256497,0.276673,0.510088,649.79657
9,720.780300,645.217407,0.255592,0.291813,0.518111,645.217468
10,659.789600,626.289062,0.251815,0.326111,0.537148,626.289124


[I 2025-07-06 11:57:02,499] Trial 10 finished with value: 0.5729473745443601 and parameters: {'learning_rate': 0.00045181656815872543, 'batch_size': 32, 'warmup_steps': 208, 'weight_decay': 0.022005828785027447}. Best is trial 10 with value: 0.5729473745443601.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1682.062256,0.412682,0.169037,0.378178,1682.062256
2,No log,1428.466797,0.380303,0.120611,0.370154,1428.466675
3,1674.417500,1129.069092,0.338107,0.083786,0.372839,1129.069092
4,1674.417500,891.125366,0.300375,0.063464,0.381545,891.125366
5,1028.767400,736.797119,0.273129,0.047764,0.387318,736.797119
6,1028.767400,695.536133,0.265371,0.258016,0.496322,695.536072
7,1028.767400,735.100464,0.272815,0.30178,0.514483,735.100464
8,720.480200,656.661621,0.257849,0.242709,0.49243,656.661621
9,720.480200,639.287842,0.254415,0.29286,0.519223,639.287842
10,642.014400,622.300049,0.251012,0.338211,0.5436,622.300049


[I 2025-07-06 11:57:37,691] Trial 11 finished with value: 0.5467822452041422 and parameters: {'learning_rate': 0.0004616775462768299, 'batch_size': 32, 'warmup_steps': 211, 'weight_decay': 0.02887352724952181}. Best is trial 10 with value: 0.5729473745443601.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1697.440063,0.414564,0.176621,0.381029,1697.440063
2,No log,1468.130615,0.385546,0.101208,0.357831,1468.130493
3,1696.129100,1182.462036,0.346009,0.111835,0.382913,1182.462036
4,1696.129100,944.516663,0.309242,0.115215,0.402986,944.516602
5,1084.323000,776.12439,0.280324,0.116343,0.41801,776.124329
6,1084.323000,701.127197,0.266436,0.098596,0.41608,701.127197
7,1084.323000,694.861572,0.265243,0.305442,0.520099,694.861572
8,738.963200,683.485535,0.263062,0.171556,0.454247,683.485474
9,738.963200,640.811523,0.254718,0.33679,0.541036,640.811462
10,676.950900,638.029785,0.254164,0.313171,0.529503,638.029785


[I 2025-07-06 11:58:12,715] Trial 12 finished with value: 0.5488307118154415 and parameters: {'learning_rate': 0.00043204035227877843, 'batch_size': 32, 'warmup_steps': 230, 'weight_decay': 0.01717548248900738}. Best is trial 10 with value: 0.5729473745443601.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1717.906372,0.417056,0.182048,0.382496,1717.906494
2,No log,1548.214966,0.395922,0.135362,0.36972,1548.215088
3,1733.563400,1310.941895,0.364322,0.124329,0.380003,1310.941895
4,1733.563400,1089.187622,0.332082,0.124518,0.396218,1089.187744
5,1228.267900,942.072266,0.308842,0.108722,0.39994,942.072327
6,1228.267900,842.822693,0.292121,0.082995,0.395437,842.82251
7,1228.267900,775.412109,0.280195,0.137748,0.428776,775.412109
8,890.652100,732.786926,0.272385,0.2048,0.466207,732.786987
9,890.652100,711.997559,0.268493,0.193529,0.462518,711.997681
10,738.381600,700.330872,0.266284,0.210061,0.471888,700.330872


[I 2025-07-06 11:58:49,732] Trial 13 finished with value: 0.5480390711924561 and parameters: {'learning_rate': 0.0001974755883490512, 'batch_size': 32, 'warmup_steps': 148, 'weight_decay': 0.02106510528632681}. Best is trial 10 with value: 0.5729473745443601.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1714.748901,0.416672,0.174747,0.379037,1714.749023
2,No log,1550.485474,0.396212,0.141747,0.372767,1550.485474
3,1732.077300,1319.015625,0.365443,0.10093,0.367744,1319.015625
4,1732.077300,1092.893433,0.332647,0.066779,0.367066,1092.893555
5,1231.528300,927.729309,0.306482,0.041627,0.367573,927.729431
6,1231.528300,821.109253,0.288333,0.084932,0.3983,821.109131
7,1231.528300,753.215393,0.276156,0.191624,0.457734,753.215393
8,868.903700,716.159241,0.269277,0.221302,0.476013,716.159241
9,868.903700,702.16571,0.266633,0.225781,0.479574,702.16571
10,721.131300,696.261597,0.26551,0.344699,0.539595,696.261597


[I 2025-07-06 11:59:26,323] Trial 14 finished with value: 0.5429378777008194 and parameters: {'learning_rate': 0.00022367873502150072, 'batch_size': 32, 'warmup_steps': 169, 'weight_decay': 0.005959206131645764}. Best is trial 10 with value: 0.5729473745443601.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1749.0027,1345.051514,0.369032,0.111572,0.37127,1345.051392
2,1102.1755,819.243896,0.288006,0.050648,0.381321,819.243896
3,736.1491,708.736267,0.267878,0.255974,0.494048,708.736267
4,712.4158,676.041199,0.261626,0.262817,0.500595,676.041199
5,667.8859,650.791016,0.256694,0.284261,0.513784,650.791016
6,629.8811,643.199463,0.255192,0.308773,0.526791,643.199463
7,628.0457,663.251831,0.259139,0.294264,0.517562,663.251831
8,560.5784,623.070007,0.251167,0.357374,0.553104,623.070007
9,560.5704,675.004517,0.261425,0.313988,0.526281,675.004578
10,504.7287,647.720947,0.256087,0.302943,0.523428,647.720947


[I 2025-07-06 12:00:15,290] Trial 15 finished with value: 0.5531035032518639 and parameters: {'learning_rate': 0.00019138534035817448, 'batch_size': 8, 'warmup_steps': 245, 'weight_decay': 0.013230460335438466}. Best is trial 10 with value: 0.5729473745443601.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1737.880371,0.419473,0.218993,0.39976,1737.880615
2,No log,1654.497803,0.409286,0.170963,0.380838,1654.497803
3,1780.571600,1513.342529,0.391438,0.162206,0.385384,1513.342529
4,1780.571600,1345.449951,0.369086,0.124844,0.377879,1345.449829
5,1459.900200,1186.898682,0.346658,0.156518,0.40493,1186.898804
6,1459.900200,1070.09436,0.329159,0.135671,0.403256,1070.09436
7,1459.900200,981.923706,0.315307,0.158313,0.421503,981.923584
8,1133.430500,913.040588,0.304046,0.153848,0.424901,913.040588
9,1133.430500,858.598877,0.294842,0.13291,0.419034,858.598877
10,925.944900,815.318176,0.287315,0.132093,0.422389,815.318054


[I 2025-07-06 12:00:57,622] Trial 16 finished with value: 0.5398737612215192 and parameters: {'learning_rate': 0.00012626855053237253, 'batch_size': 32, 'warmup_steps': 179, 'weight_decay': 0.03619983114641091}. Best is trial 10 with value: 0.5729473745443601.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1713.4834,1101.874878,0.334011,0.160615,0.413302,1101.875
2,864.1786,707.85675,0.267711,0.318678,0.525483,707.85675
3,696.8025,676.396057,0.261695,0.26119,0.499748,676.396057
4,662.4064,645.578125,0.255663,0.29673,0.520533,645.578186
5,652.0449,639.648621,0.254487,0.32629,0.535902,639.648621
6,594.7549,634.649963,0.25349,0.368362,0.557436,634.649963
7,573.412,634.898376,0.25354,0.350273,0.548367,634.898438
8,516.0005,715.247375,0.269105,0.232968,0.481931,715.247375
9,496.7977,724.259338,0.270795,0.305719,0.517462,724.259277
10,424.6137,720.834595,0.270154,0.347421,0.538634,720.834595


[I 2025-07-06 12:01:39,456] Trial 17 finished with value: 0.5574358383182196 and parameters: {'learning_rate': 0.00035169237047489205, 'batch_size': 8, 'warmup_steps': 253, 'weight_decay': 0.0036025945530639577}. Best is trial 10 with value: 0.5729473745443601.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1718.131348,0.417083,0.116299,0.349608,1718.131348
2,No log,1550.382324,0.396199,0.083173,0.343487,1550.382446
3,1733.941400,1321.979004,0.365853,0.086032,0.360089,1321.979004
4,1733.941400,1094.44397,0.332883,0.032092,0.349605,1094.444092
5,1232.463000,909.430481,0.303444,0.188056,0.442306,909.430481
6,1232.463000,774.19519,0.279975,0.186414,0.453219,774.195129
7,1232.463000,707.333557,0.267612,0.193861,0.463124,707.333557
8,826.375200,695.548584,0.265374,0.281107,0.507867,695.548523
9,826.375200,693.916931,0.265062,0.303959,0.519449,693.916931
10,693.629100,640.619995,0.25468,0.321008,0.533164,640.619995


[I 2025-07-06 12:02:18,071] Trial 18 finished with value: 0.5357583250133238 and parameters: {'learning_rate': 0.00034345447576133, 'batch_size': 32, 'warmup_steps': 261, 'weight_decay': 0.0028050942103296397}. Best is trial 10 with value: 0.5729473745443601.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1700.183838,0.414899,0.167059,0.37608,1700.18396
2,1771.182200,1489.952515,0.388401,0.14009,0.375845,1489.952515
3,1634.647300,1214.831543,0.350713,0.037129,0.343208,1214.831543
4,1275.872600,997.30304,0.317766,0.129066,0.40565,997.30304
5,986.407800,864.349609,0.295828,0.130976,0.417574,864.349609
6,876.596100,784.220154,0.281782,0.17383,0.446024,784.220154
7,876.596100,737.279236,0.273219,0.12807,0.427426,737.279236
8,775.272400,714.293213,0.268926,0.279151,0.505113,714.293152
9,720.002400,703.292114,0.266847,0.298076,0.515615,703.292175
10,716.023800,686.447449,0.263632,0.156064,0.446216,686.447449


[I 2025-07-06 12:02:54,190] Trial 19 finished with value: 0.5156146430908314 and parameters: {'learning_rate': 0.0001171542971814984, 'batch_size': 16, 'warmup_steps': 263, 'weight_decay': 0.0010242771533841863}. Best is trial 10 with value: 0.5729473745443601.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1751.537231,0.421118,0.047507,0.313194,1751.537231
2,No log,1748.364746,0.420737,0.082744,0.331004,1748.364746
3,1819.375300,1742.516724,0.420032,0.119716,0.349842,1742.516724
4,1819.375300,1732.161011,0.418782,0.14486,0.363039,1732.161133
5,1774.633900,1713.790039,0.416556,0.150831,0.367138,1713.789917
6,1774.633900,1687.391724,0.413335,0.14304,0.364853,1687.391724
7,1774.633900,1661.339355,0.410132,0.147763,0.368816,1661.339233
8,1752.172500,1637.749756,0.40721,0.148711,0.370751,1637.749634
9,1752.172500,1616.139526,0.404514,0.146961,0.371223,1616.139526
10,1693.896900,1596.185425,0.402009,0.149782,0.373887,1596.185425


[I 2025-07-06 12:03:50,172] Trial 20 finished with value: 0.38807072840998424 and parameters: {'learning_rate': 1.2125223750341214e-05, 'batch_size': 32, 'warmup_steps': 230, 'weight_decay': 0.0024766144912826576}. Best is trial 10 with value: 0.5729473745443601.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1727.2911,1162.475464,0.343073,0.122104,0.389516,1162.475464
2,926.2761,738.338257,0.273415,0.17517,0.450878,738.338318
3,708.6615,696.915161,0.265634,0.268988,0.501677,696.915161
4,701.9606,656.698792,0.257856,0.294439,0.518292,656.698792
5,670.3943,642.959167,0.255144,0.288543,0.516699,642.959106
6,632.4502,625.247986,0.251606,0.326291,0.537343,625.247986
7,624.1042,700.976807,0.266407,0.302015,0.517804,700.976807
8,560.2037,676.572266,0.261729,0.344182,0.541227,676.572266
9,543.0018,672.587097,0.260957,0.355927,0.547485,672.587097
10,465.0877,642.295044,0.255013,0.355944,0.550465,642.295044


[I 2025-07-06 12:05:05,301] Trial 21 finished with value: 0.5700154413879907 and parameters: {'learning_rate': 0.0002471482213279421, 'batch_size': 8, 'warmup_steps': 197, 'weight_decay': 0.0068047356154822426}. Best is trial 10 with value: 0.5729473745443601.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1702.918,1064.998047,0.328374,0.134496,0.403061,1064.998169
2,842.6909,708.934692,0.267915,0.321779,0.526932,708.934631
3,702.6779,664.885315,0.259458,0.277872,0.509207,664.885254
4,668.9583,642.849548,0.255123,0.27652,0.510699,642.849548
5,611.7131,605.254639,0.24755,0.368203,0.560326,605.254639
6,579.0791,588.884644,0.24418,0.417181,0.586501,588.884644
7,559.5338,635.848877,0.25373,0.38027,0.56327,635.848877
8,491.8732,654.997681,0.257522,0.379836,0.561157,654.997742
9,481.4385,639.170776,0.254392,0.389437,0.567523,639.170837
10,402.8392,634.012634,0.253363,0.360591,0.553614,634.012634


[I 2025-07-06 12:05:47,245] Trial 22 finished with value: 0.586500525139622 and parameters: {'learning_rate': 0.00030994900647604996, 'batch_size': 8, 'warmup_steps': 201, 'weight_decay': 0.007364521772775286}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1701.3091,1049.712036,0.326009,0.01784,0.345916,1049.712158
2,837.6462,709.310303,0.267986,0.292641,0.512327,709.310303
3,700.3555,725.9104,0.271104,0.190524,0.45971,725.9104
4,695.35,666.55603,0.259784,0.274911,0.507563,666.55603
5,683.04,655.179504,0.257558,0.270819,0.506631,655.179504
6,633.7183,621.126404,0.250775,0.423516,0.586371,621.126343
7,626.0519,620.126099,0.250573,0.390258,0.569842,620.126099
8,558.8918,686.138428,0.263573,0.281729,0.509078,686.138428
9,557.9286,684.325684,0.263224,0.324061,0.530419,684.325684
10,475.0754,620.138,0.250575,0.361018,0.555221,620.138


[I 2025-07-06 12:06:30,285] Trial 23 finished with value: 0.5863707311759392 and parameters: {'learning_rate': 0.00029837444430027367, 'batch_size': 8, 'warmup_steps': 188, 'weight_decay': 0.007404414578826395}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1741.8828,1277.885986,0.3597,0.020464,0.330382,1277.885986
2,1072.1664,835.019165,0.290765,0.155039,0.432137,835.019287
3,754.7691,721.557861,0.27029,0.21817,0.47394,721.557861
4,724.6487,698.564819,0.265949,0.272812,0.503432,698.56488
5,706.6513,661.426392,0.258783,0.293614,0.517416,661.426392
6,657.5752,662.130432,0.25892,0.225932,0.483506,662.130371
7,654.8247,669.425354,0.260343,0.297682,0.51867,669.425415
8,590.0397,653.987488,0.257323,0.295627,0.519152,653.987427
9,573.4726,685.875122,0.263522,0.344313,0.540395,685.875061
10,519.5037,673.162537,0.261068,0.321222,0.530077,673.162537


[I 2025-07-06 12:07:23,395] Trial 24 finished with value: 0.5403954517452308 and parameters: {'learning_rate': 0.00015599095228165317, 'batch_size': 8, 'warmup_steps': 168, 'weight_decay': 0.020665418187125653}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1712.7505,1097.400635,0.333332,0.113065,0.389867,1097.400757
2,862.1176,711.988892,0.268492,0.224524,0.478016,711.988892
3,703.5989,695.154053,0.265298,0.086501,0.410601,695.154053
4,698.6406,693.97998,0.265074,0.288068,0.511497,693.979919
5,684.6833,643.499817,0.255252,0.290954,0.517851,643.499817
6,644.4731,645.787903,0.255705,0.281079,0.512687,645.787842
7,639.8791,668.735596,0.260208,0.270859,0.505325,668.735535
8,580.3545,666.458374,0.259765,0.264374,0.502304,666.458374
9,583.7974,647.576965,0.256059,0.3529,0.548421,647.576965
10,514.75,621.341125,0.250818,0.353473,0.551327,621.341125


[I 2025-07-06 12:08:20,726] Trial 25 finished with value: 0.5513273120395712 and parameters: {'learning_rate': 0.00030958690261619943, 'batch_size': 8, 'warmup_steps': 212, 'weight_decay': 0.007228492392865632}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1611.4255,781.087097,0.281219,0.061959,0.39037,781.087097
2,730.8064,690.48761,0.264407,0.30832,0.521957,690.48761
3,699.7816,682.145386,0.262804,0.159417,0.448306,682.145325
4,691.3203,677.358887,0.261881,0.174869,0.456494,677.358887
5,687.5304,656.429443,0.257803,0.25741,0.499803,656.429443
6,662.4452,651.091125,0.256753,0.263564,0.503405,651.091125
7,672.9205,645.455566,0.255639,0.27024,0.5073,645.455627


[I 2025-07-06 12:08:49,047] Trial 26 finished with value: 0.5219567475511178 and parameters: {'learning_rate': 0.00047947064240833406, 'batch_size': 8, 'warmup_steps': 151, 'weight_decay': 0.008244692594565754}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1760.1486,1414.113037,0.378387,0.151205,0.386409,1414.113037
2,1217.4552,940.686829,0.308615,0.135283,0.413334,940.686768
3,827.1038,773.900513,0.279922,0.018329,0.369203,773.900513
4,774.8287,716.817017,0.269401,0.349003,0.539801,716.817017
5,710.9438,700.791016,0.266372,0.336146,0.534887,700.791016
6,698.5105,674.437012,0.261315,0.283091,0.510888,674.437012
7,705.7845,656.987549,0.257913,0.27712,0.509603,656.987549
8,652.6003,659.169067,0.258341,0.313867,0.527763,659.169067
9,642.3926,671.588623,0.260763,0.33195,0.535593,671.588623


[I 2025-07-06 12:09:23,339] Trial 27 finished with value: 0.539801030860423 and parameters: {'learning_rate': 0.0001210551138333399, 'batch_size': 8, 'warmup_steps': 193, 'weight_decay': 0.015103406097788688}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1574.106812,0.399219,0.118492,0.359636,1574.106812
2,1721.343300,1115.726685,0.336104,0.107612,0.385754,1115.726685
3,1301.906300,796.400024,0.283962,0.069418,0.392728,796.400024
4,837.648700,705.03949,0.267178,0.322734,0.527778,705.039612
5,696.111600,678.060364,0.262016,0.273857,0.50592,678.060364
6,690.214500,649.788086,0.256496,0.268445,0.505975,649.788025
7,690.214500,627.009949,0.25196,0.348559,0.548299,627.009888
8,652.285100,636.550415,0.25387,0.299936,0.523033,636.550415
9,597.533500,645.568604,0.255662,0.324785,0.534562,645.568604
10,573.442800,606.773804,0.247861,0.38557,0.568855,606.773804


[I 2025-07-06 12:10:01,036] Trial 28 finished with value: 0.568854816955221 and parameters: {'learning_rate': 0.00029549500335615677, 'batch_size': 16, 'warmup_steps': 220, 'weight_decay': 0.026832017342406315}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1728.817871,0.418378,0.084741,0.333181,1728.817871
2,No log,1599.84082,0.402469,0.095101,0.346316,1599.84082
3,1757.156700,1406.623169,0.377384,0.092002,0.357309,1406.623169
4,1757.156700,1200.964966,0.348706,0.100627,0.375961,1200.964966
5,1335.211700,1053.373535,0.326577,0.118891,0.396157,1053.373657
6,1335.211700,948.668701,0.309921,0.157378,0.423728,948.668579
7,1335.211700,871.318787,0.297018,0.183677,0.44333,871.318787
8,1004.138100,813.398193,0.286976,0.212587,0.462805,813.398193
9,1004.138100,771.201599,0.279433,0.191892,0.456229,771.20166
10,821.297300,740.118896,0.273744,0.237168,0.481712,740.118896


[I 2025-07-06 12:10:38,127] Trial 29 finished with value: 0.5296657156643803 and parameters: {'learning_rate': 0.0001513770987851562, 'batch_size': 32, 'warmup_steps': 148, 'weight_decay': 0.001514774919872617}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1783.4089,1698.432617,0.414685,0.164287,0.374801,1698.432617
2,1717.8469,1539.466187,0.394802,0.121488,0.363343,1539.466187
3,1484.918,1399.868896,0.376477,0.086908,0.355216,1399.868896
4,1440.2645,1282.676392,0.360373,0.086242,0.362934,1282.67627
5,1261.6005,1188.181763,0.346845,0.098386,0.37577,1188.181641
6,1199.6761,1112.247925,0.335579,0.102472,0.383446,1112.247925
7,1182.0637,1051.570801,0.326297,0.101688,0.387696,1051.570801
8,1069.4323,1000.415405,0.318262,0.116388,0.399063,1000.415405
9,1053.3203,959.650757,0.31171,0.069435,0.378863,959.650757
10,981.7859,924.999817,0.306031,0.094899,0.394434,924.999817


[I 2025-07-06 12:12:19,485] Trial 30 finished with value: 0.46764780806136075 and parameters: {'learning_rate': 2.2758624050081096e-05, 'batch_size': 8, 'warmup_steps': 181, 'weight_decay': 0.003973271001067322}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1719.2409,1125.243164,0.337534,0.154606,0.408536,1125.243164
2,894.3698,726.232666,0.271164,0.318229,0.523532,726.232666
3,705.545,695.545593,0.265373,0.318777,0.526702,695.545532
4,701.0655,683.214111,0.26301,0.337214,0.537102,683.214111
5,665.1915,638.399109,0.254238,0.29808,0.521921,638.399109
6,610.773,634.308105,0.253422,0.311247,0.528912,634.308044
7,599.9194,649.079407,0.256356,0.339039,0.541342,649.079407
8,530.7035,655.318665,0.257585,0.285076,0.513745,655.318787
9,520.5777,690.574646,0.264423,0.323265,0.529421,690.574646
10,436.1332,681.34021,0.262649,0.312808,0.525079,681.34021


[I 2025-07-06 12:13:05,765] Trial 31 finished with value: 0.5413416744842374 and parameters: {'learning_rate': 0.0002611980314589105, 'batch_size': 8, 'warmup_steps': 196, 'weight_decay': 0.00713436531661694}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1704.6698,1056.615356,0.327079,0.059311,0.366116,1056.615356
2,830.0819,702.317078,0.266662,0.257431,0.495385,702.317139
3,702.933,689.908752,0.264296,0.28279,0.509247,689.908752
4,684.6526,653.572083,0.257241,0.336569,0.539664,653.572083
5,627.908,630.234558,0.252607,0.307855,0.527624,630.234558
6,604.9885,627.397034,0.252038,0.313236,0.530599,627.397034
7,611.0711,652.070923,0.256946,0.342037,0.542546,652.070923
8,530.9859,652.383789,0.257008,0.335833,0.539413,652.383789
9,510.7748,697.314087,0.26571,0.34825,0.54127,697.314087
10,432.3236,653.663696,0.25726,0.347904,0.545322,653.663696


[I 2025-07-06 12:14:46,880] Trial 32 finished with value: 0.5565819183371122 and parameters: {'learning_rate': 0.0003728309996441753, 'batch_size': 8, 'warmup_steps': 239, 'weight_decay': 0.005604628888021623}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1722.4239,1142.609741,0.340129,0.153153,0.406512,1142.609741
2,907.6039,730.482178,0.271956,0.314449,0.521246,730.482239
3,706.5569,696.04071,0.265468,0.319352,0.526942,696.040771
4,701.0965,648.942139,0.256329,0.306056,0.524864,648.942139
5,652.1656,642.132507,0.25498,0.284213,0.514616,642.132507
6,622.1961,617.34082,0.25001,0.347993,0.548992,617.34082
7,617.828,659.332458,0.258373,0.319743,0.530685,659.332458
8,558.4688,656.786926,0.257873,0.251326,0.496727,656.786926
9,535.8913,741.188599,0.273942,0.320385,0.523222,741.188599
10,450.4785,686.066223,0.263559,0.326147,0.531294,686.066223


[I 2025-07-06 12:15:28,765] Trial 33 finished with value: 0.548991880554232 and parameters: {'learning_rate': 0.0002560954956321345, 'batch_size': 8, 'warmup_steps': 200, 'weight_decay': 0.00895532403127346}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1729.4656,1198.273438,0.348315,0.018103,0.334894,1198.273438
2,998.2907,788.610901,0.28257,0.171989,0.444709,788.610901
3,729.7732,706.800781,0.267512,0.325024,0.528756,706.800781
4,711.2555,695.391113,0.265344,0.339462,0.537059,695.391113
5,688.1523,657.713074,0.258055,0.271509,0.506727,657.713074
6,655.1323,647.350769,0.256014,0.268032,0.506009,647.350769
7,656.4615,668.289612,0.260122,0.278501,0.50919,668.289612
8,593.6711,662.269958,0.258948,0.333149,0.537101,662.27002
9,591.5104,687.443604,0.263823,0.293596,0.514886,687.443665
10,539.5457,664.5979,0.259402,0.309912,0.525255,664.597839


[I 2025-07-06 12:16:18,441] Trial 34 finished with value: 0.5371005569361992 and parameters: {'learning_rate': 0.0001807533450329042, 'batch_size': 8, 'warmup_steps': 160, 'weight_decay': 0.0031469480060628144}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1769.8439,1497.587524,0.389395,0.16787,0.389238,1497.587524
2,1345.1856,1064.710449,0.32833,0.169529,0.4206,1064.710449
3,933.4923,860.216614,0.29512,0.106554,0.405717,860.216614
4,861.5402,766.623535,0.278603,0.117128,0.419263,766.623535
5,749.7494,724.647278,0.270868,0.23193,0.480531,724.647278
6,721.3758,706.367981,0.26743,0.327047,0.529809,706.367981
7,736.323,700.117065,0.266244,0.323904,0.52883,700.117065
8,692.5716,696.341675,0.265525,0.302411,0.518443,696.341614
9,703.5738,690.276184,0.264366,0.270968,0.503301,690.276123
10,660.9849,652.188599,0.256969,0.343568,0.5433,652.188599


[I 2025-07-06 12:17:25,865] Trial 35 finished with value: 0.5460248636433033 and parameters: {'learning_rate': 9.165791192380296e-05, 'batch_size': 8, 'warmup_steps': 185, 'weight_decay': 0.0019927136219824245}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1663.5669,910.618408,0.303642,0.132197,0.414277,910.61853
2,756.2612,694.707153,0.265213,0.247914,0.49135,694.707092
3,677.6484,658.501709,0.25821,0.255009,0.498399,658.501648
4,669.2923,624.571655,0.25147,0.38702,0.567775,624.571655
5,624.862,603.412292,0.247173,0.376833,0.56483,603.412354
6,607.5961,619.134338,0.250373,0.343739,0.546683,619.134338
7,616.2884,649.23645,0.256387,0.309064,0.526338,649.236389
8,573.4858,642.011292,0.254956,0.312534,0.528789,642.011292
9,563.7093,661.115723,0.258722,0.326228,0.533753,661.115723


[I 2025-07-06 12:18:00,335] Trial 36 finished with value: 0.5677752979666635 and parameters: {'learning_rate': 0.00047402932121922396, 'batch_size': 8, 'warmup_steps': 211, 'weight_decay': 0.01335554660827537}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1685.2611,995.154785,0.317424,0.08119,0.381883,995.154846
2,840.4196,716.890198,0.269414,0.248827,0.489706,716.890198
3,710.1442,683.484802,0.263062,0.312376,0.524657,683.484802
4,690.7546,646.643799,0.255874,0.281685,0.512905,646.643799
5,640.466,629.212952,0.252402,0.322873,0.535236,629.212952
6,605.575,623.11145,0.251175,0.323978,0.536401,623.11145
7,597.0946,633.192993,0.253199,0.36894,0.55787,633.193054
8,536.9769,631.218262,0.252804,0.322878,0.535037,631.218262
9,531.0694,686.316833,0.263607,0.330694,0.533544,686.316833
10,461.3639,647.976562,0.256138,0.363504,0.553683,647.976562


[I 2025-07-06 12:18:45,582] Trial 37 finished with value: 0.5578701748213815 and parameters: {'learning_rate': 0.0002559317148537725, 'batch_size': 8, 'warmup_steps': 130, 'weight_decay': 0.09526261766589357}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1733.68103,0.418966,0.134331,0.357682,1733.68103
2,1786.424700,1610.529663,0.403811,0.037563,0.316876,1610.529663
3,1738.556100,1431.932861,0.380764,0.114037,0.366637,1431.932983
4,1498.297000,1275.650757,0.359385,0.141702,0.391159,1275.650879
5,1280.623000,1156.720947,0.342222,0.126606,0.392192,1156.720825
6,1189.832300,1065.408081,0.328437,0.143273,0.407418,1065.408081
7,1189.832300,993.599121,0.317176,0.202151,0.442488,993.59906
8,1060.765500,935.655884,0.307788,0.173562,0.432887,935.655884
9,966.435200,889.31665,0.30007,0.187287,0.443609,889.316772
10,941.018000,851.025391,0.293539,0.242337,0.474399,851.025513


[I 2025-07-06 12:19:58,732] Trial 38 finished with value: 0.5349177128965991 and parameters: {'learning_rate': 5.431580333958241e-05, 'batch_size': 16, 'warmup_steps': 225, 'weight_decay': 0.004733887818902757}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1790.8112,1745.381714,0.420377,0.113663,0.346643,1745.381714
2,1827.8478,1702.627563,0.415197,0.139665,0.362234,1702.627563
3,1694.0908,1635.689941,0.406953,0.127603,0.360325,1635.689941
4,1696.605,1574.454346,0.399263,0.144369,0.372553,1574.45459
5,1575.3145,1518.512573,0.392106,0.160032,0.383963,1518.512573
6,1567.7394,1466.908936,0.385386,0.163369,0.388992,1466.908813
7,1565.4088,1419.416138,0.379096,0.167936,0.39442,1419.416138
8,1467.6998,1376.193848,0.373279,0.165677,0.396199,1376.193848
9,1473.9398,1336.86145,0.367906,0.167331,0.399712,1336.86145
10,1380.0967,1301.560181,0.363017,0.163508,0.400246,1301.560181


[I 2025-07-06 12:21:50,393] Trial 39 finished with value: 0.42896801369466864 and parameters: {'learning_rate': 9.189810555280745e-06, 'batch_size': 8, 'warmup_steps': 273, 'weight_decay': 0.06441439106875717}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1750.961182,0.421049,0.069655,0.324303,1750.961182
2,No log,1738.205444,0.419512,0.148482,0.364485,1738.205444
3,1816.104800,1704.800049,0.415462,0.145158,0.364848,1704.800049
4,1816.104800,1645.293823,0.408146,0.138558,0.365206,1645.293823
5,1710.115900,1576.022217,0.399462,0.168491,0.384514,1576.022095
6,1710.115900,1508.998291,0.390876,0.167785,0.388455,1508.998291
7,1710.115900,1449.081177,0.383037,0.168137,0.39255,1449.081177
8,1581.539200,1395.589966,0.375901,0.172361,0.39823,1395.589844
9,1581.539200,1348.004028,0.369437,0.167793,0.399178,1348.004028
10,1441.285500,1305.946289,0.363628,0.166668,0.40152,1305.946289


[I 2025-07-06 12:22:45,207] Trial 40 finished with value: 0.43277920202500136 and parameters: {'learning_rate': 3.656611119703801e-05, 'batch_size': 32, 'warmup_steps': 206, 'weight_decay': 0.010738187480246998}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1558.598999,0.397248,0.164305,0.383529,1558.598999
2,1715.596900,1084.697754,0.331397,0.185802,0.427202,1084.697754
3,1271.010900,775.02533,0.280125,0.170686,0.445281,775.025208
4,814.216400,700.634644,0.266342,0.282966,0.508312,700.634644
5,692.237000,660.616577,0.258624,0.321602,0.531489,660.616577
6,690.134800,636.002014,0.25376,0.35705,0.551645,636.002014
7,690.134800,636.598999,0.253879,0.323605,0.534863,636.59906
8,638.028400,649.879639,0.256514,0.354517,0.549002,649.879639
9,585.642300,626.107178,0.251778,0.350833,0.549527,626.107178
10,551.081600,636.237549,0.253807,0.341964,0.544078,636.237549


[I 2025-07-06 12:23:12,680] Trial 41 finished with value: 0.5516448735277103 and parameters: {'learning_rate': 0.000315284444764623, 'batch_size': 16, 'warmup_steps': 220, 'weight_decay': 0.03615976609534856}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1538.550903,0.394685,0.085879,0.345597,1538.550781
2,1705.025500,1052.660156,0.326466,0.123422,0.398478,1052.660034
3,1239.779300,768.058777,0.278863,0.169137,0.445137,768.058777
4,802.981200,701.538208,0.266514,0.268163,0.500824,701.538208
5,692.633400,669.422302,0.260342,0.214152,0.476905,669.422302
6,697.325500,659.553894,0.258416,0.276804,0.509194,659.553894
7,697.325500,644.755859,0.255501,0.304788,0.524644,644.755859
8,645.216600,651.522827,0.256838,0.276012,0.509587,651.522827
9,578.555900,644.848267,0.255519,0.329293,0.536887,644.848206
10,551.707600,665.190125,0.259518,0.340706,0.540594,665.190125


[I 2025-07-06 12:23:59,699] Trial 42 finished with value: 0.5555908485807775 and parameters: {'learning_rate': 0.00028965170062902933, 'batch_size': 16, 'warmup_steps': 192, 'weight_decay': 0.02670511048773002}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1528.215454,0.393357,0.138042,0.372343,1528.215332
2,1700.869700,1026.691895,0.322414,0.10244,0.390013,1026.691895
3,1213.359100,740.054443,0.273732,0.151994,0.439131,740.054565
4,775.644900,696.018494,0.265463,0.195492,0.465014,696.018494
5,687.915200,663.425049,0.259173,0.348669,0.544748,663.425049
6,690.530200,648.463928,0.256234,0.293115,0.518441,648.463867
7,690.530200,625.479431,0.251652,0.324529,0.536438,625.479492
8,643.851700,639.33136,0.254424,0.28739,0.516483,639.33136
9,593.300300,650.049133,0.256547,0.279348,0.5114,650.049133
10,573.397000,609.713745,0.24846,0.36039,0.555965,609.713745


[I 2025-07-06 12:24:48,845] Trial 43 finished with value: 0.5564426240800355 and parameters: {'learning_rate': 0.00038350296119890766, 'batch_size': 16, 'warmup_steps': 242, 'weight_decay': 0.057900253618748634}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1618.245972,0.404778,0.128397,0.36181,1618.245972
2,1737.910800,1234.580322,0.353553,0.144917,0.395682,1234.580322
3,1412.143000,900.474976,0.301947,0.034144,0.366099,900.474854
4,950.064000,748.333496,0.275259,0.21369,0.469215,748.333435
5,738.246300,705.250732,0.267218,0.270521,0.501651,705.250671
6,722.107000,683.003357,0.26297,0.26927,0.50315,683.003418
7,722.107000,667.212219,0.259912,0.216857,0.478472,667.212219
8,686.265100,654.011047,0.257328,0.279759,0.511216,654.010986
9,636.495200,638.187317,0.254196,0.289945,0.517875,638.187256
10,606.113000,631.391663,0.252839,0.314796,0.530979,631.391663


[I 2025-07-06 12:25:42,945] Trial 44 finished with value: 0.5458294074319749 and parameters: {'learning_rate': 0.0002179221699101961, 'batch_size': 16, 'warmup_steps': 225, 'weight_decay': 0.01844967393458508}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1135.243286,0.33903,0.090261,0.375615,1135.243408
2,1509.714500,710.336487,0.26818,0.236347,0.484084,710.336609
3,809.001700,692.067139,0.264709,0.330357,0.532824,692.067017
4,698.016700,652.882263,0.257106,0.316592,0.529743,652.882263
5,649.387200,651.437805,0.256821,0.252498,0.497838,651.437805
6,632.738400,638.533447,0.254265,0.320809,0.533272,638.533386
7,632.738400,662.921204,0.259075,0.302629,0.521777,662.921143
8,585.737800,631.328064,0.252826,0.322397,0.534785,631.328064
9,549.057400,669.280762,0.260315,0.286112,0.512899,669.280701
10,506.321300,668.781128,0.260217,0.278343,0.509063,668.781128


[I 2025-07-06 12:26:25,520] Trial 45 finished with value: 0.5393486071092916 and parameters: {'learning_rate': 0.0004996289843966971, 'batch_size': 16, 'warmup_steps': 93, 'weight_decay': 0.027653797223649905}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1657.006348,0.409597,0.106217,0.34831,1657.006348
2,1756.556300,1337.77063,0.368032,0.144093,0.388031,1337.770752
3,1504.484800,1018.935364,0.321194,0.156383,0.417595,1018.935303
4,1074.408300,837.65625,0.291224,0.175786,0.442281,837.656372
5,825.434200,748.144226,0.275224,0.238172,0.481474,748.144226
6,762.088000,711.774719,0.268451,0.303167,0.517358,711.774658
7,762.088000,698.934998,0.266019,0.310345,0.522163,698.934937
8,715.250500,695.680664,0.265399,0.332265,0.533433,695.680664
9,691.587400,725.399841,0.271009,0.240271,0.484631,725.39978
10,684.529500,646.187622,0.255784,0.31052,0.527368,646.187622


[I 2025-07-06 12:26:58,108] Trial 46 finished with value: 0.5334328578566974 and parameters: {'learning_rate': 0.0001630666046091185, 'batch_size': 16, 'warmup_steps': 215, 'weight_decay': 0.007427509091188607}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,No log,1694.378662,0.41419,0.183151,0.38448,1694.378662
2,No log,1454.670776,0.383775,0.142512,0.379368,1454.670776
3,1688.997700,1163.937378,0.343288,0.125451,0.391081,1163.9375
4,1688.997700,923.335327,0.305755,0.108239,0.401242,923.335327
5,1062.897700,761.446045,0.27766,0.17469,0.448515,761.446106
6,1062.897700,701.668945,0.266539,0.097636,0.415549,701.668945
7,1062.897700,694.828674,0.265236,0.303053,0.518908,694.828796
8,734.847000,665.521667,0.259582,0.254572,0.497495,665.521729
9,734.847000,653.676941,0.257262,0.258582,0.50066,653.677002
10,669.701200,643.958008,0.255342,0.295228,0.519943,643.957947


[I 2025-07-06 12:27:33,472] Trial 47 finished with value: 0.5543163209759048 and parameters: {'learning_rate': 0.00038509263994625746, 'batch_size': 32, 'warmup_steps': 186, 'weight_decay': 0.01307683617720472}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1718.3312,1126.532104,0.337727,0.112826,0.387549,1126.532104
2,921.5705,744.817749,0.274612,0.156847,0.441118,744.817749
3,712.0953,698.374023,0.265912,0.254966,0.494527,698.374084
4,703.1741,680.636597,0.262514,0.221964,0.479725,680.636658
5,662.5634,639.080933,0.254374,0.319417,0.532522,639.080872
6,628.9595,626.038513,0.251765,0.329468,0.538852,626.038513
7,630.5782,621.255676,0.250801,0.377199,0.563199,621.255676
8,564.0397,614.229675,0.249379,0.387623,0.569122,614.229675
9,538.3727,631.563538,0.252873,0.394392,0.57076,631.563538
10,486.245,634.914917,0.253543,0.390861,0.568659,634.914917


[I 2025-07-06 12:28:51,651] Trial 48 finished with value: 0.5816623837702477 and parameters: {'learning_rate': 0.00022069517200075023, 'batch_size': 8, 'warmup_steps': 168, 'weight_decay': 0.006166026366219402}. Best is trial 22 with value: 0.586500525139622.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1701.0073,1051.018677,0.326212,0.146299,0.410044,1051.018677
2,881.043,731.493896,0.272145,0.137796,0.432826,731.493896
3,708.5134,697.01709,0.265654,0.3135,0.523923,697.01709
4,702.0759,743.591125,0.274386,0.240924,0.483269,743.591125
5,679.7016,664.75647,0.259433,0.225745,0.483156,664.75647
6,650.9129,669.350281,0.260328,0.198273,0.468972,669.35022
7,654.595,648.662537,0.256273,0.335316,0.539521,648.662537
8,583.6161,652.475891,0.257026,0.257835,0.500405,652.47583
9,596.6724,653.624573,0.257252,0.320581,0.531665,653.624512
10,529.3234,639.688171,0.254495,0.319885,0.532695,639.688171


[I 2025-07-06 12:29:37,092] Trial 49 finished with value: 0.5395211332783588 and parameters: {'learning_rate': 0.00023152889753023145, 'batch_size': 8, 'warmup_steps': 135, 'weight_decay': 0.0059508921263809215}. Best is trial 22 with value: 0.586500525139622.


최적화 완료!
최고 점수: 0.5865
최적 하이퍼파라미터: {'learning_rate': 0.00030994900647604996, 'batch_size': 8, 'warmup_steps': 201, 'weight_decay': 0.007364521772775286}


In [8]:
# 최적 하이퍼파라미터로 최종 훈련
print("\n최적 하이퍼파라미터로 최종 훈련 시작...")
best_params = study.best_params

training_args = TrainingArguments(
    output_dir='./chemberta_best',
    num_train_epochs=100,
    per_device_train_batch_size=best_params['batch_size'],
    per_device_eval_batch_size=best_params['batch_size'],
    warmup_steps=best_params['warmup_steps'],
    weight_decay=best_params['weight_decay'],
    learning_rate=best_params['learning_rate'],
    logging_steps=50,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="competition_score",
    greater_is_better=True,
    save_total_limit=3,
    report_to=None,
    fp16=False,
    dataloader_pin_memory=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=15)]
)


최적 하이퍼파라미터로 최종 훈련 시작...


In [9]:
# 모델 훈련
print("모델 훈련 시작...")
trainer.train()

# 검증 성능 평가
print("검증 성능 평가...")
eval_results = trainer.evaluate()
print(f"최종 검증 성능: {eval_results}")



모델 훈련 시작...


Epoch,Training Loss,Validation Loss,Nrmse,Pearson,Competition Score,Mse
1,1517.4628,1074.737793,0.329872,0.189614,0.429871,1074.737793
2,706.3205,709.433289,0.268009,0.288829,0.51041,709.433289
3,718.4934,694.879333,0.265246,0.335674,0.535214,694.879333
4,712.6187,651.726074,0.256878,0.289535,0.516329,651.726074
5,626.076,629.351624,0.25243,0.335201,0.541386,629.351624
6,644.0443,635.519409,0.253664,0.299992,0.523164,635.519409
7,579.3738,696.604309,0.265575,0.301205,0.517815,696.604309
8,589.8416,623.44342,0.251242,0.331755,0.540256,623.44342
9,565.9548,648.397461,0.256221,0.362771,0.553275,648.397461
10,539.277,624.470947,0.251449,0.374752,0.561651,624.470886


검증 성능 평가...


최종 검증 성능: {'eval_loss': 620.6712646484375, 'eval_nrmse': 0.25068310833942326, 'eval_pearson': 0.3833638332789165, 'eval_competition_score': 0.5663403624697466, 'eval_mse': 620.6712646484375, 'eval_runtime': 0.354, 'eval_samples_per_second': 951.935, 'eval_steps_per_second': 121.463, 'epoch': 27.0}


In [10]:
# 전체 데이터로 재훈련
print("\n전체 데이터로 최종 훈련...")
full_dataset = SMILESDataset(train_df['Canonical_Smiles'].values, train_df['Inhibition'].values)

final_training_args = TrainingArguments(
    output_dir='./chemberta_final',
    num_train_epochs=25,
    per_device_train_batch_size=best_params['batch_size'],
    warmup_steps=best_params['warmup_steps'],
    weight_decay=best_params['weight_decay'],
    learning_rate=best_params['learning_rate'],
    logging_steps=50,
    save_strategy="no",
    report_to=None,
    fp16=False
)

final_trainer = Trainer(
    model=model,
    args=final_training_args,
    train_dataset=full_dataset,
    data_collator=data_collator,
)

final_trainer.train()


전체 데이터로 최종 훈련...


Step,Training Loss
50,430.3592
100,464.3448
150,450.7077
200,466.6807
250,477.2329
300,411.0991
350,432.0484
400,450.065
450,398.0232
500,479.5968


TrainOutput(global_step=5275, training_loss=229.05776811500297, metrics={'train_runtime': 99.2923, 'train_samples_per_second': 423.245, 'train_steps_per_second': 53.126, 'total_flos': 45370363629480.0, 'train_loss': 229.05776811500297, 'epoch': 25.0})

In [11]:
print("테스트 데이터 예측 중...")
test_dataset = SMILESDataset(test_df['Canonical_Smiles'].values)
predictions = trainer.predict(test_dataset)
test_predictions = predictions.predictions.flatten()

# 결과 저장
submission['Inhibition'] = test_predictions
submission.to_csv('chemberta_optuna_submission.csv', index=False)

print(f"\n최적화 결과:")
print(f"- 최고 검증 점수: {study.best_value:.4f}")
print(f"- 최적 학습률: {best_params['learning_rate']:.2e}")
print(f"- 최적 배치 크기: {best_params['batch_size']}")
print(f"- 최적 Warmup Steps: {best_params['warmup_steps']}")
print(f"- 최적 Weight Decay: {best_params['weight_decay']:.4f}")
print("예측 결과 저장: chemberta_optuna_submission.csv")

테스트 데이터 예측 중...

최적화 결과:
- 최고 검증 점수: 0.5865
- 최적 학습률: 3.10e-04
- 최적 배치 크기: 8
- 최적 Warmup Steps: 201
- 최적 Weight Decay: 0.0074
예측 결과 저장: chemberta_optuna_submission.csv


In [12]:
# 예측값 분석
import pandas as pd
import numpy as np

# 결과 파일 로드
submission = pd.read_csv('chemberta_optuna_submission.csv')
train_df = pd.read_csv('train.csv')

print("=== 예측값 분석 ===")
print(f"예측값 범위: {submission['Inhibition'].min():.2f} ~ {submission['Inhibition'].max():.2f}")
print(f"실제값 범위: {train_df['Inhibition'].min():.2f} ~ {train_df['Inhibition'].max():.2f}")
print(f"예측값 평균: {submission['Inhibition'].mean():.2f}")
print(f"실제값 평균: {train_df['Inhibition'].mean():.2f}")
print(f"예측값 표준편차: {submission['Inhibition'].std():.2f}")
print(f"실제값 표준편차: {train_df['Inhibition'].std():.2f}")

print("\n=== 예측값 분포 ===")
print(submission['Inhibition'].describe())

print("\n=== 이상치 확인 ===")
print(f"음수값 개수: {(submission['Inhibition'] < 0).sum()}")
print(f"100 초과값 개수: {(submission['Inhibition'] > 100).sum()}")
print(f"NaN 개수: {submission['Inhibition'].isna().sum()}")

# 극단값 확인
print(f"\n가장 작은 5개 값: {submission['Inhibition'].nsmallest(5).values}")
print(f"가장 큰 5개 값: {submission['Inhibition'].nlargest(5).values}")

=== 예측값 분석 ===
예측값 범위: -0.60 ~ 74.70
실제값 범위: 0.00 ~ 99.38
예측값 평균: 29.62
실제값 평균: 33.22
예측값 표준편차: 20.12
실제값 표준편차: 26.41

=== 예측값 분포 ===
count    100.000000
mean      29.615916
std       20.120878
min       -0.595903
25%       13.647787
50%       24.885278
75%       44.081975
max       74.699570
Name: Inhibition, dtype: float64

=== 이상치 확인 ===
음수값 개수: 1
100 초과값 개수: 0
NaN 개수: 0

가장 작은 5개 값: [-0.5959028   0.13939762  1.3311311   1.8108578   1.9346439 ]
가장 큰 5개 값: [74.69957 74.68019 74.62001 73.23399 72.18682]
