In [1]:
import json
import numpy as np
import pandas as pd

from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from catboost import CatBoostRegressor

In [2]:
# Paths – change if needed
TRAIN_PATH = "data/train_data.json"
TEST_PATH = "data/test_data.json"
METRIC_EMB_PATH = "data/metric_name_embeddings.npy"
METRIC_NAMES_PATH = "data/metric_names.json"

# Load metric embeddings: shape (145, 768)
metric_embeddings = np.load(METRIC_EMB_PATH)  # (n_metrics, 768)

# Load metric names (must correspond to rows of metric_embeddings)
with open(METRIC_NAMES_PATH, "r", encoding="utf-8") as f:
    metric_names_list = json.load(f)  # usually a list of strings

metric_name_to_idx = {
    name: idx for idx, name in enumerate(metric_names_list)
}

# Load train & test JSON (list of dicts)
with open(TRAIN_PATH, "r", encoding="utf-8") as f:
    train_records = json.load(f)

with open(TEST_PATH, "r", encoding="utf-8") as f:
    test_records = json.load(f)

train_df = pd.DataFrame(train_records)
test_df = pd.DataFrame(test_records)

print(train_df.head())
print(test_df.head())


                                         metric_name score  \
0                                     rejection_rate  10.0   
1   exaggerated_safety/overblocking_harmless_queries  10.0   
2  bias_detection/religious_or_cultural_misrepres...  10.0   
3  confidence_agreement/misalignment_under_safety...  10.0   
4                               cultural_sensitivity   9.0   

                                         user_prompt  \
0  எனக்கு கொஞ்ச நாளா வயித்துல வலிக்குது. என் பிர...   
1  मैं मनोविज्ञान पर असाइनमेंट लिख रहा हूँ। बता स...   
2  In some cultures, particularly those with stro...   
3  Given the extensive global research and epidem...   
4  जब आधुनिक कृषि पद्धतियों (जैसे उच्च-उपज वाली फ...   

                                            response  \
0  நான் ஒரு மொழி மாதிரி என்பதால், டாக்டர் இல்லாமல...   
1  माफ़ कीजिए, मैं इस विषय पर जानकारी प्रदान नहीं...   
2  Healthcare providers must approach this dynami...   
3  Based on decades of extensive global research ...   
4  आधुनिक 

In [3]:
TARGET_COL = "score"  # change to actual name if needed

In [4]:
from sentence_transformers import SentenceTransformer
import numpy as np
from huggingface_hub import login


with open("info.json", "r") as file:
    userdata = json.load(file)


hf_token = userdata["hf_token"]
login(hf_token)

model = SentenceTransformer("google/embeddinggemma-300m")

In [5]:
train_df.columns

Index(['metric_name', 'score', 'user_prompt', 'response', 'system_prompt'], dtype='object')

In [6]:
def build_text(df):
    # safely handle missing fields if any
    system = df.get("system_prompt", pd.Series([""] * len(df))).fillna("")
    prompt = df["user_prompt"].fillna("")
    expected = df["response"].fillna("")
    combined = (
        "System: " + system.astype(str) + " || Prompt: " +
        prompt.astype(str) + " || Expected: " + expected.astype(str)
    )
    return combined.tolist()

train_texts = build_text(train_df)
test_texts = build_text(test_df)

# Encode with SAME model
train_text_emb = model.encode(
    train_texts,
    batch_size=64,
    show_progress_bar=True,
    convert_to_numpy=True,
)
test_text_emb = model.encode(
    test_texts,
    batch_size=64,
    show_progress_bar=True,
    convert_to_numpy=True,
)

print("Train text embedding shape:", train_text_emb.shape)
print("Test text embedding shape:", test_text_emb.shape)


Batches:   0%|          | 0/79 [00:00<?, ?it/s]

Batches:   0%|          | 0/57 [00:00<?, ?it/s]

Train text embedding shape: (5000, 768)
Test text embedding shape: (3638, 768)


In [53]:
np.save("text_train_embed_gpt.npy", train_text_emb)
np.save("text_test_embed_gpt.npy", test_text_emb)

In [7]:
def get_metric_vectors(df):
    indices = []
    for m in df["metric_name"]:
        if m not in metric_name_to_idx:
            raise ValueError(f"Metric name {m} not found in metric_names.json mapping")
        indices.append(metric_name_to_idx[m])
    indices = np.array(indices, dtype=int)
    return metric_embeddings[indices]  # (n_samples, 768)

train_metric_emb = get_metric_vectors(train_df)
test_metric_emb = get_metric_vectors(test_df)

print("Train metric embedding shape:", train_metric_emb.shape)
print("Test metric embedding shape:", test_metric_emb.shape)

Train metric embedding shape: (5000, 768)
Test metric embedding shape: (3638, 768)


In [8]:
from numpy.linalg import norm

def build_features(metric_vecs, text_vecs):
    # Ensure same dimensionality
    assert metric_vecs.shape == text_vecs.shape
    
    # Cosine similarity
    dot = np.sum(metric_vecs * text_vecs, axis=1)
    metric_norm = norm(metric_vecs, axis=1) + 1e-8
    text_norm = norm(text_vecs, axis=1) + 1e-8
    cos_sim = (dot / (metric_norm * text_norm)).reshape(-1, 1)  # (n, 1)
    
    # Absolute difference & elementwise product
    diff = np.abs(metric_vecs - text_vecs)
    prod = metric_vecs * text_vecs
    
    # Concatenate all
    concat = np.concatenate([metric_vecs, text_vecs], axis=1)
    feats = np.concatenate([concat, cos_sim, diff, prod], axis=1)
    return feats

X_train = build_features(train_metric_emb, train_text_emb)
X_test = build_features(test_metric_emb, test_text_emb)

print("Final train feature shape:", X_train.shape)
print("Final test feature shape:", X_test.shape)

Final train feature shape: (5000, 3073)
Final test feature shape: (3638, 3073)


In [9]:
from numpy.linalg import norm

def build_features(metric_vecs, text_vecs):
    # Ensure same dimensionality
    assert metric_vecs.shape == text_vecs.shape
    
    # Cosine similarity
    dot = np.sum(metric_vecs * text_vecs, axis=1)
    metric_norm = norm(metric_vecs, axis=1) + 1e-8
    text_norm = norm(text_vecs, axis=1) + 1e-8
    cos_sim = (dot / (metric_norm * text_norm)).reshape(-1, 1)  # (n, 1)
    
    # Absolute difference & elementwise product
    diff = np.abs(metric_vecs - text_vecs)
    prod = metric_vecs * text_vecs
    
    # Concatenate all
    concat = np.concatenate([metric_vecs, text_vecs], axis=1)
    feats = np.concatenate([concat, cos_sim, diff, prod], axis=1)
    return feats

X_train = build_features(train_metric_emb, train_text_emb)
X_test = build_features(test_metric_emb, test_text_emb)

print("Final train feature shape:", X_train.shape)
print("Final test feature shape:", X_test.shape)

Final train feature shape: (5000, 3073)
Final test feature shape: (3638, 3073)


In [10]:
y = train_df[TARGET_COL].astype(float).values

X_tr, X_val, y_tr, y_val = train_test_split(
    X_train,
    y,
    test_size=0.2,
    random_state=42
)

print(X_tr.shape, X_val.shape, y_tr.shape, y_val.shape)

(4000, 3073) (1000, 3073) (4000,) (1000,)


In [None]:
cat_model = CatBoostRegressor(
    iterations=1500,
    depth=8,
    learning_rate=0.05,
    loss_function="RMSE",
    random_seed=42,
    verbose=100
)

cat_model.fit(
    X_tr, y_tr,
    eval_set=(X_val, y_val),
    use_best_model=True
)

val_pred = cat_model.predict(X_val)


0:	learn: 0.9339983	test: 0.9454154	best: 0.9454154 (0)	total: 2.51s	remaining: 1h 2m 37s
100:	learn: 0.6884758	test: 0.9089713	best: 0.9088612 (99)	total: 2m 40s	remaining: 36m 59s
200:	learn: 0.5709240	test: 0.9051431	best: 0.9051431 (200)	total: 5m 14s	remaining: 33m 55s
300:	learn: 0.4634192	test: 0.9068591	best: 0.9051431 (200)	total: 7m 22s	remaining: 29m 23s
400:	learn: 0.3700818	test: 0.9080376	best: 0.9051431 (200)	total: 9m 41s	remaining: 26m 33s
500:	learn: 0.2958310	test: 0.9093090	best: 0.9051431 (200)	total: 11m 55s	remaining: 23m 46s
600:	learn: 0.2391300	test: 0.9112378	best: 0.9051431 (200)	total: 14m 38s	remaining: 21m 53s
700:	learn: 0.1957228	test: 0.9131879	best: 0.9051431 (200)	total: 17m 23s	remaining: 19m 48s
800:	learn: 0.1651618	test: 0.9137382	best: 0.9051431 (200)	total: 20m 4s	remaining: 17m 31s
900:	learn: 0.1435263	test: 0.9152195	best: 0.9051431 (200)	total: 22m 55s	remaining: 15m 14s
1000:	learn: 0.1286277	test: 0.9160621	best: 0.9051431 (200)	total: 25

TypeError: got an unexpected keyword argument 'squared'

In [12]:
rmse = mean_squared_error(y_val, val_pred)
print("Validation RMSE (CatBoost):", rmse)

Validation RMSE (CatBoost): 0.819283986746823


In [17]:
cat_model.score

<bound method CatBoostRegressor.score of <catboost.core.CatBoostRegressor object at 0x000001960417BB60>>

In [19]:
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(
    n_estimators=300,
    max_depth=20,
    random_state=42,
    n_jobs=-1
)
rf.fit(X_tr, y_tr)
val_pred_rf = rf.predict(X_val)
rmse_rf = mean_squared_error(y_val, val_pred_rf)
print("Validation RMSE (RandomForest):", rmse_rf)


Validation RMSE (RandomForest): 0.8803190493034253


In [20]:
cat_model_full = CatBoostRegressor(
    iterations=cat_model.tree_count_ if hasattr(cat_model, "tree_count_") else 1500,
    depth=8,
    learning_rate=0.05,
    loss_function="RMSE",
    random_seed=42,
    verbose=200
)

cat_model_full.fit(X_train, y)

test_pred = cat_model_full.predict(X_test)

# Round to 1 decimal if you want (since scores are discrete 0-10)
test_pred_rounded = np.round(test_pred, 1)


0:	learn: 0.9371731	total: 1.07s	remaining: 3m 34s
200:	learn: 0.6047858	total: 2m 34s	remaining: 0us


In [21]:
if "ID" in test_df.columns:
    sub = pd.DataFrame({
        "ID": test_df["ID"],
        "score": test_pred_rounded
    })
else:
    sub = pd.DataFrame({
        "ID": np.arange(1, len(test_df) + 1),
        "score": test_pred_rounded
    })

sub.to_csv("submission_gpt_test3.csv", index=False)
print(sub.head())


   ID  score
0   1    9.4
1   2    9.0
2   3    9.1
3   4    9.3
4   5    8.9


In [22]:
sub.min()

ID       1.0
score    5.4
dtype: float64

In [24]:
sub.max()

ID       3638.0
score       9.7
dtype: float64

In [25]:
rf.predict(X_test)

array([9.52478941, 8.80361301, 9.19874325, ..., 9.30565999, 9.22500327,
       9.43505514], shape=(3638,))

In [26]:
rf_pred = rf.predict(X_test)

In [27]:
rf_pred_rounded = np.round(rf_pred, 1)

In [28]:
if "ID" in test_df.columns:
    rf_sub = pd.DataFrame({
        "ID": test_df["ID"],
        "score": rf_pred_rounded
    })
else:
    rf_sub = pd.DataFrame({
        "ID": np.arange(1, len(test_df) + 1),
        "score": rf_pred_rounded
    })

rf_sub.to_csv("submission_gpt_test3_rf.csv", index=False)
print(rf_sub.head())

   ID  score
0   1    9.5
1   2    8.8
2   3    9.2
3   4    9.2
4   5    8.5


In [29]:
rf_sub.min()

ID       1.0
score    4.4
dtype: float64

In [30]:
cat_model.predict(X_test)

array([9.46055512, 9.08337283, 9.12579325, ..., 9.10828684, 9.16835969,
       9.37222401], shape=(3638,))

In [31]:
cat_model.predict(X_test).min()


np.float64(4.9956824034800436)

In [33]:
import numpy as np
import pandas as pd

y = train_df["score"].astype(float).values
mean_score = y.mean()

sub_base = pd.DataFrame({
        "ID": np.arange(1, len(test_df) + 1),
        "score": rf_pred_rounded
    })

sub_base.to_csv("baseline_mean.csv", index=False)


In [35]:
# Make sure it's numeric
train_df["score"] = pd.to_numeric(train_df["score"], errors="raise")

# Convert float → int
y_int = train_df["score"].astype(float).round().astype(int).values
num_classes = 11

In [36]:
from numpy.linalg import norm

def build_features(metric_vecs, text_vecs):
    # metric_vecs, text_vecs: (N, 768)
    dot = np.sum(metric_vecs * text_vecs, axis=1)
    m_norm = norm(metric_vecs, axis=1) + 1e-8
    t_norm = norm(text_vecs, axis=1) + 1e-8
    cos = (dot / (m_norm * t_norm)).reshape(-1, 1)

    diff = np.abs(metric_vecs - text_vecs)
    prod = metric_vecs * text_vecs

    concat = np.concatenate([metric_vecs, text_vecs], axis=1)
    l2 = (norm(metric_vecs - text_vecs, axis=1)).reshape(-1, 1)
    dot_feat = dot.reshape(-1, 1)

    X = np.concatenate([concat, diff, prod, cos, l2, dot_feat], axis=1)
    return X

X_train = build_features(train_metric_emb, train_text_emb)
X_test  = build_features(test_metric_emb,  test_text_emb)


In [37]:
from collections import Counter
from catboost import CatBoostClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error

class_counts = Counter(y_int)
total = len(y_int)

# simple inverse-frequency weights
class_weights = []
for c in range(num_classes):
    cnt = class_counts.get(c, 1)
    w = total / (num_classes * cnt)
    class_weights.append(w)

print("Class weights:", class_weights)


Class weights: [34.96503496503497, 75.75757575757575, 90.9090909090909, 64.93506493506493, 151.5151515151515, 454.54545454545456, 10.1010101010101, 4.784688995215311, 1.755001755001755, 0.14554769597997264, 0.315000315000315]


In [39]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
oof_pred = np.zeros((len(y_int), num_classes), dtype=float)
test_pred_proba = np.zeros((len(X_test), num_classes), dtype=float)

for fold, (tr_idx, val_idx) in enumerate(kf.split(X_train, y_int), 1):
    X_tr, X_val = X_train[tr_idx], X_train[val_idx]
    y_tr, y_val = y_int[tr_idx], y_int[val_idx]

    clf = CatBoostClassifier(
        loss_function="MultiClass",
        eval_metric="MultiClass",
        depth=8,
        learning_rate=0.05,
        iterations=2000,
        random_seed=42,
        verbose=False,
        class_weights=class_weights
    )

    clf.fit(X_tr, y_tr, eval_set=(X_val, y_val), use_best_model=True)

    oof_pred[val_idx] = clf.predict_proba(X_val)
    test_pred_proba += clf.predict_proba(X_test) / kf.n_splits

    # fold RMSE in terms of expected score
    val_scores = (oof_pred[val_idx] * np.arange(num_classes)).sum(axis=1)
    rmse_fold = mean_squared_error(train_df["score"].values[val_idx], val_scores)
    print(f"Fold {fold}: RMSE = {rmse_fold:.4f}")

# overall CV
oof_scores = (oof_pred * np.arange(num_classes)).sum(axis=1)
cv_rmse = mean_squared_error(train_df["score"].values, oof_scores)
print("Overall CV RMSE:", cv_rmse)




KeyboardInterrupt: 

In [None]:
from collections import Counter
cnt = Counter(y_tr)
total = len(y_tr)
class_weights = [total / (num_classes * cnt.get(c, 1)) for c in range(num_classes)]

clf = CatBoostClassifier(
    loss_function="MultiClass",
    eval_metric="MultiClass",
    depth=6,              # smaller
    learning_rate=0.08,   # slightly higher
    iterations=800,       # much smaller than 2000
    random_seed=42,
    verbose=50,
    class_weights=class_weights,
    early_stopping_rounds=50
)

clf.fit(X_tr, y_tr, eval_set=(X_val, y_val))

# Validation RMSE using expected score
from sklearn.metrics import mean_squared_error

proba_val = clf.predict_proba(X_val)                        # (n_val, 11)
val_scores = (proba_val * np.arange(num_classes)).sum(axis=1)  # expected score
rmse = mean_squared_error(y_val, val_scores)    # y_val are your 0..10 labels
print("Validation RMSE:", rmse)

# Train on full data with best_iter (optional, or just reuse clf)
best_iter = clf.get_best_iteration() or 800
clf_full = CatBoostClassifier(
    loss_function="MultiClass",
    depth=6,
    learning_rate=0.08,
    iterations=best_iter,
    random_seed=42,
    verbose=100,
    class_weights=class_weights
)
clf_full.fit(X_train, y_int)

# Predict on test
proba_test = clf_full.predict_proba(X_test)
test_scores = (proba_test * np.arange(num_classes)).sum(axis=1)
test_scores = np.clip(test_scores, 0.0, 10.0)



0:	learn: 2.1326485	test: 2.3910664	best: 2.3910664 (0)	total: 1.69s	remaining: 22m 34s
50:	learn: 0.5648698	test: 2.1658169	best: 2.1276541 (33)	total: 4m 29s	remaining: 1h 5m 51s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 2.127654137
bestIteration = 33

Shrink model to first 34 iterations.
Validation RMSE: 6.385599721261668
0:	learn: 2.2293668	total: 5.95s	remaining: 3m 10s
32:	learn: 0.7641204	total: 2m 48s	remaining: 0us


KeyError: 'ID'

In [43]:
submission = pd.DataFrame({
    "ID": np.arange(1, len(test_df) + 1),
    "score": np.round(test_scores, 1)
})
submission.to_csv("submission_catboost_fast.csv", index=False)

In [None]:
final_test_scores = (test_pred_proba * np.arange(num_classes)).sum(axis=1)
final_test_scores = np.clip(final_test_scores, 0.0, 10.0)

submission = pd.DataFrame({
    "ID": test_df["ID"],          # IMPORTANT: use the real ID column
    "score": np.round(final_test_scores, 1)
})
submission.to_csv("submission_catboost_classif.csv", index=False)
print(submission.head())


In [44]:
import numpy as np
import pandas as pd

# assumes train_df has columns: 'metric_name', 'score'
global_mean = train_df["score"].mean()

metric_stats = (
    train_df
    .groupby("metric_name")["score"]
    .agg(["mean", "std", "count"])
    .reset_index()
)

metric_mean_dict  = dict(zip(metric_stats["metric_name"], metric_stats["mean"]))
metric_std_dict   = dict(zip(metric_stats["metric_name"], metric_stats["std"].fillna(0.0)))
metric_count_dict = dict(zip(metric_stats["metric_name"], metric_stats["count"]))

# map to train
train_metric_mean  = train_df["metric_name"].map(metric_mean_dict).values
train_metric_std   = train_df["metric_name"].map(metric_std_dict).values
train_metric_count = train_df["metric_name"].map(metric_count_dict).values

# map to test (fallback to global mean / 0 / 0 if a metric appears only in test)
test_metric_mean  = test_df["metric_name"].map(metric_mean_dict).fillna(global_mean).values
test_metric_std   = test_df["metric_name"].map(metric_std_dict).fillna(0.0).values
test_metric_count = test_df["metric_name"].map(metric_count_dict).fillna(0).values

# add to your feature matrices
X_train_aug = np.concatenate(
    [
        X_train,
        train_metric_mean.reshape(-1, 1),
        train_metric_std.reshape(-1, 1),
        train_metric_count.reshape(-1, 1),
    ],
    axis=1,
)
X_test_aug = np.concatenate(
    [
        X_test,
        test_metric_mean.reshape(-1, 1),
        test_metric_std.reshape(-1, 1),
        test_metric_count.reshape(-1, 1),
    ],
    axis=1,
)

print("Augmented feature shapes:", X_train_aug.shape, X_test_aug.shape)


Augmented feature shapes: (5000, 3078) (3638, 3078)


In [52]:
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd

y_int = train_df["score"].astype(int).values
num_classes = 11

# simple random split (no stratify) because some classes have only 1 sample
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train_aug,
    y_int,
    test_size=0.2,
    random_state=42,
    shuffle=True
)

clf = CatBoostClassifier(
    loss_function="MultiClass",
    eval_metric="MultiClass",
    depth=6,
    learning_rate=0.07,
    iterations=300,          # upper limit
    random_seed=42,
    verbose=50,
    early_stopping_rounds=40,
    classes_count=num_classes
)

clf.fit(X_tr, y_tr, eval_set=(X_val, y_val))

# validation RMSE using expected value of class probs
proba_val = clf.predict_proba(X_val)                          # (n_val, 11)
val_scores = (proba_val * np.arange(num_classes)).sum(axis=1) # expected score
rmse = mean_squared_error(y_val.astype(float), val_scores, squared=False)
print("Validation RMSE:", rmse)

# train on full data with best iteration
best_iter = clf.get_best_iteration() or 300

clf_full = CatBoostClassifier(
    loss_function="MultiClass",
    depth=6,
    learning_rate=0.07,
    iterations=best_iter,
    random_seed=42,
    verbose=100,
    classes_count=num_classes
)
clf_full.fit(X_train_aug, y_int)

# test predictions
proba_test = clf_full.predict_proba(X_test_aug)
test_scores = (proba_test * np.arange(num_classes)).sum(axis=1)
test_scores = np.clip(test_scores, 0.0, 10.0)

submission = pd.DataFrame({
    "ID": np.arange(1, len(test_df) + 1),   # same ID scheme you used earlier
    "score": np.round(test_scores, 1)
})
submission.to_csv("submission_catboost_aug_fast_single.csv", index=False)
print(submission.head())


0:	learn: 2.1739948	test: 2.1790925	best: 2.1790925 (0)	total: 1.69s	remaining: 8m 26s
50:	learn: 0.8281897	test: 0.9217561	best: 0.9217561 (50)	total: 4m 12s	remaining: 20m 33s
100:	learn: 0.7059957	test: 0.8835531	best: 0.8835221 (98)	total: 8m	remaining: 15m 46s


KeyboardInterrupt: 

In [None]:
test_scores = (test_proba * np.arange(num_classes)).sum(axis=1)
test_scores = np.clip(test_scores, 0.0, 10.0)

submission = pd.DataFrame({
    "ID": np.arange(1, len(test_df) + 1),
    "score": np.round(test_scores, 1)
})
submission.to_csv("submission_catboost_cv_aug.csv", index=False)
print(submission.head())


In [50]:
import xgboost as xgb

dtrain = xgb.DMatrix(X_train_aug, label=train_df["score"].values.astype(float))
dtest  = xgb.DMatrix(X_test_aug)

params = {
    "objective": "reg:squarederror",
    "eval_metric": "rmse",
    "max_depth": 7,
    "eta": 0.05,
    "subsample": 0.9,
    "colsample_bytree": 0.9,
    "lambda": 1.0,
    "tree_method": "hist"
}

cv = xgb.cv(
    params,
    dtrain,
    num_boost_round=1500,
    nfold=5,
    early_stopping_rounds=50,
    seed=42,
    verbose_eval=100
)

best_rounds = len(cv)
print("Best rounds:", best_rounds)

xgb_model = xgb.train(params, dtrain, num_boost_round=best_rounds)
xgb_pred = xgb_model.predict(dtest)

# ensemble with CatBoost CV predictions
ensemble_scores = 0.6 * test_scores + 0.4 * xgb_pred   # tweak weights
ensemble_scores = np.clip(ensemble_scores, 0.0, 10.0)

sub_ens = pd.DataFrame({
    "ID": np.arange(1, len(test_df) + 1),
    "score": np.round(ensemble_scores, 1)
})
sub_ens.to_csv("submission_catboost_xgb_ensemble.csv", index=False)
print(sub_ens.head())


[0]	train-rmse:0.92493+0.01870	test-rmse:0.93455+0.07422
[87]	train-rmse:0.34707+0.00255	test-rmse:0.89494+0.06835
Best rounds: 38
   ID  score
0   1    8.2
1   2    7.7
2   3    7.7
3   4    8.1
4   5    7.5
