In [None]:
# 0) Install TabPFN
!pip install -q tabpfn

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/160.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m160.8/160.8 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# 1) Imports
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tabpfn import TabPFNRegressor

In [None]:
# 2) Load CSV (robust-ish)
# Colab-only: opens a browser file picker and stores the chosen file(s) in the
# current working directory; `uploaded` is keyed by the saved file name.
from google.colab import files

uploaded = files.upload()                      # choose your CSV
if not uploaded:
    # A cancelled dialog yields no entries; fail with a clear message instead of
    # the bare StopIteration that next(iter(...)) would raise.
    raise ValueError("No file was uploaded; re-run this cell and choose a CSV file.")

CSV_PATH = next(iter(uploaded.keys()))         # first uploaded file is used
# on_bad_lines="warn" still skips malformed rows (same resulting frame as "skip")
# but reports each one, so silent data loss is visible in the cell output.
df = pd.read_csv(CSV_PATH, engine="python", on_bad_lines="warn")
print("Loaded:", df.shape, "rows x cols")

Saving DATE_400_micro_m_info.csv to DATE_400_micro_m_info.csv
Loaded: (14995, 10) rows x cols


In [None]:
# 3) Column roles: the four leading columns are the model inputs; every later
#    column is a candidate target, kept only when pandas reports a numeric dtype.
feature_cols = list(df.columns[:4])
candidate_targets = list(df.columns[4:])

numeric_targets = []
for name in candidate_targets:
    if pd.api.types.is_numeric_dtype(df[name]):
        numeric_targets.append(name)

# Nothing to predict -> stop here rather than failing later in the training loop.
if len(numeric_targets) == 0:
    raise ValueError("No numeric target columns detected after the first 4 columns.")

print("Feature columns:", feature_cols)
print("Target columns :", numeric_targets)

Feature columns: ['Gamma', 'NQW', 'kappa', 'I']
Target columns : ['Pmax', 'FWHM', 'Etotal', 'Epulse', 'Coeff', 'Delay']


In [None]:
# 4) Clean up (coerce numerics, fill NaNs) and build the train/test split
MAX_TRAIN_ROWS = 10_000   # TabPFN's default supported max number of training rows
SPLIT_SEED = 42           # fixed seed so the split is reproducible

if len(df) < 2:
    # Need at least one row on each side of the split.
    raise ValueError(f"Need at least 2 rows to build a train/test split, got {len(df)}.")

# Coerce to numeric; values that cannot be parsed become NaN and are imputed below.
df[feature_cols]    = df[feature_cols].apply(pd.to_numeric, errors="coerce")
df[numeric_targets] = df[numeric_targets].apply(pd.to_numeric, errors="coerce")

# Split on row positions BEFORE imputing, so the fill values are computed from
# training rows only and no test-set statistics leak into the training data.
idx_all = np.arange(len(df))
train_size = min(MAX_TRAIN_ROWS, len(df) - 1)   # leave at least 1 row for test
idx_train, idx_test = train_test_split(idx_all, train_size=train_size, random_state=SPLIT_SEED)

# Per-column medians of the training rows, applied to every row of the frame.
# `df` is updated in place because the training cell reads targets from `df[col]`.
# NOTE(review): imputing missing *targets* fabricates labels; consider dropping
# rows with a missing target per target column instead.
model_cols = feature_cols + numeric_targets
train_medians = df.iloc[idx_train][model_cols].median()
df[model_cols] = df[model_cols].fillna(train_medians)

X = df[feature_cols].values
X_train, X_test = X[idx_train], X[idx_test]

In [None]:
# 5) Fit and score one TabPFN regressor per target column (GPU if available)
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

separator = "-" * 50
results = []
for col in numeric_targets:
    # Target values as floats, partitioned with the same row indices as X.
    y = df[col].values.astype(float)
    y_train = y[idx_train]
    y_test = y[idx_test]

    # A fresh regressor per target; the training rows were capped when the split was built.
    reg = TabPFNRegressor(device=device)  # compliant with 10k cap
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # Held-out metrics for this target.
    mse = mean_squared_error(y_test, y_pred)
    r2  = r2_score(y_test, y_pred)
    record = {"Target": col, "MSE": mse, "R2": r2}

    print(f"Target: {col}")
    print("Mean Squared Error (MSE):", mse)
    print("R² Score:", r2)
    print(separator)

    results.append(record)

# 6) Summary table: one row per target, best R2 first, also written to disk
metrics_df = pd.DataFrame(results)
metrics_df = metrics_df.sort_values(by="R2", ascending=False)
metrics_df = metrics_df.reset_index(drop=True)
display(metrics_df)
metrics_df.to_csv("/content/tabpfn_metrics.csv", index=False)
print("Saved: /content/tabpfn_metrics.csv")


Using device: cuda


tabpfn-v2-regressor.ckpt:   0%|          | 0.00/44.4M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/37.0 [00:00<?, ?B/s]

Target: Pmax
Mean Squared Error (MSE): 0.018402522376147888
R² Score: 0.96044472605934
--------------------------------------------------
Target: FWHM
Mean Squared Error (MSE): 1.8750212724622022
R² Score: 0.8886884938359635
--------------------------------------------------
Target: Etotal
Mean Squared Error (MSE): 0.12532477248831866
R² Score: 0.9991217503402008
--------------------------------------------------
Target: Epulse
Mean Squared Error (MSE): 2.2175403375688933
R² Score: 0.9621457245489102
--------------------------------------------------
Target: Coeff
Mean Squared Error (MSE): 0.0028770338560772163
R² Score: 0.9301100848225055
--------------------------------------------------
Target: Delay
Mean Squared Error (MSE): 5.236364796161693e-05
R² Score: 0.9627743577111931
--------------------------------------------------


Unnamed: 0,Target,MSE,R2
0,Etotal,0.125325,0.999122
1,Delay,5.2e-05,0.962774
2,Epulse,2.21754,0.962146
3,Pmax,0.018403,0.960445
4,Coeff,0.002877,0.93011
5,FWHM,1.875021,0.888688


Saved: /content/tabpfn_metrics.csv
