In [2]:
import os
import libsql
import json
import pandas as pd
from dotenv import load_dotenv

# Load variables via python-dotenv before reading the connection settings below.
load_dotenv()

# Connection settings are read from the environment instead of being hard-coded;
# either value is None when its variable is not set.
url = os.environ.get("CLAUDELIGHT_DB_URL")
auth_token = os.environ.get("CLAUDELIGHT_RW")

# Open the local database file with the remote sync URL and token, then sync it.
conn = libsql.connect(
    "claude_light.db",
    sync_url=url,
    auth_token=auth_token,
)
conn.sync()

In [6]:
# Read data from db and convert to df

# Keys of each measurement's 'out' object that are kept, in column order.
selected_outputs = ['445nm', '515nm', '630nm', 'clear']

# Collect one flat record per row and build the DataFrame once at the end.
# Calling pd.concat inside the loop re-copies the whole frame for every row.
records = []
for rowid, data in conn.execute("""select * from measurements where json_extract(data, '$.tag') = '20250731'""").fetchall():
    # `data` is the JSON text of one measurement; 'in' unpacks into the three
    # R, G, B input values and 'out' maps channel names to readings.
    j = json.loads(data)

    R, G, B = j['in']
    out_values = [j['out'][key] for key in selected_outputs]

    records.append({
        'R': R,
        'G': G,
        'B': B,
        'I_445': out_values[0],
        'I_515': out_values[1],
        'I_630': out_values[2],
        'I_clear': out_values[3],
    })

# Explicit columns keep the schema stable even if the query returns no rows.
df = pd.DataFrame(
    records,
    columns=['R', 'G', 'B', 'I_445', 'I_515', 'I_630', 'I_clear'],
)

print(df.shape)
df.head(3)

(5, 7)


Unnamed: 0,R,G,B,I_445,I_515,I_630,I_clear
0,0.9,0.5,0.6,19888,35221,30901,65535
1,1.0,0.8,0.2,7964,54316,34207,65535
2,0.8,1.0,0.6,20253,65535,28784,65535


In [7]:
# Split train, val, test
from sklearn.model_selection import train_test_split

# Input columns (features) and measured columns (targets) shared by every split.
feature_cols = ['R', 'G', 'B']
target_cols = ['I_445', 'I_515', 'I_630', 'I_clear']

# Two-stage split with a fixed seed: first hold out 20% of the rows for test,
# then take 25% of the remainder for validation.
df_train_val, df_test = train_test_split(df, test_size=0.2, random_state=42)
df_train, df_val = train_test_split(df_train_val, test_size=0.25, random_state=42)

X_train, Y_train = df_train[feature_cols], df_train[target_cols]
X_val, Y_val = df_val[feature_cols], df_val[target_cols]
X_test, Y_test = df_test[feature_cols], df_test[target_cols]

X_train.head(3)

Unnamed: 0,R,G,B
3,0.0,0.1,0.9
4,0.1,0.8,0.4
0,0.9,0.5,0.6


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor

# Train MultiOutput Random Forest
# The base forest uses a fixed seed; MultiOutputRegressor wraps it so that all
# four target columns of Y_train are fitted from the R, G, B features.
forest_settings = {"n_estimators": 100, "random_state": 42}
base_model = RandomForestRegressor(**forest_settings)
multi_rf = MultiOutputRegressor(estimator=base_model)
multi_rf.fit(X_train, Y_train)

In [None]:
# Predict on training data
# Use X_train, the frame multi_rf was fitted on. The name `X` is not defined
# at this point when the notebook is run top to bottom on a fresh kernel.
Y_pred = multi_rf.predict(X_train)

# Give the predictions the same index as X_train: pd.concat(axis=1) aligns on
# index labels, and X_train keeps the shuffled original row labels from the
# split, so a default 0..n-1 index would misalign rows and introduce NaNs.
df_pred = pd.DataFrame(
    Y_pred,
    columns=['I_445_pred', 'I_515_pred', 'I_630_pred', 'I_clear_pred'],
    index=X_train.index,
)

# Display predictions
print(pd.concat([X_train, df_pred], axis=1))

In [None]:
# Train one random forest per target and estimate per-target uncertainty

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor


# Use the measurement frame `df`. The name `data` is the loop variable left
# over from the loading cell (the raw JSON text of the last row), not a
# DataFrame, so indexing it with column lists fails.
X = df[['R', 'G', 'B']]
Y = df[['I_445', 'I_515', 'I_630', 'I_clear']]

# Train a separate RF model for each output (to get per-target uncertainty)
n_estimators = 100
models = {}
all_preds = {}
all_stds = {}

# The individual trees are queried with a plain array rather than the
# DataFrame, which avoids scikit-learn's feature-name mismatch warning when
# predicting with a forest's sub-estimators.
X_values = X.to_numpy()

for column in Y.columns:
    # Fixed seed so repeated runs give the same forests, matching the seeded
    # forest used for the multi-output model.
    rf = RandomForestRegressor(n_estimators=n_estimators, random_state=42)
    rf.fit(X, Y[column])
    models[column] = rf

    # Uncertainty: std across tree predictions
    # preds has one row per tree and one column per sample.
    preds = np.stack([tree.predict(X_values) for tree in rf.estimators_], axis=0)
    mean_preds = preds.mean(axis=0)
    std_preds = preds.std(axis=0)

    all_preds[column] = mean_preds
    all_stds[column + '_std'] = std_preds

# Assemble results into a DataFrame
df_pred = pd.DataFrame(all_preds)
df_std = pd.DataFrame(all_stds)

# X's index is reset so its rows line up positionally with the freshly built
# prediction and std frames (both indexed 0..n-1).
results = pd.concat([X.reset_index(drop=True), df_pred, df_std], axis=1)

# Display results
print(results)
