In [13]:
import os
import libsql
import json
import pandas as pd
import requests
from dotenv import load_dotenv

# Populate os.environ from a local .env file (python-dotenv) before reading
# the settings below.
load_dotenv()

# Database sync endpoint and auth token come from the environment;
# os.getenv returns None when a variable is not set.
url = os.getenv("CLAUDELIGHT_DB_URL")
auth_token = os.getenv("CLAUDELIGHT_RW")
# Open the local database file "claude_light.db", configured with the sync
# URL and token read above.
# NOTE(review): presumably this is a local replica of the remote database -- confirm against the libsql docs.
conn = libsql.connect("claude_light.db", sync_url=url, auth_token=auth_token)
# NOTE(review): presumably brings the local file up to date with the remote before it is queried -- confirm.
conn.sync()

In [14]:
from datetime import datetime
from zoneinfo import ZoneInfo

def convert_time(str_data_UnixTime, tz="America/New_York"):
    """Convert a Unix timestamp into (day of year, fractional hour of day).

    Args:
        str_data_UnixTime: Seconds since the Unix epoch, given as a string or
            a number (anything ``float()`` accepts).
        tz: IANA time zone name used to localise the timestamp. Defaults to
            "America/New_York", the zone that was previously hard-coded.

    Returns:
        tuple[int, float]: ``(day_of_year, time_of_day)``. ``day_of_year`` is
        1-based (Jan 1 -> 1). ``time_of_day`` is the number of hours since
        local midnight, rounded to 3 decimals; fractions of a second are
        ignored.

    Raises:
        ValueError: If the timestamp cannot be parsed by ``float()``.
        zoneinfo.ZoneInfoNotFoundError: If ``tz`` is not a known zone name.
    """
    local_dt = datetime.fromtimestamp(float(str_data_UnixTime), ZoneInfo(tz))
    # tm_yday is already an int in the range 1..366.
    day_of_year = local_dt.timetuple().tm_yday
    # Fold hours, minutes and seconds into a single decimal-hour value.
    time_of_day = round(local_dt.hour + local_dt.minute / 60 + local_dt.second / 3600, 3)
    return day_of_year, time_of_day


In [15]:
OPEN_WEATHER_MAP_KEY = os.getenv('OPEN_WEATHER_MAP_KEY')

# Weather condition labels (the API's `weather[].main` field) and the integer
# code assigned to each one: its position in this list. Built once at
# definition time instead of on every call.
_WEATHER_CLASSES = ['Clear', 'Clouds', 'Rain', 'Snow', 'Drizzle',
                    'Thunderstorm', 'Mist', 'Fog', 'Haze']
_WEATHER_CLASS_INDEX = {label: idx for idx, label in enumerate(_WEATHER_CLASSES)}


def request_weather_cloud(str_data_UnixTime, lat=40.4406, lon=-79.9959, timeout=10):
    """Look up the historical weather class and cloud cover for a timestamp.

    Queries the OpenWeatherMap One Call 3.0 "timemachine" endpoint using the
    module-level ``OPEN_WEATHER_MAP_KEY``.

    Args:
        str_data_UnixTime: Seconds since the Unix epoch, as a string or
            number. Fractional values (e.g. "1753982400.75") are truncated to
            whole seconds, matching what ``convert_time`` accepts.
        lat: Latitude of the location. Defaults to Pittsburgh.
        lon: Longitude of the location. Defaults to Pittsburgh.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        tuple: ``(weather_index, cloud_cover_percent)``. ``weather_index`` is
        the position of the reported condition in ``_WEATHER_CLASSES``, or -1
        for a condition that is not in that list. ``cloud_cover_percent`` is
        the ``clouds`` value of the first record in the response.

    Raises:
        requests.HTTPError: If the API answers with an error status (for
            example a bad key or an exhausted quota).
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError, IndexError: If the response body lacks the expected fields.
    """
    params = {
        "lat": lat,
        "lon": lon,
        # Go through float() first: int("1753982400.75") would raise, while
        # convert_time happily accepts the same value.
        "dt": int(float(str_data_UnixTime)),
        "appid": OPEN_WEATHER_MAP_KEY,
    }
    response = requests.get(
        "https://api.openweathermap.org/data/3.0/onecall/timemachine",
        params=params,
        timeout=timeout,  # never block the notebook forever on a stalled connection
    )
    # Surface API failures as an HTTP error instead of an opaque KeyError on
    # the missing 'data' field further down.
    response.raise_for_status()

    observation = response.json()['data'][0]
    weather_main = observation['weather'][0]['main']
    cloud_cover_percent = observation['clouds']

    weather_index = _WEATHER_CLASS_INDEX.get(weather_main, -1)
    return weather_index, cloud_cover_percent


In [16]:
# Read every tagged measurement from the db and convert it to a DataFrame,
# one row per measurement.
#
# To export a single run only, filter on one tag instead, e.g.
#   ... where json_extract(data, '$.tag') = '20250731'
# NOTE(review): `select *` is unpacked into exactly two values, so this assumes
# the measurements table has two columns (row id, JSON payload) -- confirm.

# Sensor channels copied from each measurement's 'out' mapping (loop-invariant).
selected_outputs = ['445nm', '515nm', '630nm', 'clear']

# Collect plain dicts and build the DataFrame once at the end; growing a frame
# with pd.concat inside the loop re-copies all previous rows on every pass.
records = []
# Weather already fetched, keyed by timestamp, so measurements that share a
# timestamp cost only one API request.
weather_cache = {}

for rowid, data in conn.execute("""select * from measurements where json_extract(data, '$.tag') IS NOT NULL""").fetchall():
    j = json.loads(data)

    R, G, B = j['in']
    out_values = [j['out'][key] for key in selected_outputs]
    str_data_UnixTime = j['t0']

    day_of_year, time_of_day = convert_time(str_data_UnixTime)
    if str_data_UnixTime not in weather_cache:
        weather_cache[str_data_UnixTime] = request_weather_cloud(str_data_UnixTime)
    weather_index, cloud_cover_percent = weather_cache[str_data_UnixTime]

    records.append({
        'R': R,
        'G': G,
        'B': B,
        'day_of_year': day_of_year,
        'time_of_day': time_of_day,
        'weather_index': weather_index,
        'cloud_cover_percent': cloud_cover_percent,
        'I_445': out_values[0],
        'I_515': out_values[1],
        'I_630': out_values[2],
        'I_clear': out_values[3]
    })

df = pd.DataFrame(records)

print(df.shape)
df.head(3)

(58, 11)


Unnamed: 0,R,G,B,day_of_year,time_of_day,weather_index,cloud_cover_percent,I_445,I_515,I_630,I_clear
0,0.0,0.0,0.0,212,13.326,1,96,844,1689,2127,13266
1,0.9,0.5,0.6,212,13.911,1,100,19888,35221,30901,65535
2,1.0,0.8,0.2,212,13.911,1,100,7964,54316,34207,65535


In [17]:
# Save the processed dataset. to_csv does not create missing directories, so
# make sure the output folder exists first (no-op when it already does).
os.makedirs('data_processed', exist_ok=True)
df.to_csv('data_processed/full_data.csv', index=False)