In [73]:
# Add project root to path using pathlib
import sys

from pathlib import Path

sys.path.append(str(Path.cwd().parent))

from src.cmesrc.config import CMESRC_DB, HARPS_LIFETIME_DATABSE, SDOML_TIMESTAMP_INFO
from src.cmesrc.utils import read_SWAN_filepath, filepaths_updated_swan_data
from tqdm import tqdm
import sqlite3
import pandas as pd
import pickle

In [2]:
with open(SDOML_TIMESTAMP_INFO, "rb") as f:
    sdoml_timestamp_info = pickle.load(f)

In [3]:
timestamps = list(sdoml_timestamp_info.keys())
indices = [entry["index"] for entry in sdoml_timestamp_info.values()]

In [4]:
con = sqlite3.connect(CMESRC_DB)
con.execute("PRAGMA foreign_keys = ON")
cur = con.cursor()

In [5]:
cur.execute("PRAGMA table_info(images);")
results = cur.fetchall()
print(results)

[(0, 'timestamp', 'TEXT', 1, None, 0), (1, 'year', 'INTEGER', 1, None, 0), (2, 'month', 'INTEGER', 1, None, 0), (3, 'day', 'INTEGER', 1, None, 0), (4, 'hour', 'INTEGER', 1, None, 0), (5, 'minute', 'INTEGER', 1, None, 0), (6, 'second', 'INTEGER', 1, None, 0), (7, 'idx', 'INTEGER', 1, None, 0)]


In [6]:
i = 0
years = []
months = []
days = []
hours = []
minutes = []
seconds = []
for timestamp in timestamps:
    # Get the year, month, day, hour, minute and seconds from the timestamp
    year = int(timestamp[:4])
    month = int(timestamp[5:7])
    day = int(timestamp[8:10])
    hour = int(timestamp[11:13])
    minute = int(timestamp[14:16])
    second = int(timestamp[17:19])

    years.append(year)
    months.append(month)
    days.append(day)
    hours.append(hour)
    minutes.append(minute)
    seconds.append(second)


In [7]:
new_data = [(timestamp, year, month, day, hour, minute, second, idx) for timestamp, year, month, day, hour, minute, second, idx in zip(timestamps, years, months, days, hours, minutes, seconds, indices)]
new_data[:5]

[('2010-05-01 00:12:00', 2010, 5, 1, 0, 12, 0, 0),
 ('2010-05-01 00:24:00', 2010, 5, 1, 0, 24, 0, 1),
 ('2010-05-01 00:36:00', 2010, 5, 1, 0, 36, 0, 2),
 ('2010-05-01 00:48:00', 2010, 5, 1, 0, 48, 0, 3),
 ('2010-05-01 01:00:00', 2010, 5, 1, 1, 0, 0, 4)]

In [8]:
cur.executemany("INSERT INTO images (timestamp, year, month, day, hour, minute, second, idx) VALUES (?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(timestamp) DO NOTHING", new_data)

<sqlite3.Cursor at 0x7fa976e37420>

In [9]:
cur.execute("SELECT * FROM images LIMIT 5;")
print(cur.fetchall())

[('2010-05-01 00:12:00', 2010, 5, 1, 0, 12, 0, 0), ('2010-05-01 00:24:00', 2010, 5, 1, 0, 24, 0, 1), ('2010-05-01 00:36:00', 2010, 5, 1, 0, 36, 0, 2), ('2010-05-01 00:48:00', 2010, 5, 1, 0, 48, 0, 3), ('2010-05-01 01:00:00', 2010, 5, 1, 1, 0, 0, 4)]


In [10]:
con.commit()

In [11]:
df = pd.read_csv(HARPS_LIFETIME_DATABSE)
df.rename(
    columns={
        "harpsnum": "harpnum"
        }, 
    inplace=True
    )

df["harpnum"] = df["harpnum"].astype(int)
df["start"] = df["start"].astype(str).apply(lambda x: x[:-4])
df["end"] = df["end"].astype(str).apply(lambda x: x[:-4])
df.dtypes

harpnum     int64
start      object
end        object
dtype: object

In [56]:
DELETE = True
if DELETE:
    yes = input("Delete for sure?")
    if yes in ["yes", "y", "Y"]:
        cur.execute("DELETE FROM harps;")
        con.commit()

cur.execute("SELECT * FROM harps LIMIT 5;")
print(cur.fetchall())

IntegrityError: FOREIGN KEY constraint failed

In [57]:
new_data = df.values

for data in new_data:
    try:
        cur.execute("INSERT INTO harps (harpnum, start, end) VALUES (?, ?, ?) ON CONFLICT(harpnum) DO NOTHING", data)
    except sqlite3.IntegrityError as e:
        cur.execute("SELECT COUNT(*) FROM images WHERE timestamp = ?", (data[1],))
        start_exists = cur.fetchone()[0]
        cur.execute("SELECT COUNT(*) FROM images WHERE timestamp = ?", (data[2],))
        end_exists = cur.fetchone()[0]

        closest_start = data[1]
        closest_end = data[2]
#        print(data[0])

        if not start_exists:
            cur.execute("SELECT timestamp FROM images WHERE timestamp >= ? ORDER BY timestamp ASC LIMIT 1;", (closest_start,))
#            cur.execute("SELECT timestamp FROM images WHERE timestamp >= ? ORDER BY ABS(strftime('%s', timestamp) - strftime('%s', ?)) LIMIT 1;", (closest_start,))
            closest_start = cur.fetchone()[0]
#            print("START", closest_start, data[1])
        if not end_exists:
#            cur.execute("SELECT timestamp FROM images ORDER BY ABS(strftime('%s', timestamp) - strftime('%s', ?)) LIMIT 1;", (closest_end,))
            cur.execute("SELECT timestamp FROM images WHERE timestamp <= ? ORDER BY timestamp DESC LIMIT 1;", (closest_end,))
            closest_end = cur.fetchone()[0]
#            print("END", closest_end, data[2])

        cur.execute("INSERT INTO harps (harpnum, start, end) VALUES (?, ?, ?) ON CONFLICT(harpnum) DO NOTHING", (int(data[0]), closest_start, closest_end))

In [58]:
cur.execute("SELECT COUNT(*) FROM harps LIMIT 1;")
assert cur.fetchone()[0] == 4098

In [59]:
cur.execute("SELECT * FROM harps LIMIT 5;")
print(cur.fetchall())

[(1, '2010-05-01 00:12:00', '2010-05-11 16:12:00'), (2, '2010-05-01 00:12:00', '2010-05-05 12:00:00'), (5, '2010-05-01 00:12:00', '2010-05-03 05:12:00'), (6, '2010-05-01 00:12:00', '2010-05-05 08:36:00'), (8, '2010-05-02 14:48:00', '2010-05-08 17:00:00')]


In [60]:
con.commit()

In [61]:
cur.close()
con.close()

#### This part is to add the HARPS bounding boxes

In [62]:
from astropy.time import Time

In [63]:
con = sqlite3.connect(CMESRC_DB)
con.execute("PRAGMA foreign_keys = ON")
cur = con.cursor()

In [81]:
DELETE = True
if DELETE:
    yes = input("Delete for sure?")
    if yes in ["yes", "y", "Y"]:
        cur.execute("DELETE FROM harps_bbox;")
        con.commit()

cur.execute("SELECT * FROM harps_bbox LIMIT 5;")
print(cur.fetchall())

[]


In [82]:
# Print the name of the columns in the harps_bbox table

cur.execute("PRAGMA table_info(harps_bbox);")

results = cur.fetchall()

print(results)

[(0, 'harpnum', 'INTEGER', 0, None, 1), (1, 'timestamp', 'TEXT', 0, None, 2), (2, 'LONDTMIN', 'REAL', 0, None, 0), (3, 'LONDTMAX', 'REAL', 0, None, 0), (4, 'LATDTMIN', 'REAL', 0, None, 0), (5, 'LATDTMAX', 'REAL', 0, None, 0), (6, 'IRBB', 'INTEGER', 0, None, 0)]


In [84]:
SWAN = filepaths_updated_swan_data()
for harpnum, filepath in tqdm(SWAN.items()):
    data = read_SWAN_filepath(filepath)
    cur.execute("SELECT START, END FROM harps WHERE HARPNUM = ?", (int(harpnum),))
    start, end = cur.fetchone()

    start = Time(start)
    end = Time(end)

    data = data[(data["Timestamp"] >= start) & (data["Timestamp"] <= end)]
    df_data = data[["Timestamp", "LONDTMIN", "LONDTMAX", "LATDTMIN", "LATDTMAX", "IRBB"]].values
    new_data = [(int(harpnum), str(timestamp.to_value("iso")[:-4]), float(lonmin), float(lonmax), float(latmin), float(latmax), int(irbb)) for timestamp, lonmin, lonmax, latmin, latmax, irbb in df_data]

    for data in new_data:
        try:
            cur.execute("INSERT INTO harps_bbox (harpnum, timestamp, LONDTMIN, LONDTMAX, LATDTMIN, LATDTMAX, IRBB) VALUES (?, ?, ?, ?, ?, ?, ?) ON CONFLICT(harpnum, timestamp) DO NOTHING", data)
        except sqlite3.IntegrityError as e:
            cur.execute("SELECT COUNT(*) FROM images WHERE timestamp = ?", (data[1],))
            timestamp_exists = cur.fetchone()[0]

            if "FOREIGN KEY" in e.args[0] and not timestamp_exists:
                continue
            else:
                raise e

[H[2J
==CACHING SWAN DATA.==



100%|██████████| 4098/4098 [00:00<00:00, 985282.76it/s]


[H[2J

100%|██████████| 4098/4098 [03:13<00:00, 21.23it/s]


In [85]:
con.commit()

In [91]:
cur.execute("SELECT harpnum FROM harps_bbox WHERE timestamp = '2014-05-02 00:00:00' AND IRBB = 0;")
print(cur.fetchall())

[(4087,), (4075,), (4064,), (4067,), (4084,), (4066,), (4042,), (4086,), (4082,), (4083,), (4076,), (4088,), (4071,), (4073,)]


In [88]:
cur.execute("SELECT * FROM harps_bbox WHERE IRBB = 1 LIMIT 5;")
print(cur.fetchall())

[(7288, '2018-07-21 17:24:00', -16.629700500000002, -12.6134995, 17.539801000000004, 19.867599, 1), (7288, '2018-07-21 17:36:00', -16.5203005, -12.5040995, 17.539801000000004, 19.867599, 1), (7288, '2018-07-21 17:48:00', -16.402499499999998, -12.3863005, 17.539801000000004, 19.867599, 1), (1979, '2012-09-03 05:24:00', -1.5240005000000003, 16.6547005, 10.8261, 21.3915, 1), (1979, '2012-09-03 05:36:00', -1.4141905, 16.7645105, 10.8261, 21.3915, 1)]
