In [18]:
import sys
sys.path.append('../')

from src.cmesrc.config import CMESRCV2_DB, LASCO_CME_DATABASE
import sqlite3
import pandas as pd

In [2]:
# Open the CMESRC database; `conn` and `cur` stay bound for the cells below.
conn = sqlite3.connect(CMESRCV2_DB)
cur = conn.cursor()

# Load the id and position angle of every row in CMES into a DataFrame.
cme_id_pa_query = """
SELECT C.cme_id, C.cme_pa FROM CMES C
"""
cme_id_pa = pd.read_sql(sql=cme_id_pa_query, con=conn)
cme_id_pa.head()

Unnamed: 0,cme_id,cme_pa
0,19960111001436,267.0
1,19960113220830,265.0
2,19960115070110,262.0
3,19960122031101,105.0
4,19960126091619,90.0


In [3]:
import numpy as np

# Current identifiers, as integers (used as lookup keys later on).
old_ids = cme_id_pa["cme_id"].astype(int).to_numpy()

# New identifier = old identifier (as text) followed by the position angle
# written as a zero-padded three-digit integer; rows whose position angle is
# NaN get the suffix "999" instead.
id_prefixes = cme_id_pa["cme_id"].astype(str).to_numpy()
pa_suffixes = np.array([
    "999" if np.isnan(pa) else f"{int(pa):03}"
    for pa in cme_id_pa["cme_pa"]
])
new_ids = id_prefixes + pa_suffixes

new_ids

array(['19960111001436267', '19960113220830265', '19960115070110262', ...,
       '20220831110006227', '20220831141205227', '20220831174805228'],
      dtype=object)

In [4]:
# Guard: no two rows may end up with the same extended identifier.
distinct_new_ids = set(new_ids)
assert len(distinct_new_ids) == len(new_ids)

In [5]:
# Lookup table from each old identifier to its extended replacement
# (both arrays are row-aligned, so pairing them positionally is enough).
translation_dict = dict(zip(old_ids, new_ids))

In [14]:
# Create (if missing) an index on every column that stores a CME id.
# Presumably this is to speed up the id-rewrite UPDATEs, which filter on
# exactly these columns.
conn = sqlite3.connect(CMESRCV2_DB)
try:
    cursor = conn.cursor()

    # Run the script on the cursor of the connection opened in this cell.
    # (Previously this used `cur`, a cursor left over from an earlier cell's
    # connection, so the cell only worked thanks to hidden kernel state.)
    cursor.executescript("""
CREATE INDEX IF NOT EXISTS idx_cmes_cme_id ON CMES(cme_id);
CREATE INDEX IF NOT EXISTS idx_chsc_cme_id ON CMES_HARPS_SPATIALLY_CONSIST(cme_id);
CREATE INDEX IF NOT EXISTS idx_che_cme_id ON CMES_HARPS_EVENTS(cme_id);
CREATE INDEX IF NOT EXISTS idx_fcha_cme_id ON FINAL_CME_HARP_ASSOCIATIONS(cme_id);
CREATE INDEX IF NOT EXISTS idx_ms_cme_id ON MAJUMDAR_SRC(cme_id);
CREATE INDEX IF NOT EXISTS idx_hbb_pres_prev_cme_id ON HARPS_BBOX(prev_present_at_cme_id);
CREATE INDEX IF NOT EXISTS idx_hbb_pres_next_cme_id ON HARPS_BBOX(next_present_at_cme_id);
""")

    conn.commit()
finally:
    # Release the connection even if index creation fails.
    conn.close()

In [15]:
import sqlite3

# Rewrite every stored CME id (primary key in CMES plus all referencing
# columns) from its old value to the extended value in `translation_dict`.
conn = sqlite3.connect(CMESRCV2_DB)
cursor = conn.cursor()

# Step 1: Disable foreign key enforcement so the key and its references can be
# rewritten table by table. This must happen before any transaction is open:
# SQLite ignores this pragma while a transaction is pending.
cursor.execute("PRAGMA foreign_keys = OFF;")

# Step 2: One (new_id, old_id) parameter pair per CME, shared by every UPDATE.
id_pairs = [(int(new_id), int(old_id)) for old_id, new_id in translation_dict.items()]

# Step 3: Every (table, column) that stores a CME id. CMES itself comes first,
# followed by the referencing tables.
cme_id_columns = [
    ("CMES", "cme_id"),
    ("CMES_HARPS_SPATIALLY_CONSIST", "cme_id"),
    ("CMES_HARPS_EVENTS", "cme_id"),
    ("FINAL_CME_HARP_ASSOCIATIONS", "cme_id"),
    ("MAJUMDAR_SRC", "cme_id"),
    ("HARPS_BBOX", "prev_present_at_cme_id"),
    ("HARPS_BBOX", "next_present_at_cme_id"),
]

try:
    for table, column in cme_id_columns:
        # Identifiers come from the fixed list above (never from user input),
        # so interpolating them is safe; the id values are bound parameters.
        cursor.executemany(
            f"UPDATE {table} SET {column} = ? WHERE {column} = ?",
            id_pairs,
        )
except Exception:
    # Never leave a half-migrated transaction pending for a later commit cell.
    conn.rollback()
    raise

# Step 4: Commit first, so that no transaction is pending when foreign keys
# are switched back on (inside a transaction the pragma would be a no-op).
conn.commit()

# Step 5: Re-enable foreign key constraints.
# The connection is intentionally left open; it is closed in a later cell.
cursor.execute("PRAGMA foreign_keys = ON;")

<sqlite3.Cursor at 0x7f5971cc41c0>

In [16]:
# Persist whatever is pending on `conn`, then release the connection.
conn.commit()
conn.close()

In [13]:
# Close the connection currently bound to `conn` without committing.
# NOTE(review): the execution count [13] shows this cell was run out of order;
# presumably a leftover cleanup cell — confirm whether it is still needed.
conn.close()

In [19]:
# Now to fill the CMEs I dropped because they were duplicates.
# Read the LASCO catalogue and turn every NaN into None.
cmes = pd.read_csv(LASCO_CME_DATABASE).replace({np.nan: None})

# Keep only the rows whose CME_QUALITY equals 0 and report how many there are.
quality_is_zero = cmes["CME_QUALITY"] == 0
no_poor_cmes = cmes.loc[quality_is_zero]
len(no_poor_cmes)

13311

In [29]:
# Display every catalogue row whose CME_ID also appears on another row
# (keep=False flags all members of a duplicate group, not just the repeats).
shares_cme_id = cmes.duplicated(subset=["CME_ID"], keep=False)
cmes.loc[shares_cme_id]

Unnamed: 0,CME_ID,CME_DATE,CME_PA,CME_WIDTH,CME_LINEAR_SPEED,CME_2ND_ORDER_INITIAL_SPEED,CME_2ND_ORDER_FINAL_SPEED,CME_2ND_ORDER_20R_SPEED,CME_ACCELERATION,CME_MASS,CME_KINETIC_ENERGY,CME_MPA,CME_HALO,CME_SEEN_IN,CME_QUALITY,CME_THREE_POINTS
232,ID19970208183005,1997-02-08 18:30:05,325.0,2,379.0,248.0,502.0,699.0,,,,320,0,0,0,0
233,ID19970208183005,1997-02-08 18:30:05,289.0,8,158.0,270.0,53.0,0.0,,,,281,0,0,1,3
368,ID19970710172603,1997-07-10 17:26:03,256.0,30,91.0,105.0,76.0,0.0,,,,263,0,0,1,0
369,ID19970710172603,1997-07-10 17:26:03,91.0,52,209.0,87.0,328.0,898.0,,,,89,0,1,1,3
517,ID19971105121042,1997-11-05 12:10:42,302.0,15,603.0,601.0,605.0,612.0,,,,300,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32464,ID20220715082405,2022-07-15 08:24:05,273.0,18,277.0,115.0,434.0,1219.0,,,,272,0,1,2,0
32465,ID20220715082405,2022-07-15 08:24:05,72.0,28,386.0,411.0,362.0,0.0,,,,76,0,1,1,0
32521,ID20220729123636,2022-07-29 12:36:36,113.0,12,323.0,231.0,419.0,999.0,,,,115,0,1,1,0
32522,ID20220729123636,2022-07-29 12:36:36,319.0,22,425.0,220.0,620.0,1251.0,,,,319,0,1,1,0


In [35]:
# Insert into CMES every catalogue row whose extended id (CME_ID without its
# two-character prefix, followed by the zero-padded position angle) is not
# already present in the table.
conn = sqlite3.connect(CMESRCV2_DB)
cur = conn.cursor()

# NOTE(review): never filled in this cell; kept in case other cells read it.
not_matched = []

try:
    for _, row in cmes.iterrows():
        # Position-angle suffix: three zero-padded digits, "999" when missing.
        cme_pa = f"{int(row['CME_PA']):03}" if not pd.isna(row["CME_PA"]) else "999"

        # Build the id once, as an integer, and bind that same integer in both
        # the lookup and the insert. (Previously the insert bound the string
        # form, so the stored type depended on the column's affinity.)
        cme_id = int(str(row["CME_ID"][2:]) + cme_pa)

        cur.execute("SELECT cme_id FROM CMES WHERE cme_id = ?", (cme_id,))
        matches = cur.fetchall()

        if len(matches) > 1:
            raise ValueError("More than one match")

        if not matches:
            cur.execute(
                """
            INSERT INTO cmes (cme_id, cme_date, cme_pa, cme_width, cme_halo, cme_seen_in, cme_quality, cme_three_points)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """,
                (
                    cme_id,
                    row["CME_DATE"],
                    row["CME_PA"],
                    row["CME_WIDTH"],
                    row["CME_HALO"],
                    row["CME_SEEN_IN"],
                    row["CME_QUALITY"],
                    row["CME_THREE_POINTS"],
                ),
            )
except Exception:
    # Discard partial inserts so a later commit cell cannot persist them.
    conn.rollback()
    raise

In [36]:
# Persist whatever is pending on `conn`, then release the connection.
conn.commit()
conn.close()

In [58]:
# And I need to update their image_timestamps
from datetime import datetime
import bisect
from tqdm import tqdm

conn = sqlite3.connect(CMESRCV2_DB)
cur = conn.cursor()

# Raw timestamp strings from the images table (one single-column row each).
image_timestamps = [ts for (ts,) in cur.execute("SELECT timestamp FROM images")]

# Parsed and sorted ascending, ready for binary search.
timestamps = [datetime.strptime(ts, "%Y-%m-%d %H:%M:%S") for ts in image_timestamps]
timestamps.sort()

# Ids of the CMES rows that have no image_timestamp yet (list of 1-tuples).
new_added = cur.execute("SELECT cme_id FROM CMES WHERE image_timestamp IS NULL").fetchall()

# Function to find the closest timestamp using binary search
def closest_timestamp(target, sorted_timestamps):
    """Return the element of ``sorted_timestamps`` that is nearest to ``target``.

    Args:
        target: Value to look up; must be comparable with, and subtractable
            from, the elements of ``sorted_timestamps``.
        sorted_timestamps: Non-empty sequence sorted in ascending order.

    Returns:
        The nearest element. Targets before the first / after the last element
        are clamped to that end; on an exact tie the earlier element wins.

    Raises:
        ValueError: If ``sorted_timestamps`` is empty.
    """
    if len(sorted_timestamps) == 0:
        raise ValueError("sorted_timestamps must not be empty")

    index = bisect.bisect_left(sorted_timestamps, target)
    if index == 0:
        return sorted_timestamps[0]
    if index == len(sorted_timestamps):
        return sorted_timestamps[-1]

    before = sorted_timestamps[index - 1]
    after = sorted_timestamps[index]
    # Strict "<" means equal distances resolve to the earlier neighbour.
    if after - target < target - before:
        return after
    return before

# Give every CMES row that still lacks an image_timestamp the image time
# closest to its cme_date.
pending_cmes = cur.execute("SELECT cme_id, cme_date FROM CMES WHERE image_timestamp IS NULL").fetchall()
for pending_id, pending_date in tqdm(pending_cmes):
    cme_time = datetime.strptime(pending_date, "%Y-%m-%d %H:%M:%S")
    nearest_image_time = closest_timestamp(cme_time, timestamps)
    cur.execute(
        "UPDATE CMES SET image_timestamp = ? WHERE cme_id = ?",
        (nearest_image_time.strftime("%Y-%m-%d %H:%M:%S"), pending_id),
    )

100%|██████████| 742/742 [00:00<00:00, 25037.20it/s]


In [59]:
# Persist whatever is pending on `conn`, then release the connection.
conn.commit()
conn.close()

In [50]:
# Unwrap the single-column result rows into a flat list of ids.
# NOTE: rebinding `new_added` means this cell must run exactly once per fetch.
new_added = [cme_id for (cme_id,) in new_added]

In [54]:
conn = sqlite3.connect(CMESRCV2_DB)
cur = conn.cursor()

# Bookkeeping table holding the ids collected in `new_added`; each id is a
# primary key that references CMES(cme_id).
cur.execute("""CREATE TABLE IF NOT EXISTS NEW_ADDED_CMES (
            cme_id INTEGER PRIMARY KEY REFERENCES CMES(cme_id)
)""")

# One INSERT per id, executed in order; the trailing ";" keeps the notebook
# from echoing the returned cursor.
cur.executemany(
    "INSERT INTO NEW_ADDED_CMES (cme_id) VALUES (?);",
    [(new,) for new in new_added],
);

In [57]:
# Persist whatever is pending on `conn`, then release the connection.
conn.commit()
conn.close()