# SOHO analysis 
The purpose of this notebook is to analysis visual-spectrum images of the sun from the SOHO imaging satellite at the L1 Lagrange point (meaning the images are from the same perspective as the Earth) and use this data to determine the rotational period of the sun.

In [1]:
import os
from datetime import datetime

Recompute = False #Make this True to run the computation from scratch (takes > 1 minute)

# Function to extract the image paths and their timestamps
def get_files_with_times(root_dir = "data"):
    file_paths = []
    times = []
    for day_dir in sorted(os.listdir(root_dir)):
        day_path = os.path.join(root_dir,day_dir)
        if os.path.isdir(day_path):
            for file in sorted(os.listdir(day_path)):
                if file.endswith(".jpg"):
                    time_str = file.split('_')[1] # Extract time (hhmm)
                    time = datetime.strptime(f"{day_dir}_{time_str}", r"%Y%m%d_%H%M")
                    file_paths.append(os.path.join(day_path,file))
                    times.append(time)
    return file_paths, times

file_paths, times = get_files_with_times()

In [2]:
import matplotlib.pyplot as plt
from ipywidgets import interact, Dropdown
from utils.image_processing import detect_sunspots
import cv2

# Create a dropdown to select days
day_dirs = sorted([d for d in os.listdir("data") if os.path.isdir(os.path.join("data", d))])
@interact(day=Dropdown(options=day_dirs, description="Select Day:"))
def show_day_images(day):
    day_path = os.path.join("data", day)
    files = sorted([f for f in os.listdir(day_path) if f.endswith(".jpg")])[:16] #Only the first 12 images
    
    fig, axes = plt.subplots(3, 4, figsize=(15, 10))
    for ax, file in zip(axes.flat, files):
        img, centroids, solar_center, solar_radius = detect_sunspots(os.path.join(day_path, file))
        print(centroids)
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        ax.scatter([c[0] for c in centroids], [c[1] for c in centroids], s=5, c='red')
        ax.set_title(file.split('_')[1])  # Show time (hhmm)
        ax.axis('off')
    plt.tight_layout()

interactive(children=(Dropdown(description='Select Day:', options=('20250422', '20250423', '20250424', '202504…

The cell above contains the detected features highlighted in red with a dropdown to select which day's data to look at.

In [3]:
'''
This block computes the longitudinal angular-velocity of the sunspots between different
images via a nearst neighbour algorithm. Due to the sheer amount of computation (and inefficient code),
this block takes about 1~2 mins to run. Thus, the data is saved in a JSON file called sunspot_data.json.
If you want to run this block, you must change <Recompute> to True in the first code block of this notebook.
'''

if Recompute:
    from utils.feature_tracking import SunspotTracker
    import json
    _, _, solar_center, solar_radius = detect_sunspots(file_paths[0]) #Initial value for solar radius from the first image
    tracker = SunspotTracker(solar_center, solar_radius, 1)

    #main feature tracking loop
    for img, time in zip(file_paths, times):
        img, centroids, solar_center, solar_radius = detect_sunspots(img)
        
        tracker.process_frame(time, centroids)
    velocities = tracker.get_all_velocities() # These velocities are purely longitudinal
    
    #Write the data to JSON
    for entry in tracker.tracks:
        if 'times' in entry:
            entry['times'] = [t.isoformat() if isinstance(t, datetime) else t for t in entry['times']]
   
    with open(file='sunspot_data.json',mode='w') as f:
        json.dump(tracker.tracks, f, indent=4)

%%Important thing to remember is that the longitudinal velocity is higher the closer you get to the equator. Additionally, the sun spins on a not-perfectly vertical axis, so that might influence velocities as well. Individual velocities are sporadic, but we can perform extra data analysis to try and extract the mitigating and random effects and isolate the rotation.

In [4]:
import json
from datetime import datetime

with open("sunspot_data.json", 'r') as f:
    data = json.load(f)

for entry in data:
    if 'times' in entry:
        entry['times'] = [datetime.fromisoformat(t) if isinstance(t, datetime) else t for t in entry['times']]

data

[{'positions': [[231, 361],
   [233, 361],
   [236, 362],
   [239, 362],
   [242, 362],
   [245, 362],
   [247, 362],
   [250, 362],
   [253, 362],
   [256, 362],
   [259, 362],
   [261, 362],
   [264, 362],
   [267, 362],
   [270, 362],
   [272, 362],
   [275, 363],
   [278, 363]],
  'times': ['2025-04-22T09:00:00',
   '2025-04-22T10:30:00',
   '2025-04-22T12:00:00',
   '2025-04-22T13:30:00',
   '2025-04-22T15:00:00',
   '2025-04-22T16:30:00',
   '2025-04-22T18:00:00',
   '2025-04-22T19:30:00',
   '2025-04-22T21:00:00',
   '2025-04-22T22:30:00',
   '2025-04-23T00:00:00',
   '2025-04-23T01:30:00',
   '2025-04-23T03:00:00',
   '2025-04-23T04:30:00',
   '2025-04-23T06:00:00',
   '2025-04-23T07:30:00',
   '2025-04-23T09:00:00',
   '2025-04-23T10:30:00'],
  'velocities': [5.262828931422064,
   -0.9867187490399374,
   1.3515859855619965,
   1.409500339578699,
   1.4632961411593897,
   5.428834784772789,
   1.5438763501279027,
   1.5870378194053956,
   1.626365676889236,
   1.661935227363301