In [None]:
import numpy as np
import pandas as pd
import os
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, Subset
from torch import nn
from torch import optim
from scipy.interpolate import CubicSpline
from sklearn.preprocessing import MinMaxScaler

In [None]:
%run ./utilities/constants.ipynb

In [None]:
%run ./utilities/utils.py

In [None]:
%run ./utilities/models.py

In [None]:
%run ./utilities/training_utils.py

In [None]:
# Paths to data
# DATA_DIR is not defined in the visible cells; presumably it comes from the
# %run of ./utilities/constants.ipynb above — TODO confirm.
# Kp table: read later with pd.read_csv, using its "datetime" and "Kp" columns.
path_kp = os.path.join(DATA_DIR, "kp_data.txt")
# Image metadata table: read later for its "filename" and "datetime" columns.
path_img_specs = os.path.join(DATA_DIR, "image_specs.csv")

In [None]:
# Read the Kp table and parse its "datetime" column into pandas datetimes
data_kp = pd.read_csv(path_kp).assign(
    datetime=lambda frame: pd.to_datetime(frame["datetime"])
)

# Read the image metadata table and parse its "datetime" column the same way
img_specs = pd.read_csv(path_img_specs).assign(
    datetime=lambda frame: pd.to_datetime(frame["datetime"])
)

# Array forms of the two metadata columns used by the following cells
image_dates = img_specs["datetime"].values
image_filenames = img_specs["filename"].values

In [None]:
# Convert both time axes to integer Unix seconds.
# Casting through datetime64[s] makes the result independent of the resolution
# the datetime arrays happen to be stored in. The previous
# `astype("int64") // 10**9` was only correct for nanosecond-resolution arrays
# and would silently yield values off by powers of 1000 for any other unit.
image_timestamps = image_dates.astype("datetime64[s]").astype("int64")
kp_timestamps = (
    data_kp["datetime"].values.astype("datetime64[s]").astype("int64")
)

# Fit a cubic spline through the Kp samples and evaluate it at each image time.
# CubicSpline requires strictly increasing x values and, with its default
# settings, extrapolates outside the fitted range.
# NOTE(review): assumes the Kp rows are sorted by time without duplicates and
# that the image times fall inside the Kp time range — confirm on the data.
kp_index_interpolated = CubicSpline(kp_timestamps, data_kp["Kp"].values)(
    image_timestamps
)

In [None]:
# One row per image: integer capture timestamp, image file name, and the
# Kp value interpolated at that timestamp
_frame_columns = {
    "Timestamp": image_timestamps,
    "Image_filename": image_filenames,
    "Kp": kp_index_interpolated,
}
df = pd.DataFrame(data=_frame_columns)

In [None]:
# Period lengths expressed in seconds
day = 24 * 60 * 60
year = 365.2425 * day
# Synodic Carrington rotation period of the Sun
cycle = 27.2753 * day

# Encode the phase within each period as a (sin, cos) pair so that the
# feature is continuous across the wrap-around point of the period
for _sin_col, _cos_col, _period in (
    ("day_sin", "day_cos", day),
    ("cycle_sin", "cycle_cos", cycle),
    ("year_sin", "year_cos", year),
):
    _phase = image_timestamps * (2 * np.pi / _period)
    df[_sin_col] = np.sin(_phase)
    df[_cos_col] = np.cos(_phase)

In [None]:
# Scale Kp and the raw timestamp into the range [-1, 1].
#
# The scalers are fitted on the source arrays (kp_index_interpolated and
# image_timestamps) rather than on the df columns this cell overwrites.
# That keeps the cell idempotent: re-running it refits on the original values,
# so kp_scaler / timestamp_scaler remain valid for inverse_transform instead
# of being refitted on data that is already scaled.
#
# NOTE(review): both scalers are fitted on every row, which presumably includes
# rows that later end up in the validation subset — confirm whether fitting on
# the training portion only is wanted.
kp_scaler = MinMaxScaler(feature_range=(-1, 1))
timestamp_scaler = MinMaxScaler(feature_range=(-1, 1))

# fit_transform expects a 2-D (n_samples, 1) input and returns the same shape;
# ravel() flattens it back to one value per row for the column assignment.
df["Kp"] = kp_scaler.fit_transform(
    kp_index_interpolated.reshape(-1, 1)
).ravel()
df["Timestamp"] = timestamp_scaler.fit_transform(
    image_timestamps.reshape(-1, 1)
).ravel()

In [None]:
# Sequence length handed to create_sequence; presumably the number of
# consecutive rows per sample — TODO confirm against the helper.
seq_length = 7
# create_sequence is not defined in the visible cells; presumably it is
# provided by one of the %run utilities above — TODO confirm.
sequences = create_sequence(df, seq_length)

In [None]:
# Image preprocessing pipeline: resize to 512x512, then normalize each of the
# three channels with mean 0.5 and std 0.5.
# NOTE(review): this maps inputs to [-1, 1] only if pixel values are in
# [0, 1] beforehand — confirm against the dataset's image loading.
_resize = transforms.Resize((512, 512))
_normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
img_transform = transforms.Compose([_resize, _normalize])

In [None]:
# Build the dataset from the sequences, the preprocessed-image directory and
# the image transform pipeline.
# ImageAndKpDataset and PREPROCESSED_IMAGE_DIR are not defined in the visible
# cells; presumably they come from the %run utilities above — TODO confirm.
dataset = ImageAndKpDataset(sequences, PREPROCESSED_IMAGE_DIR, img_transform)

In [None]:
# Ordered (unshuffled) split: the first `train_frac` share of dataset indices
# is used for training, the remaining indices for validation.
full_len = len(dataset)
# NOTE(review): 0.1 looks like a quick-experiment value; the trailing 0.9
# suggests the fraction intended for a full run — confirm before training
# for real.
train_frac = 0.1 # 0.9
train_size = int(full_len * train_frac)
# Fail fast when the split would leave one side empty, rather than running a
# training loop over an empty loader.
assert 0 < train_size < full_len, (
    f"split of {full_len} samples with train_frac={train_frac} "
    f"leaves an empty subset (train_size={train_size})"
)
train_data = Subset(dataset, range(0, train_size))
val_data = Subset(dataset, range(train_size, full_len))
batch_size = 4

# Create PyTorch dataloaders for data.
# Pinned host memory only speeds up host-to-GPU copies, so request it only
# when CUDA is available; this avoids a warning on CPU-only machines.
pin_memory = torch.cuda.is_available()
train_loader = DataLoader(train_data, batch_size=batch_size, pin_memory=pin_memory)
# Despite its name, test_loader iterates over the validation subset.
test_loader = DataLoader(val_data, batch_size=batch_size, pin_memory=pin_memory)

In [None]:
# Training configuration: epoch count, loss, device, model, optimizer and
# learning-rate schedule.
n_epochs = 10
loss_fn = nn.MSELoss(reduction="mean")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the PyTorch RNG before the model is built so that weight
# initialisation is reproducible across Restart & Run All.
seed = 0
torch.manual_seed(seed)

# Move the model to its device before constructing the optimizer, as the
# torch.optim documentation recommends.
model = SolarImageKpModel()
model.to(device)
optimizer = optim.Adam(model.parameters())
# Multiply the learning rate by 0.1 every 3 scheduler steps.
# NOTE(review): whether a step is one epoch or one batch depends on how
# train_model calls scheduler.step() — confirm.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Show the model as the cell output
model

In [None]:
# Run the training loop. train_model returns two values, kept here as the
# training history and the history for test_loader (the validation subset).
train_hist, test_hist = train_model(
    model,
    optimizer,
    loss_fn,
    train_loader,
    test_loader,
    device,
    n_epochs,
    scheduler=scheduler,
)