# ***LIBRARY***

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

import cv2
import os
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import random

from PIL import Image
from torch.utils.data import Dataset,DataLoader, random_split
from sklearn.model_selection import train_test_split
from datetime import datetime
from tqdm import tqdm

import warnings 
warnings.filterwarnings('ignore')

# ***READ DATA***

In [None]:
# Root folder of the Kaggle dataset; its sub-directories are expected to hold
# the sky images.
data_root_path = '/kaggle/input/sky-images-and-solar-radiation-measurement-dataset/7-18.9.2023/7-18.9.2023'

# Collect the sub-directories in sorted order. Plain files are skipped because
# a later cell calls os.listdir() on every entry, and the loop variable is
# named so that it does not shadow the built-in dir().
dir_train = sorted(
    entry_path
    for entry_path in (
        os.path.join(data_root_path, entry_name)
        for entry_name in os.listdir(data_root_path)
    )
    if os.path.isdir(entry_path)
)

In [None]:
# Display the sorted list of paths found under data_root_path.
dir_train

In [None]:
def parse_filename(filename):
    """Split an image file name into its capture time and radiation value.

    The base name is expected to look like
    ``<YYYY-mm-dd-HH-MM-SS>_<radiation>.jpg``.

    Args:
        filename: File name or path of the image.

    Returns:
        A ``(datetime, float)`` tuple: the parsed timestamp and the radiation
        reading encoded in the name.

    Raises:
        ValueError: If the stem does not contain exactly one ``_``, the
            timestamp does not match the expected format, or the radiation
            part is not a number.
    """
    # Keep only the text in front of the first '.jpg'.
    stem, _, _ = os.path.basename(filename).partition('.jpg')
    stamp, value = stem.split('_')
    captured_at = datetime.strptime(stamp, '%Y-%m-%d-%H-%M-%S')
    return captured_at, float(value)

# Build one record per .jpg image: capture time and radiation come from the
# file name, 'filename' keeps the full path for loading the image later.
data = []
for folder in dir_train:
    for image_name in os.listdir(folder):
        if not image_name.endswith('.jpg'):
            continue
        captured_at, radiation_value = parse_filename(image_name)
        data.append({
            'datetime': captured_at,
            'radiation': radiation_value,
            'filename': os.path.join(folder, image_name),
        })

# Sort chronologically and renumber the rows right away: later cells look rows
# up by label (df['filename'][i]) and slice by position, and the two only agree
# when the index runs 0..n-1 in time order. Naming the columns explicitly keeps
# the sort working even when no image was found.
df = (
    pd.DataFrame(data, columns=['datetime', 'radiation', 'filename'])
    .sort_values('datetime')
    .reset_index(drop=True)
)

In [None]:
# Number of image records (rows) collected.
len(df)

In [None]:
# Preview the first rows of the frame.
df.head()

In [None]:
# Replace the index with 0..n-1 so row labels match row positions after the
# chronological sort; later cells rely on this when indexing by label.
df = df.reset_index(drop=True) 
df

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# Restrict the frame to the inclusive time window [start, end].
start = '2023-09-07 05:00:06'
end = '2023-09-18 19:57:53'
in_window = df['datetime'].between(start, end)
df_filtered = df[in_window]

In [None]:
# Line plot of the radiation readings inside the selected time window.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df_filtered['datetime'], df_filtered['radiation'], color='orange')
ax.set_title('Bức xạ mặt trời từ 2023-09-07 đến 2023-09-18')
ax.set_xlabel('Thời gian')
ax.set_ylabel('Bức xạ mặt trời')
ax.grid(True)
plt.xticks(rotation=45)  # tilt the date labels so they do not overlap
fig.tight_layout()
plt.show()

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

# ***PLOT DATA***

In [None]:
# Show one randomly chosen sky image with its timestamp and radiation value.
# The position is drawn from the rows that actually exist, so row 0 is eligible
# and frames with fewer than ~2000 rows do not fail.
idx_data = random.randrange(len(df))
sample_path = df['filename'].iloc[idx_data]

sample_img = cv2.imread(sample_path)
if sample_img is None:
    # cv2.imread signals an unreadable file by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {sample_path}")
sample_img = cv2.cvtColor(sample_img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR, matplotlib expects RGB

plt.imshow(sample_img)
plt.title(f"{df['datetime'].iloc[idx_data]}_{df['radiation'].iloc[idx_data]}")
plt.axis('off')
plt.show()

# ***Feature Extraction Image***

In [None]:

def denormalize(tensor, mean, std):
    """Undo a per-channel normalisation in place.

    Each slice along the first dimension of ``tensor`` is multiplied by the
    matching ``std`` entry and then shifted by the matching ``mean`` entry.

    Args:
        tensor: Tensor whose first dimension is iterated (one slice per channel).
        mean: Per-channel means.
        std: Per-channel standard deviations.

    Returns:
        The same ``tensor`` object, modified in place.
    """
    for channel, (mu, sigma) in zip(tensor, zip(mean, std)):
        channel.mul_(sigma)
        channel.add_(mu)
    return tensor


# Augmentation pipeline previewed in this cell: resize, random flip / rotation /
# brightness-contrast jitter, tensor conversion, then normalisation with the
# listed per-channel mean and std.
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                         std=[0.229, 0.224, 0.225])
])

# Draw the sample position from the rows that actually exist, so row 0 is
# eligible and frames with fewer than ~2000 rows do not fail.
idx_data = random.randrange(len(df))
img_path = df['filename'].iloc[idx_data]
img = Image.open(img_path).convert('RGB')

augmented_img = image_transform(img)

# Undo the normalisation on a copy so the picture can be displayed in the 0..1 range.
augmented_img = denormalize(augmented_img.clone(), [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

np_img = augmented_img.permute(1, 2, 0).numpy()  # channels-first -> channels-last for imshow
plt.imshow(np.clip(np_img, 0, 1))
plt.title("Ảnh sau khi tăng cường dữ liệu")
plt.axis("off")
plt.show()

In [None]:
# Deterministic preprocessing for feature extraction. The features are computed
# once and reused for every epoch, so random flips / rotations / colour jitter
# here would bake one arbitrary augmentation into every stored vector
# (validation images included) and make the features differ from run to run.
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                         std=[0.229, 0.224, 0.225])
])

# NOTE(review): `pretrained=True` is deprecated in recent torchvision releases
# in favour of the `weights=` argument — switch once the minimum supported
# torchvision version is known.
resnet = models.resnet50(pretrained=True)

# Drop the last two children of the ResNet (its pooling and classification
# layers) and pool the remaining feature map down to 1x1.
modules = list(resnet.children())[:-2] 
cnn_backbone = nn.Sequential(*modules, nn.AdaptiveAvgPool2d((1, 1))) 

feature_extractor = nn.Sequential(
    cnn_backbone,
    nn.Flatten(),              # (B, 2048)
    nn.Linear(2048, 512)       # → (B, 512)
).to(device)

# The extractor is only used for inference in this notebook: freeze its
# parameters and switch to eval mode. The final Linear layer keeps its random
# initial weights, so those weights must be stored alongside any model trained
# on these features.
feature_extractor.requires_grad_(False)
feature_extractor.eval()

In [None]:
def feature_extraction(filenames, feature_extractor, transform=None, batch_size=32):
    """Compute one feature vector per image.

    Images are pushed through the network in mini-batches instead of one at a
    time, which gives the same features with far fewer forward passes.

    Args:
        filenames: Iterable of image paths.
        feature_extractor: Module applied to a stacked batch of transformed
            images; it must return one feature row per image.
        transform: Callable turning a PIL image into a tensor. Defaults to the
            notebook-level ``image_transform``. All outputs must share one
            shape so that they can be stacked into a batch.
        batch_size: Number of images per forward pass.

    Returns:
        List of CPU tensors, one per input path, in input order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if transform is None:
        transform = image_transform

    feature_extractor.eval()
    paths = list(filenames)
    image_features = []

    # Gradients are never needed here, so disable them once for the whole run.
    with torch.no_grad():
        for first in tqdm(range(0, len(paths), batch_size)):
            batch = []
            for img_path in paths[first:first + batch_size]:
                with Image.open(img_path) as handle:
                    batch.append(transform(handle.convert('RGB')))

            feats = feature_extractor(torch.stack(batch).to(device)).cpu()
            # Store independent per-image tensors rather than views of the batch.
            image_features.extend(row.clone() for row in feats)

    return image_features

# Pre-compute the feature vector of every image once, in the row order of df;
# the datasets built later index into this list by row position.
image_features = feature_extraction(df['filename'], feature_extractor)
len(image_features)

# ***DATASET***

In [None]:
class RadiationDataset(Dataset):
    """Sliding-window dataset pairing image features with radiation history.

    Sample ``i`` starts at row ``i * stride`` and spans ``sequence_length``
    consecutive rows. Its target is the radiation value of the row
    ``forecast_horizon`` positions after the last row of that window.

    Windows are defined by row position only.
    NOTE(review): this assumes rows are evenly spaced in time; gaps in the
    recording are not detected — confirm this is acceptable for the data.

    Args:
        df: Frame with a numeric ``radiation`` column, one row per image.
        image_features: Sequence of equally shaped feature tensors aligned
            row-by-row with ``df``.
        sequence_length: Number of rows in each input window.
        forecast_horizon: How many rows past the window the target lies.
        stride: Row offset between the starts of consecutive samples.
    """

    def __init__(self, df, image_features, sequence_length=60, forecast_horizon=30, stride=5):
        self.df = df
        self.image_features = image_features
        self.sequence_length = sequence_length
        self.forecast_horizon = forecast_horizon
        self.stride = stride
        # Cache the radiation column once as a tensor; going through df.iloc
        # for every time step of every sample is far slower.
        self._radiation = torch.tensor(df['radiation'].to_numpy(), dtype=torch.float32)

    def __len__(self):
        # The largest valid start row is n - sequence_length - forecast_horizon
        # (its target is then the very last row), so that start must be counted
        # too. Clamp at 0 because __len__ may not return a negative number.
        last_start = len(self.df) - self.sequence_length - self.forecast_horizon
        if last_start < 0:
            return 0
        return last_start // self.stride + 1

    def __getitem__(self, idx):
        """Return ``(img_seq, rad_seq, target)`` for sample ``idx``.

        ``img_seq`` stacks the window's feature tensors, ``rad_seq`` has shape
        ``(sequence_length, 1)`` and ``target`` is a 0-d float32 tensor.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if idx < 0:
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError(f"sample index out of range for dataset of size {n_samples}")

        start = idx * self.stride
        stop = start + self.sequence_length

        img_seq = torch.stack(list(self.image_features[start:stop]))
        # clone() so that callers never receive views of the cached column.
        rad_seq = self._radiation[start:stop].clone().unsqueeze(-1)
        target = self._radiation[stop + self.forecast_horizon - 1].clone()

        return img_seq, rad_seq, target

In [None]:
# Window configuration shared by the training and validation datasets.
sequence_length = 60
forecast_horizon = 30

# 80/20 split by row position. df is sorted by time, so the first 80 % of the
# rows form the training part and the rest the validation part.
# `split_time` is a row position, not a timestamp.
split_time = int(0.8 * len(df))

train_df, val_df = (
    part.reset_index(drop=True)
    for part in (df.iloc[:split_time], df.iloc[split_time:])
)

# Cut the cached feature list at the same position so it stays aligned with the frames.
train_image_features, val_image_features = image_features[:split_time], image_features[split_time:]

window_kwargs = dict(sequence_length=sequence_length, forecast_horizon=forecast_horizon)
train_dataset = RadiationDataset(df=train_df, image_features=train_image_features, **window_kwargs)
val_dataset = RadiationDataset(df=val_df, image_features=val_image_features, **window_kwargs)

# Only the training batches are shuffled; validation keeps its original order.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2)

In [None]:
# Radiation curve of the training part. The title reports the dates this
# subset actually covers rather than the range of the whole dataset.
train_first_day = train_df['datetime'].min().strftime('%Y-%m-%d')
train_last_day = train_df['datetime'].max().strftime('%Y-%m-%d')

plt.figure(figsize=(12, 6))
plt.plot(train_df['datetime'], train_df['radiation'],color = 'blue')
plt.title(f'Bức xạ mặt trời (tập huấn luyện) từ {train_first_day} đến {train_last_day}')
plt.xlabel('Thời gian')
plt.ylabel('Bức xạ mặt trời')
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Radiation curve of the validation part. The title reports the dates this
# subset actually covers rather than the range of the whole dataset.
val_first_day = val_df['datetime'].min().strftime('%Y-%m-%d')
val_last_day = val_df['datetime'].max().strftime('%Y-%m-%d')

plt.figure(figsize=(12, 6))
plt.plot(val_df['datetime'], val_df['radiation'],color = 'red')
plt.title(f'Bức xạ mặt trời (tập kiểm định) từ {val_first_day} đến {val_last_day}')
plt.xlabel('Thời gian')
plt.ylabel('Bức xạ mặt trời')
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Sanity check: print the tensor shapes of the first training batch only.
for images, radiation_series, target in train_loader:
    print("Images:", images.shape)  # (B, T, D) pre-computed image feature vectors (D = 512 with the extractor above), not raw pixels
    print("Radiation series:", radiation_series.shape)  # (B, T, 1)
    print("Target:", target.shape)  # (B,)
    break

# ***MODEL***

In [None]:
class MICNNLModel(nn.Module):
    """Two-branch LSTM regressor over image features and past radiation.

    One LSTM reads the sequence of image feature vectors, a second LSTM reads
    the sequence of radiation values. The last time step of each branch is
    layer-normalised, passed through dropout, concatenated and mapped to a
    single value by a small fully connected head.

    Args:
        img_feat_dim: Size of each image feature vector.
        hidden_size: Hidden size of both LSTMs.
        dropout: Dropout probability applied to each branch output.
        head_dropout: Dropout probability inside the fully connected head.
    """

    def __init__(self, img_feat_dim=512, hidden_size=64, dropout=0.4, head_dropout=0.3):
        super().__init__()

        self.image_lstm = nn.LSTM(input_size=img_feat_dim, hidden_size=hidden_size, batch_first=True)
        self.ts_lstm = nn.LSTM(input_size=1, hidden_size=hidden_size, batch_first=True)

        self.dropout = nn.Dropout(p=dropout)
        self.norm_img = nn.LayerNorm(hidden_size)
        self.norm_rad = nn.LayerNorm(hidden_size)

        self.fc = nn.Sequential(
            nn.Linear(2 * hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(head_dropout),
            nn.Linear(hidden_size, 1)
        )

    def forward(self, img_feat_seq, rad_seq):
        """Predict one value per sample.

        Args:
            img_feat_seq: Tensor of shape (B, T, img_feat_dim).
            rad_seq: Tensor of shape (B, T, 1).

        Returns:
            Tensor of shape (B,).
        """
        img_out, _ = self.image_lstm(img_feat_seq)
        img_feat = self.dropout(self.norm_img(img_out[:, -1, :]))  # last time step only

        rad_out, _ = self.ts_lstm(rad_seq)
        rad_feat = self.dropout(self.norm_rad(rad_out[:, -1, :]))

        combined = torch.cat([img_feat, rad_feat], dim=1)
        output = self.fc(combined)
        # Remove only the trailing feature axis. A bare squeeze() would also
        # drop the batch axis when B == 1 and return a 0-d tensor.
        return output.squeeze(-1)

In [None]:
# Instantiate the model and move it to the selected device.
model = MICNNLModel()
model = model.to(device)

# Mean squared error on the radiation target; Adam with a small weight decay.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3,weight_decay=1e-5)

from torch.optim.lr_scheduler import ReduceLROnPlateau
# Halve the learning rate when the monitored loss stops improving for more than
# `patience` epochs. `verbose` is intentionally not passed: the argument is
# deprecated in recent PyTorch releases; the current rate can be read from
# optimizer.param_groups[0]['lr'].
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)

# ***TRAINING AND VALIDATION***

In [None]:
num_epochs = 20
train_losses = []
val_losses = []

# Validation predictions / targets of every epoch; later cells read the last entry.
all_val_preds = []
all_val_targets = []

for epoch in range(num_epochs):

    # ---- training pass ----
    model.train()
    running_train_loss = 0.0

    for images, rad_seq, target in tqdm(train_loader, desc="Training"):
        images = images.to(device)
        rad_seq = rad_seq.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        # Flatten to (B,) so that a batch holding a single sample still lines
        # up with `target` instead of arriving as a 0-d tensor.
        outputs = model(images, rad_seq).reshape(-1)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        # The criterion averages over the batch; weight by the batch size so
        # the epoch figure is a per-sample mean even with a smaller last batch.
        running_train_loss += loss.item() * images.size(0)

    epoch_train_loss = running_train_loss / len(train_loader.dataset)
    train_losses.append(epoch_train_loss)

    # ---- validation pass ----
    model.eval()
    running_val_loss = 0.0
    val_preds = []
    val_targets = []

    with torch.no_grad():
        for images, rad_seq, target in tqdm(val_loader, desc="Validation"):
            images = images.to(device)
            rad_seq = rad_seq.to(device)
            target = target.to(device)

            outputs = model(images, rad_seq).reshape(-1)
            loss = criterion(outputs, target)

            running_val_loss += loss.item() * images.size(0)

            # tolist() yields plain floats and works for any batch size.
            val_preds.extend(outputs.cpu().tolist())
            val_targets.extend(target.cpu().tolist())

    epoch_val_loss = running_val_loss / len(val_loader.dataset)
    val_losses.append(epoch_val_loss)
    all_val_preds.append(val_preds)
    all_val_targets.append(val_targets)

    # The plateau scheduler is driven by the validation loss.
    scheduler.step(epoch_val_loss)

    print(f"Epoch [{epoch+1}/{num_epochs}]:Train Loss: {epoch_train_loss:.4f} - Val Loss: {epoch_val_loss:.4f}")
    # Report the learning rate explicitly so reductions by the scheduler are visible.
    print(f"    learning rate: {optimizer.param_groups[0]['lr']:.2e}")

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# scikit-learn metrics take (y_true, y_pred). MAE and MSE are symmetric, but
# R² is not, so the ground truth has to be passed first.
final_val_targets = all_val_targets[-1]
final_val_preds = all_val_preds[-1]

mae = mean_absolute_error(final_val_targets, final_val_preds)
print(f"Mean Absolute Error (MAE): {mae:.4f}")

mse = mean_squared_error(final_val_targets, final_val_preds)
rmse = np.sqrt(mse)
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")

r2 = r2_score(final_val_targets, final_val_preds)
print(f"R-squared (R²): {r2:.4f}")

In [None]:
# Training and validation loss per epoch.
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Val Loss')  # legend label spelled correctly
plt.title('Loss Train vs. Loss Val')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Compare the last epoch's validation predictions with the ground truth,
# sample by sample.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(all_val_targets[-1], label='Ground Truth')
ax.plot(all_val_preds[-1], label='Prediction')
ax.set_title('Radiation Prediction vs. Ground Truth')
ax.set_xlabel('Sample Index')
ax.set_ylabel('Radiation')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Weights of the LSTM regressor.
torch.save(model.state_dict(), 'ResnetLSTM_model.pth')
# The regressor consumes features produced by `feature_extractor`, whose final
# Linear layer was randomly initialised in this session. Without these weights
# the saved regressor cannot be given compatible inputs later.
torch.save(feature_extractor.state_dict(), 'ResnetLSTM_feature_extractor.pth')