In [None]:
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta
import torch
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformer import TransformerModel
from utils.mask import generate_tgt_mask

def prepare_prediction_data(input_str, base_path, scaler):
    """
    Prepare data for prediction based on input string.
    
    Args:
        input_str (str): The input string specifying the prediction target (e.g., "20240117090001").
        base_path (str): Path to the folder containing location-specific CSV files.
        scaler (MinMaxScaler): Scaler used to transform features.
    
    Returns:
        torch.Tensor: Prepared input tensor for the model.
    """
    year, month, day, hour, minute, location = (
        input_str[:4],
        input_str[4:6],
        input_str[6:8],
        input_str[8:10],
        input_str[10:12],
        f"L{input_str[12:13]}"
    )
    target_datetime = datetime.strptime(f"{year}-{month}-{day} {hour}:{minute}", "%Y-%m-%d %H:%M")
    
    # Define the time ranges for the source data
    start_prev_day = target_datetime - timedelta(days=1, hours=target_datetime.hour - 7)
    end_prev_day = target_datetime - timedelta(days=1, hours=target_datetime.hour - 17)
    start_today = target_datetime - timedelta(hours=target_datetime.hour - 7)
    end_today = target_datetime - timedelta(minutes=target_datetime.minute)

    # Load data for the location
    data_path = os.path.join(base_path, f"{location}_Train_resampled.csv")
    df = pd.read_csv(data_path)
    df['DateTime'] = pd.to_datetime(df['DateTime'])

    # Filter the required data
    prev_day_data = df[(df['DateTime'] >= start_prev_day) & (df['DateTime'] <= end_prev_day)]
    today_data = df[(df['DateTime'] >= start_today) & (df['DateTime'] <= end_today)]

    # Combine data and extract features
    combined_data = pd.concat([prev_day_data, today_data])
    features = combined_data.drop(columns=["DateTime", "Power(mW)"]).values
    features = scaler.transform(features)  # Apply Min-Max scaling

    # Convert to tensor
    return torch.tensor(features, dtype=torch.float32).unsqueeze(0)  # Add batch dimension


In [None]:
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta
import torch
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformer import TransformerModel
from utils.mask import generate_tgt_mask

def predict_single_data(model: TransformerModel, data:pd.DataFrame):
  
  return
  
def process_upload_data(upload_path):
  data = pd.read_csv(upload_path)
  data['DateTime'] = data['序號'].apply(lambda x: str(x)[:12])
  data['DateTime'] = pd.to_datetime(data['DateTime'])
  data['loactoin'] = data['序號'].apply(lambda x: str(x)[13:] if str(x)[12]== '0' else str(x)[12:] )
  return data

def main():
  upload_data = process_upload_data("dataset/upload.csv")
  for i in range(0, len(upload_data), 48):
    predicted_date, location_id = upload_data.iloc[i][['DateTime', 'loactoin']]
    location_data =pd.read_csv(f"./dataset/36_TrainingData_interpolation_process/L{location_id}_Train_resampled.csv")
    location_data.between_time() #find the data in predicted_date - 1day to predicted_date
    
    predict_single_data() # 
    
main()

# process_upload_data("dataset/upload.csv")

2024-01-17 09:00:00
1
2024-01-19 09:00:00
1
2024-02-24 09:00:00
1
2024-02-26 09:00:00
1
2024-03-27 09:00:00
1
2024-03-29 09:00:00
1
2024-04-28 09:00:00
1
2024-04-30 09:00:00
1
2024-05-15 09:00:00
1
2024-05-20 09:00:00
1
2024-06-13 09:00:00
1
2024-06-15 09:00:00
1
2024-07-03 09:00:00
1
2024-07-07 09:00:00
1
2024-01-25 09:00:00
2
2024-01-28 09:00:00
2
2024-02-13 09:00:00
2
2024-02-26 09:00:00
2
2024-03-19 09:00:00
2
2024-04-27 09:00:00
2
2024-05-08 09:00:00
2
2024-07-05 09:00:00
2
2024-07-07 09:00:00
2
2024-03-17 09:00:00
3
2024-04-14 09:00:00
3
2024-05-24 09:00:00
3
2024-05-26 09:00:00
3
2024-06-03 09:00:00
3
2024-06-20 09:00:00
3
2024-04-20 09:00:00
4
2024-04-22 09:00:00
4
2024-05-13 09:00:00
4
2024-05-15 09:00:00
4
2024-06-29 09:00:00
4
2024-07-04 09:00:00
4
2024-05-09 09:00:00
5
2024-05-14 09:00:00
5
2024-06-15 09:00:00
5
2024-06-20 09:00:00
5
2024-07-06 09:00:00
5
2024-07-08 09:00:00
5
2024-05-20 09:00:00
6
2024-05-27 09:00:00
6
2024-06-25 09:00:00
6
2024-06-29 09:00:00
6
2024-07-06

In [5]:
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta
import torch
from sklearn.preprocessing import MinMaxScaler
from dataloader import SolarPowerDataset
from transformer import TransformerModel
from utils.mask import generate_tgt_mask


def predict_single_data(model: TransformerModel, data: pd.DataFrame, scaler: MinMaxScaler):
    """
    Predict solar power for a single input dataset.

    Args:
        model (TransformerModel): Trained model for prediction.
        data (pd.DataFrame): Input data for prediction (72 timesteps).
        scaler (MinMaxScaler): Scaler used for normalizing data during training.

    Returns:
        np.array: Predicted power for 48 timesteps.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    print(type(data))
    # Preprocess the input data
    data['Power(mW)'] = scaler.transform(data['Power(mW)'])  # Normalize using the same scaler as training
    input_data = torch.tensor(data, dtype=torch.float32).unsqueeze(0).to(device)  # (1, 72, features)

    # Prepare input for the decoder (12 known timesteps from the second day)
    tgt_input = input_data[:, -12:, :]  # (1, 12, features)

    # Perform autoregressive prediction
    with torch.no_grad():
        for _ in range(48):
            tgt_mask = generate_tgt_mask(tgt_input.size(1)).to(device)  # Update mask
            output = model(input_data, tgt_input, tgt_mask=tgt_mask)  # Model prediction
            next_step = output[:, -1:, :]  # Get the last timestep prediction
            tgt_input = torch.cat([tgt_input, next_step], dim=1)  # Append the prediction

    # Extract the predicted values and inverse transform
    predicted_power = tgt_input[:, 12:, :].squeeze(0).cpu().numpy()  # (48, features)
    predicted_power = scaler.inverse_transform(predicted_power)[:, 0]  # Extract power (first feature)
    return predicted_power


def process_upload_data(upload_path):
    """
    Process the upload.csv file into a structured DataFrame.

    Args:
        upload_path (str): Path to the upload.csv file.

    Returns:
        pd.DataFrame: Structured DataFrame with parsed DateTime and location.
    """
    data = pd.read_csv(upload_path)
    data['DateTime'] = data['序號'].apply(lambda x: str(x)[:12])
    data['DateTime'] = pd.to_datetime(data['DateTime'], format='%Y%m%d%H%M')
    data['location'] = data['序號'].apply(lambda x: str(x)[13:] if str(x)[12] == '0' else str(x)[12:])
    return data


def main():
    """
    Main function to process the public test dataset and generate predictions.
    """
    # Load the upload data
    upload_data = process_upload_data("dataset/upload.csv")

    # Initialize the prediction output
    predictions = []

    # Loop through each unique location and date pair
    for i in range(0, len(upload_data), 48):
        predicted_date, location_id = upload_data.iloc[i][['DateTime', 'location']]
        
        # Load location-specific training data
        location_data = pd.read_csv(f"./dataset/36_TrainingData_interpolation_process/L{location_id}_Train_resampled.csv")
        location_data['DateTime'] = pd.to_datetime(location_data['DateTime'])

        # Extract required input data for the model
        start_time = predicted_date - timedelta(days=1, hours=2, minutes=0)
        end_time = predicted_date + timedelta(hours=7, minutes=50)
        input_data = location_data.set_index('DateTime').loc[start_time:end_time]
        
        # Initialize and load the trained model
        model_path = f"./model_pth/v3/location_{location_id}/L_{location_id}_ep_2000.pth"
        model = TransformerModel(
            src_input_dim=12,
            tgt_input_dim=1,
            d_model=128,
            nhead=8,
            num_encoder_layers=5,
            num_decoder_layers=5,
            dim_feedforward=128,
            dropout=0.1
        )
        model.load_state_dict(torch.load(model_path))

        # Load the scaler used during training
        dataset = SolarPowerDataset(data_path=f"./dataset/36_TrainingData_interpolation_process/L{location_id}_Train_resampled.csv")
        scaler = dataset.scaler

        # Perform prediction
        predicted_power = predict_single_data(model, input_data, scaler)
        predictions.append(predicted_power)

    # Generate upload file
    flat_predictions = [val for sublist in predictions for val in sublist]
    upload_data['答案'] = flat_predictions
    upload_data.to_csv("upload_with_predictions.csv", index=False, encoding="utf-8-sig")
    print("Predictions saved to upload_with_predictions.csv")


if __name__ == "__main__":
    main()


<class 'pandas.core.frame.DataFrame'>


  model.load_state_dict(torch.load(model_path))


ValueError: Expected a 2-dimensional container but got <class 'pandas.core.series.Series'> instead. Pass a DataFrame containing a single row (i.e. single sample) or a single column (i.e. single feature) instead.

In [7]:
import pandas as pd
up = pd.read_csv('upload_with_predictions.csv')
up = up.drop(columns=['DateTime', 'location'])
up.to_csv('./v3_upload.csv', index=False)