In [None]:
import pandas as pd

ny_path = "/home/fuest/EnData/data/pecanstreet/15minute_data_newyork.csv"
austin_path = "/home/fuest/EnData/data/pecanstreet/15minute_data_austin.csv"
cali_path = "/home/fuest/EnData/data/pecanstreet/15minute_data_california.csv"

ny_data = pd.read_csv(ny_path)
austin_data = pd.read_csv(austin_path)
cali_data = pd.read_csv(cali_path)

ny_user_ids = ny_data.dataid.unique()
austin_user_ids = austin_data.dataid.unique()
cali_user_ids = cali_data.dataid.unique()
austin_user_ids

In [None]:
a = train_dataset.dataset.data
a[(a.month==7) & (a.weekday==0)]

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import itertools
from data_utils.dataset import PecanStreetDataset

def plot_grid_profile(df, month, weekday):
    # Filter the DataFrame based on the specified month and weekday
    filtered_df = df[(df['month'] == month) & (df['weekday'] == weekday)]
    
    # Check if there are any rows after filtering
    if filtered_df.empty:
        print(f"No data available for month {month} and weekday {weekday}.")
        return

    # Convert the 'grid' column to a list of arrays
    grid_values = filtered_df['grid'].apply(np.array).values

    # Calculate the average grid values for each 15-minute interval across all filtered rows
    averaged_grid = np.mean(np.vstack(grid_values), axis=0)

    # Generate 96 timestamps for a day at 15-minute intervals
    timestamps = pd.date_range(start='00:00', end='23:45', freq='15T').strftime('%H:%M')

    # Ensure that the lengths match
    if len(averaged_grid) != len(timestamps):
        raise ValueError(f"Length of averaged_grid ({len(averaged_grid)}) does not match length of timestamps ({len(timestamps)}).")

    # Plot the averaged grid profile
    plt.figure(figsize=(12, 6))
    plt.plot(timestamps, averaged_grid, marker='o')
    plt.title(f'Grid Profile for Month {month} and Weekday {weekday}')
    plt.xlabel('Time of Day')
    plt.ylabel('Grid Values')
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.tight_layout()
    plt.show()

df = PecanStreetDataset(normalize=False).data
plot_grid_profile(df, month=5, weekday=3)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from data_utils.dataset import PecanStreetDataset

def plot_grid_profile(df, month, weekday, dataid=None, ax=None):
    filtered_df = df[(df['month'] == month) & (df['weekday'] == weekday)]
    if dataid is not None:
        filtered_df = filtered_df[filtered_df['dataid'] == dataid]
    
    if filtered_df.empty:
        print(f"No data available for month {month} and weekday {weekday} with dataid {dataid}.")
        return

    grid_values = filtered_df['grid'].apply(np.array).values
    averaged_grid = np.mean(np.vstack(grid_values), axis=0)
    timestamps = pd.date_range(start='00:00', end='23:45', freq='15T').strftime('%H:%M')

    if len(averaged_grid) != len(timestamps):
        raise ValueError(f"Length of averaged_grid ({len(averaged_grid)}) does not match length of timestamps ({len(timestamps)}).")
    
    if ax is None:
        plt.figure(figsize=(12, 6))
        plt.plot(timestamps, averaged_grid, marker='o')
        title = f'Grid Profile for Month {month} and Weekday {weekday}'
        if dataid is not None:
            title += f' (DataID: {dataid})'
        plt.title(title)
        plt.xlabel('Time of Day')
        plt.ylabel('Grid Values')
        plt.xticks(rotation=45)
        plt.grid(True)
        plt.tight_layout()
        plt.show()
    else:
        ax.plot(timestamps, averaged_grid, marker='o', label=f'DataID: {dataid}')

def plot_all_users_grid_profile(df, month, weekday):
    unique_dataids = df['dataid'].unique()
    fig, ax = plt.subplots(figsize=(12, 6))
    for dataid in unique_dataids:
        plot_grid_profile(df, month, weekday, dataid, ax=ax)
    
    title = f'Grid Profile for Month {month} and Weekday {weekday} for All Users'
    plt.title(title)
    plt.xlabel('Time of Day')
    plt.ylabel('Grid Values')
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()

df = PecanStreetDataset(normalize=True).data
plot_all_users_grid_profile(df, month=11, weekday=3)

In [None]:
def plot_daily_usage_profiles(df, dataid, month, weekday):
    # Filter the DataFrame based on the specified user id, month, and weekday
    filtered_df = df[(df['dataid'] == dataid) & (df['month'] == month) & (df['weekday'] == weekday)]
    
    # Check if there are any rows after filtering
    if filtered_df.empty:
        print(f"No data available for user {dataid}, month {month}, and weekday {weekday}.")
        return

    grid_values = filtered_df['grid'].apply(np.array).values
    timestamps = pd.date_range(start='00:00', end='23:45', freq='15T').strftime('%H:%M')

    plt.figure(figsize=(12, 6))
    for i, daily_grid in enumerate(grid_values):
        plt.plot(timestamps, daily_grid, marker='o', label=f'Day {i+1}')

    title = f'Daily Usage Profiles for User {dataid}, Month {month}, Weekday {weekday}'
    plt.title(title)
    plt.xlabel('Time of Day')
    plt.ylabel('Grid Values')
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()

df = PecanStreetDataset(normalize=False).data
plot_daily_usage_profiles(df, dataid=3687, month=5, weekday=0)

In [None]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt

from data_utils.dataset import PecanStreetDataset, prepare_dataloader, split_dataset
from generator.acgan import ACGAN  

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

data = PecanStreetDataset(normalize=True, user_id=27)
train_dataset, val_dataset = split_dataset(data)
model = ACGAN(
    input_dim=1,
    noise_dim=512,
    embedding_dim=512,
    output_dim=96,
    learning_rate=1e-4,
    weight_path="runs/",
)
model.train(train_dataset, val_dataset, batch_size=32, num_epoch=100)

def generate_and_plot_series(model, day_labels, month_labels, data, month, weekday):
    series1 = model.generate([day_labels, month_labels]).squeeze()
    series2 = model.generate([day_labels, month_labels]).squeeze()
    series3 = model.generate([day_labels, month_labels]).squeeze()

    timestamps = pd.date_range(start='00:00', periods=96, freq='15T').strftime('%H:%M')

    filtered_data = data[(data['month'] == month) & (data['weekday'] == weekday)]
    real_profiles = filtered_data.sample(3)['grid'].values
    real_profile1 = np.array(real_profiles[0])
    real_profile2 = np.array(real_profiles[1])
    real_profile3 = np.array(real_profiles[2])

    # Plot all series on the same plot
    plt.figure(figsize=(15, 6))
    plt.plot(timestamps, series1, label='Generated Profile 1')
    plt.plot(timestamps, series2, label='Generated Profile 2')
    plt.plot(timestamps, series3, label='Generated Profile 3')
    plt.plot(timestamps, real_profile1, label='Real Profile 1', linestyle='--')
    plt.plot(timestamps, real_profile2, label='Real Profile 2', linestyle='--')
    plt.plot(timestamps, real_profile3, label='Real Profile 3', linestyle='--')
    plt.xlabel('Time')
    plt.ylabel('Value')
    plt.title('Generated and Real Time Series Profiles')
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

day_labels = torch.tensor([6]).to(device)
month_labels = torch.tensor([5]).to(device)

generate_and_plot_series(model, day_labels, month_labels, data.data, 5, 6)

In [2]:
import pandas as pd
import torch
import numpy as np
import matplotlib.pyplot as plt

from generator.acgan import ACGAN  
from data_utils.dataset import PecanStreetDataset, split_dataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def plot_range_with_values(df, colname, values_to_compare, month, weekday):
    values_to_compare = values_to_compare.cpu().numpy()
    filtered_df = df[(df['month'] == month) & (df['weekday'] == weekday)]
    array_data = np.array(filtered_df[colname].to_list())
    min_values = np.min(array_data, axis=0)
    max_values = np.max(array_data, axis=0)
    timestamps = pd.date_range(start='00:00', end='23:45', freq='15T').strftime('%H:%M')
    
    plt.figure(figsize=(15, 7))
    plt.fill_between(timestamps, min_values, max_values, color='gray', alpha=0.5, label='Range of values')
    plt.plot(timestamps, values_to_compare, color='blue', marker='o', label='Values to Compare')
    
    plt.title('Range of Values and Comparison')
    plt.xlabel('Time of Day')
    plt.ylabel('Values')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

data = PecanStreetDataset(normalize=True, user_id=1642, include_generation=False)
dataset = data.data

train_dataset, val_dataset = split_dataset(data)
model = ACGAN(
    input_dim=1,
    noise_dim=512,
    embedding_dim=512,
    window_length=96,
    learning_rate=1e-4,
    weight_path="runs/",
)
model.train(train_dataset, val_dataset, batch_size=32, num_epoch=25)

Epoch 1: 100%|██████████| 9/9 [00:00<00:00, 13.84it/s]


Epoch [1/25], Mean MMD Loss: [0.02135648]


Epoch 2: 100%|██████████| 9/9 [00:00<00:00, 22.64it/s]


Epoch [2/25], Mean MMD Loss: [0.022359]


Epoch 3: 100%|██████████| 9/9 [00:00<00:00, 21.50it/s]


Epoch [3/25], Mean MMD Loss: [0.0231376]


Epoch 4: 100%|██████████| 9/9 [00:00<00:00, 22.12it/s]


Epoch [4/25], Mean MMD Loss: [0.02634294]


Epoch 5: 100%|██████████| 9/9 [00:00<00:00, 25.47it/s]


Epoch [5/25], Mean MMD Loss: [0.02827037]


Epoch 6: 100%|██████████| 9/9 [00:00<00:00, 22.63it/s]


Epoch [6/25], Mean MMD Loss: [0.03129256]


Epoch 7: 100%|██████████| 9/9 [00:00<00:00, 24.78it/s]


Epoch [7/25], Mean MMD Loss: [0.02662139]


Epoch 8: 100%|██████████| 9/9 [00:00<00:00, 24.61it/s]


Epoch [8/25], Mean MMD Loss: [0.0213123]


Epoch 9: 100%|██████████| 9/9 [00:00<00:00, 23.67it/s]


Epoch [9/25], Mean MMD Loss: [0.01865871]


Epoch 10: 100%|██████████| 9/9 [00:00<00:00, 27.16it/s]


Epoch [10/25], Mean MMD Loss: [0.01713818]


Epoch 11: 100%|██████████| 9/9 [00:00<00:00, 28.80it/s]


Epoch [11/25], Mean MMD Loss: [0.01149425]


Epoch 12: 100%|██████████| 9/9 [00:00<00:00, 22.90it/s]


Epoch [12/25], Mean MMD Loss: [0.01057291]


Epoch 13: 100%|██████████| 9/9 [00:00<00:00, 28.75it/s]


Epoch [13/25], Mean MMD Loss: [0.00902534]


Epoch 14: 100%|██████████| 9/9 [00:00<00:00, 23.72it/s]


Epoch [14/25], Mean MMD Loss: [0.00612254]


Epoch 15: 100%|██████████| 9/9 [00:00<00:00, 22.85it/s]


Epoch [15/25], Mean MMD Loss: [0.00590151]


Epoch 16: 100%|██████████| 9/9 [00:00<00:00, 23.64it/s]


Epoch [16/25], Mean MMD Loss: [0.00517305]


Epoch 17: 100%|██████████| 9/9 [00:00<00:00, 24.99it/s]


Epoch [17/25], Mean MMD Loss: [0.00470294]


Epoch 18: 100%|██████████| 9/9 [00:00<00:00, 20.87it/s]


Epoch [18/25], Mean MMD Loss: [0.00540652]


Epoch 19: 100%|██████████| 9/9 [00:00<00:00, 25.95it/s]


Epoch [19/25], Mean MMD Loss: [0.00610686]


Epoch 20: 100%|██████████| 9/9 [00:00<00:00, 25.31it/s]


Epoch [20/25], Mean MMD Loss: [0.00627095]


Epoch 21: 100%|██████████| 9/9 [00:00<00:00, 25.49it/s]


Epoch [21/25], Mean MMD Loss: [0.00681573]


Epoch 22: 100%|██████████| 9/9 [00:00<00:00, 27.36it/s]


Epoch [22/25], Mean MMD Loss: [0.00603556]


Epoch 23: 100%|██████████| 9/9 [00:00<00:00, 24.05it/s]


Epoch [23/25], Mean MMD Loss: [0.00697963]


Epoch 24: 100%|██████████| 9/9 [00:00<00:00, 27.77it/s]


Epoch [24/25], Mean MMD Loss: [0.00716923]


Epoch 25: 100%|██████████| 9/9 [00:00<00:00, 25.43it/s]


Epoch [25/25], Mean MMD Loss: [0.00643573]


In [33]:
from datetime import datetime, timedelta
import pandas as pd

def generate_synthetic_data_for_eval(df: pd.DataFrame, model):
    syn_ts = []

    for _, row in df.iterrows():
        month_label = torch.tensor([row["month"]]).to(device)
        day_label = torch.tensor([row["weekday"]]).to(device)
        gen_ts = model.generate([month_label, day_label]).squeeze().cpu().numpy()
        syn_ts.append((row["month"], row["weekday"], row["date_day"], gen_ts))

    syn_ts_df = pd.DataFrame(syn_ts, columns=["month", "weekday", "date_day", "generated_ts"])
    #syn_ts_df["timestamps"] = syn_ts_df.apply(lambda x: generate_timestamps_for_row(x), axis=1)

    #generated_data_long_list = np.array([item for sublist in syn_ts_df['generated_ts'] for item in sublist]).reshape(-1, 1)
    #generated_timetstamp_long_list = np.array([item for sublist in syn_ts_df['timestamps'] for item in sublist]).reshape(-1, 1)
    #syn_array = np.concatenate((generated_data_long_list, generated_timetstamp_long_list), axis=1)

    return syn_ts_df #syn_array

def generate_timestamps_for_row(row, year=2023):
    """
    Generate a list of timestamps for a given row based on month, weekday, and date_day.
    
    Args:
        row (pd.Series): A row of the dataframe with columns 'month', 'weekday', 'date_day'.
        year (int): The year for which to generate the dates. Default is 2023.
        
    Returns:
        List[datetime]: A list of timestamps for each 15-minute interval of the specified day.
    """
    month = row['month'] + 1  # Convert 0-based month to 1-based month
    date_day = row['date_day']
    base_date = datetime(2023, month, date_day)
    
    # Generate 15-minute interval timestamps for the entire day
    timestamps = [base_date + timedelta(minutes=15*i) for i in range(96)]

    return timestamps


syn_dataframe = generate_synthetic_data_for_eval(dataset, model)
syn_dataframe

Unnamed: 0,month,weekday,date_day,generated_ts
0,0,1,2,"[0.59788185, 0.69071037, 0.83522326, 0.8451232..."
1,0,1,9,"[0.56822556, 0.6249575, 0.8038964, 0.8336613, ..."
2,0,1,16,"[0.5865626, 0.64459753, 0.80494887, 0.802107, ..."
3,0,1,23,"[0.58915174, 0.6456545, 0.8118451, 0.7998799, ..."
4,0,1,30,"[0.58009064, 0.6925638, 0.8383365, 0.83358705,..."
...,...,...,...,...
350,11,3,27,"[0.5859992, 0.6917852, 0.6920727, 0.62693155, ..."
351,11,4,7,"[0.657403, 0.77560043, 0.80194217, 0.8510719, ..."
352,11,4,14,"[0.656007, 0.80829847, 0.79520875, 0.86283946,..."
353,11,4,21,"[0.6426559, 0.7746102, 0.76724887, 0.8354615, ..."


In [37]:
from eval.metrics import Context_FID
import numpy as np

ori = np.array(dataset["grid"])
ori = ori.reshape(ori.shape[0], 1, 1)

syn = np.array(syn_dataframe["generated_ts"])
syn = syn.reshape(syn.shape[0], 1, 1)

In [39]:
fid = Context_FID(ori, syn)

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [41]:
type(syn)

numpy.ndarray