In [None]:
import pandas as pd

# Directory holding the Pecan Street 15-minute CSV exports. The default is the
# location used when this notebook was written; set the PECANSTREET_DATA_DIR
# environment variable to run the notebook on another machine.
import os

PECANSTREET_DATA_DIR = os.environ.get(
    "PECANSTREET_DATA_DIR", "/home/fuest/EnData/data/pecanstreet"
)

ny_path = os.path.join(PECANSTREET_DATA_DIR, "15minute_data_newyork.csv")
austin_path = os.path.join(PECANSTREET_DATA_DIR, "15minute_data_austin.csv")
cali_path = os.path.join(PECANSTREET_DATA_DIR, "15minute_data_california.csv")

# One DataFrame per region file.
ny_data = pd.read_csv(ny_path)
austin_data = pd.read_csv(austin_path)
cali_data = pd.read_csv(cali_path)

# Distinct values of the `dataid` column in each file.
ny_user_ids = ny_data.dataid.unique()
austin_user_ids = austin_data.dataid.unique()
cali_user_ids = cali_data.dataid.unique()

# Last expression of the cell: display the Austin ids.
austin_user_ids

In [None]:
# NOTE(review): `train_dataset` is not defined in this cell or in any cell above
# it; it is only assigned further down (from `split_dataset(...)`), so this cell
# fails on a fresh kernel unless one of those later cells has been run first.
a = train_dataset.dataset.data
# Display the rows whose `month` is 7 and whose `weekday` is 0.
a[(a.month==7) & (a.weekday==0)]

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import itertools
from data_utils.dataset import PecanStreetDataset

def plot_grid_profile(df, month, weekday):
    """Plot the mean daily ``grid`` profile for one (month, weekday) pair.

    Rows of ``df`` whose ``month`` and ``weekday`` columns equal the given
    values are selected, their ``grid`` sequences are stacked and averaged
    element-wise, and the average is drawn against 96 quarter-hour labels
    ('00:00' ... '23:45').

    NOTE: a later cell of this notebook defines another ``plot_grid_profile``
    with additional optional parameters; once that cell runs it shadows this
    definition.

    Args:
        df: DataFrame with ``month``, ``weekday`` and ``grid`` columns; each
            ``grid`` entry is a sequence convertible with ``np.array``.
        month: Value to match in the ``month`` column.
        weekday: Value to match in the ``weekday`` column.

    Returns:
        None. A message is printed instead of a plot when no row matches.

    Raises:
        ValueError: If the averaged profile does not have exactly 96 entries.
    """
    # Keep only the rows for the requested month and weekday.
    filtered_df = df[(df['month'] == month) & (df['weekday'] == weekday)]

    # Nothing to average: report and bail out instead of failing in np.vstack.
    if filtered_df.empty:
        print(f"No data available for month {month} and weekday {weekday}.")
        return

    # One array per matching row, stacked to (rows, intervals) and averaged per interval.
    grid_values = filtered_df['grid'].apply(np.array).values
    averaged_grid = np.mean(np.vstack(grid_values), axis=0)

    # 96 quarter-hour labels for one day. 'min' is the supported minute alias;
    # the former 'T' spelling is deprecated by pandas.
    timestamps = pd.date_range(start='00:00', end='23:45', freq='15min').strftime('%H:%M')

    # Guard against profiles that are not sampled at 15-minute resolution.
    if len(averaged_grid) != len(timestamps):
        raise ValueError(f"Length of averaged_grid ({len(averaged_grid)}) does not match length of timestamps ({len(timestamps)}).")

    # Draw the averaged profile.
    plt.figure(figsize=(12, 6))
    plt.plot(timestamps, averaged_grid, marker='o')
    plt.title(f'Grid Profile for Month {month} and Weekday {weekday}')
    plt.xlabel('Time of Day')
    plt.ylabel('Grid Values')
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# Load the dataset with normalize=False and plot the averaged profile for
# month=5, weekday=3.
df = PecanStreetDataset(normalize=False).data
plot_grid_profile(df, month=5, weekday=3)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from data_utils.dataset import PecanStreetDataset

def plot_grid_profile(df, month, weekday, dataid=None, ax=None):
    """Plot the mean daily ``grid`` profile for a (month, weekday) pair.

    Rows of ``df`` matching ``month`` and ``weekday`` (and ``dataid`` when it
    is given) are selected, their ``grid`` sequences are averaged
    element-wise, and the result is drawn against 96 quarter-hour labels.

    Args:
        df: DataFrame with ``month``, ``weekday``, ``dataid`` and ``grid``
            columns; each ``grid`` entry is a sequence convertible with
            ``np.array``.
        month: Value to match in the ``month`` column.
        weekday: Value to match in the ``weekday`` column.
        dataid: Optional value to match in the ``dataid`` column; ``None``
            averages over every row of the selected month/weekday.
        ax: Optional object with a matplotlib-style ``plot`` method. When
            given, only a labelled line is added to it and the caller is
            responsible for titles, legend and ``show``. When ``None`` a new
            fully decorated figure is created and shown.

    Returns:
        None. A message is printed instead of a plot when no row matches.

    Raises:
        ValueError: If the averaged profile does not have exactly 96 entries.
    """
    filtered_df = df[(df['month'] == month) & (df['weekday'] == weekday)]
    if dataid is not None:
        filtered_df = filtered_df[filtered_df['dataid'] == dataid]

    # Nothing to average: report and bail out instead of failing in np.vstack.
    if filtered_df.empty:
        print(f"No data available for month {month} and weekday {weekday} with dataid {dataid}.")
        return

    # One array per matching row, stacked to (rows, intervals) and averaged per interval.
    grid_values = filtered_df['grid'].apply(np.array).values
    averaged_grid = np.mean(np.vstack(grid_values), axis=0)

    # 'min' is the supported minute alias; the former 'T' spelling is deprecated by pandas.
    timestamps = pd.date_range(start='00:00', end='23:45', freq='15min').strftime('%H:%M')

    if len(averaged_grid) != len(timestamps):
        raise ValueError(f"Length of averaged_grid ({len(averaged_grid)}) does not match length of timestamps ({len(timestamps)}).")

    if ax is None:
        # Stand-alone mode: build and show a complete figure.
        plt.figure(figsize=(12, 6))
        plt.plot(timestamps, averaged_grid, marker='o')
        title = f'Grid Profile for Month {month} and Weekday {weekday}'
        if dataid is not None:
            title += f' (DataID: {dataid})'
        plt.title(title)
        plt.xlabel('Time of Day')
        plt.ylabel('Grid Values')
        plt.xticks(rotation=45)
        plt.grid(True)
        plt.tight_layout()
        plt.show()
    else:
        # Overlay mode: avoid the misleading legend entry "DataID: None" when
        # the line is the average over all rows rather than a single id.
        label = 'All users' if dataid is None else f'DataID: {dataid}'
        ax.plot(timestamps, averaged_grid, marker='o', label=label)

def plot_all_users_grid_profile(df, month, weekday):
    """Overlay the mean ``grid`` profile of every ``dataid`` on one figure.

    For each distinct value of ``df['dataid']`` the averaged profile for the
    given ``month``/``weekday`` is added to a shared Axes via
    ``plot_grid_profile(..., ax=ax)``; ids without matching rows are reported
    by that helper and contribute no line.

    Args:
        df: DataFrame with ``dataid``, ``month``, ``weekday`` and ``grid`` columns.
        month: Value to match in the ``month`` column.
        weekday: Value to match in the ``weekday`` column.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    for dataid in df['dataid'].unique():
        plot_grid_profile(df, month, weekday, dataid, ax=ax)

    # Decorate the Axes we created explicitly instead of relying on pyplot's
    # "current axes", which could point elsewhere if a helper opened a figure.
    ax.set_title(f'Grid Profile for Month {month} and Weekday {weekday} for All Users')
    ax.set_xlabel('Time of Day')
    ax.set_ylabel('Grid Values')
    ax.tick_params(axis='x', labelrotation=45)
    ax.grid(True)

    # Only build a legend when at least one labelled line was drawn; calling
    # legend() on an empty Axes just emits a warning.
    handles, _labels = ax.get_legend_handles_labels()
    if handles:
        ax.legend()

    fig.tight_layout()
    plt.show()

# Load the dataset with normalize=True and overlay the per-dataid averaged
# profiles for month=11, weekday=3.
df = PecanStreetDataset(normalize=True).data
plot_all_users_grid_profile(df, month=11, weekday=3)

In [None]:
def plot_daily_usage_profiles(df, dataid, month, weekday):
    """Plot every individual daily ``grid`` profile of one user for a (month, weekday) pair.

    Unlike the averaged profile plots, each matching row of ``df`` is drawn
    as its own line, labelled 'Day 1', 'Day 2', ... in row order.

    Args:
        df: DataFrame with ``dataid``, ``month``, ``weekday`` and ``grid``
            columns; each ``grid`` entry is a sequence convertible with
            ``np.array``.
        dataid: Value to match in the ``dataid`` column.
        month: Value to match in the ``month`` column.
        weekday: Value to match in the ``weekday`` column.

    Returns:
        None. A message is printed instead of a plot when no row matches.
    """
    # Keep only the rows of the requested user, month and weekday.
    filtered_df = df[(df['dataid'] == dataid) & (df['month'] == month) & (df['weekday'] == weekday)]

    # Nothing to draw: report and bail out.
    if filtered_df.empty:
        print(f"No data available for user {dataid}, month {month}, and weekday {weekday}.")
        return

    grid_values = filtered_df['grid'].apply(np.array).values
    # 96 quarter-hour labels; 'min' is the supported minute alias, the former
    # 'T' spelling is deprecated by pandas.
    timestamps = pd.date_range(start='00:00', end='23:45', freq='15min').strftime('%H:%M')

    plt.figure(figsize=(12, 6))
    # One line per matching day.
    for i, daily_grid in enumerate(grid_values):
        plt.plot(timestamps, daily_grid, marker='o', label=f'Day {i+1}')

    title = f'Daily Usage Profiles for User {dataid}, Month {month}, Weekday {weekday}'
    plt.title(title)
    plt.xlabel('Time of Day')
    plt.ylabel('Grid Values')
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()

# Load the dataset with normalize=False and plot each individual day for
# dataid=3687, month=5, weekday=0.
df = PecanStreetDataset(normalize=False).data
plot_daily_usage_profiles(df, dataid=3687, month=5, weekday=0)

In [None]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt

from data_utils.dataset import PecanStreetDataset, prepare_dataloader, split_dataset
from generator.acgan import ACGAN  

# Use CUDA when torch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Dataset built with normalize=True and user_id=27 (presumably restricting it
# to that one user — confirm in PecanStreetDataset), then split into a
# training and a validation part.
data = PecanStreetDataset(normalize=True, user_id=27)
train_dataset, val_dataset = split_dataset(data)
# NOTE(review): this call passes `output_dim=96`, while the other ACGAN(...)
# call later in this notebook passes `window_length=96` instead. Confirm which
# keyword the current ACGAN constructor accepts; one of the two cells is
# likely stale.
model = ACGAN(
    input_dim=1,
    noise_dim=512,
    embedding_dim=512,
    output_dim=96,
    learning_rate=1e-4,
    weight_path="runs/",
)
# Training runs inline in this cell (100 epochs, batch size 32).
model.train(train_dataset, val_dataset, batch_size=32, num_epoch=100)

def generate_and_plot_series(model, day_labels, month_labels, data, month, weekday, num_profiles=3):
    """Plot generated profiles next to randomly sampled real profiles.

    ``model.generate([day_labels, month_labels])`` is called ``num_profiles``
    times and each result is drawn as a solid line. Up to ``num_profiles``
    rows of ``data`` matching ``month`` and ``weekday`` are sampled at random
    and their ``grid`` sequences are drawn as dashed lines on the same axes.

    Args:
        model: Object exposing ``generate(labels)``; the result is squeezed
            and must then be a sequence of 96 values. Torch tensors (on any
            device) and array-likes are both accepted.
        day_labels: First conditioning label passed to ``model.generate``.
        month_labels: Second conditioning label passed to ``model.generate``.
        data: DataFrame with ``month``, ``weekday`` and ``grid`` columns.
        month: Value to match in the ``month`` column for the real profiles.
        weekday: Value to match in the ``weekday`` column for the real profiles.
        num_profiles: Number of generated profiles, and maximum number of
            real profiles, to draw. Defaults to 3.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    def _as_numpy(series):
        # matplotlib cannot consume CUDA tensors or tensors that require grad,
        # so move tensor outputs to host memory before plotting.
        if hasattr(series, 'detach'):
            series = series.detach().cpu().numpy()
        return np.asarray(series).squeeze()

    generated_profiles = [
        _as_numpy(model.generate([day_labels, month_labels]))
        for _ in range(num_profiles)
    ]

    # 96 quarter-hour labels; 'min' is the supported minute alias, the former
    # 'T' spelling is deprecated by pandas.
    timestamps = pd.date_range(start='00:00', periods=96, freq='15min').strftime('%H:%M')

    filtered_data = data[(data['month'] == month) & (data['weekday'] == weekday)]
    # sample(n) raises when fewer than n rows exist, so cap n at what is available.
    n_real = min(num_profiles, len(filtered_data))
    if n_real < num_profiles:
        print(f"Only {n_real} real profiles available for month {month} and weekday {weekday}.")
    if n_real > 0:
        real_profiles = [np.array(grid) for grid in filtered_data.sample(n_real)['grid'].values]
    else:
        real_profiles = []

    # Plot all series on the same plot: generated solid, real dashed.
    plt.figure(figsize=(15, 6))
    for i, profile in enumerate(generated_profiles, start=1):
        plt.plot(timestamps, profile, label=f'Generated Profile {i}')
    for i, profile in enumerate(real_profiles, start=1):
        plt.plot(timestamps, profile, label=f'Real Profile {i}', linestyle='--')
    plt.xlabel('Time')
    plt.ylabel('Value')
    plt.title('Generated and Real Time Series Profiles')
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# Single-element label tensors (day label 6, month label 5) placed on `device`.
day_labels = torch.tensor([6]).to(device)
month_labels = torch.tensor([5]).to(device)

# Compare generated series against real rows with month == 5 and weekday == 6,
# i.e. the same values as the labels above.
generate_and_plot_series(model, day_labels, month_labels, data.data, 5, 6)

In [None]:
import pandas as pd
import torch
import numpy as np
import matplotlib.pyplot as plt

from generator.acgan import ACGAN  
from data_utils.dataset import PecanStreetDataset, split_dataset

# Use CUDA when torch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def plot_range_with_values(df, colname, values_to_compare, month, weekday):
    """Plot one series against the per-interval min/max envelope of real data.

    Rows of ``df`` matching ``month`` and ``weekday`` are selected, the
    sequences in ``df[colname]`` are stacked, and the element-wise minimum and
    maximum are drawn as a shaded band over 96 quarter-hour labels.
    ``values_to_compare`` is drawn on top as a line.

    Args:
        df: DataFrame with ``month``, ``weekday`` and ``colname`` columns;
            each ``colname`` entry is a sequence of equal length.
        colname: Name of the column holding the sequences.
        values_to_compare: Series to overlay. A torch tensor (any device) or
            any array-like is accepted.
        month: Value to match in the ``month`` column.
        weekday: Value to match in the ``weekday`` column.

    Returns:
        None. A message is printed instead of a plot when no row matches.
    """
    # Tensors are detached and moved to host memory so matplotlib can use
    # them; anything else is treated as an array-like.
    if hasattr(values_to_compare, 'detach'):
        values_to_compare = values_to_compare.detach().cpu().numpy()
    else:
        values_to_compare = np.asarray(values_to_compare)

    filtered_df = df[(df['month'] == month) & (df['weekday'] == weekday)]
    # np.min/np.max raise on an empty selection, so report and bail out first.
    if filtered_df.empty:
        print(f"No data available for month {month} and weekday {weekday}.")
        return

    # (rows, intervals) array, reduced per interval.
    array_data = np.array(filtered_df[colname].to_list())
    min_values = np.min(array_data, axis=0)
    max_values = np.max(array_data, axis=0)
    # 'min' is the supported minute alias; the former 'T' spelling is deprecated by pandas.
    timestamps = pd.date_range(start='00:00', end='23:45', freq='15min').strftime('%H:%M')

    plt.figure(figsize=(15, 7))
    plt.fill_between(timestamps, min_values, max_values, color='gray', alpha=0.5, label='Range of values')
    plt.plot(timestamps, values_to_compare, color='blue', marker='o', label='Values to Compare')

    plt.title('Range of Values and Comparison')
    plt.xlabel('Time of Day')
    plt.ylabel('Values')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

# Dataset built with normalize=True, user_id=1642 and include_generation=False;
# `dataset` is its underlying DataFrame (the `.data` attribute).
data = PecanStreetDataset(normalize=True, user_id=1642, include_generation=False)
dataset = data.data

# Split into a training and a validation part and train inline.
# NOTE(review): `data`, `train_dataset`, `val_dataset` and `model` overwrite the
# variables of the same names assigned by an earlier training cell.
train_dataset, val_dataset = split_dataset(data)
# NOTE(review): this call passes `window_length=96`, while the earlier
# ACGAN(...) call in this notebook passes `output_dim=96`. Confirm which
# keyword the current ACGAN constructor accepts.
model = ACGAN(
    input_dim=1,
    noise_dim=512,
    embedding_dim=512,
    window_length=96,
    learning_rate=1e-4,
    weight_path="runs/",
)
model.train(train_dataset, val_dataset, batch_size=32, num_epoch=25)

In [None]:
from datetime import datetime, timedelta
import pandas as pd

def generate_synthetic_data_for_eval(df: pd.DataFrame, model):
    """Generate one synthetic series for every row of ``df``.

    For each row, single-element label tensors are built from the row's
    ``month`` and ``weekday`` values, ``model.generate`` is called with them,
    and the squeezed result is stored as a NumPy array.

    Relies on the notebook-level names ``torch`` and ``device``, which are
    not defined in this cell.

    NOTE(review): labels are passed here as ``[month_label, day_label]``,
    whereas ``generate_and_plot_series`` elsewhere in this notebook passes
    ``[day_labels, month_labels]``. Confirm the order ``model.generate``
    expects; one of the two call sites is probably wrong.

    Args:
        df: DataFrame with ``month``, ``weekday`` and ``date_day`` columns.
        model: Object exposing ``generate(labels)`` that returns a tensor.

    Returns:
        DataFrame with columns ``month``, ``weekday``, ``date_day`` and
        ``generated_ts``, one row per row of ``df`` in the same order.
    """
    def _synthesize(row):
        # Single-element label tensors on the notebook's compute device.
        month_label = torch.tensor([row["month"]]).to(device)
        day_label = torch.tensor([row["weekday"]]).to(device)
        generated = model.generate([month_label, day_label])
        return (
            row["month"],
            row["weekday"],
            row["date_day"],
            generated.squeeze().cpu().numpy(),
        )

    records = [_synthesize(row) for _, row in df.iterrows()]

    # A flattened (value, timestamp) array output was sketched here in the
    # past but is disabled; the per-row DataFrame is what callers receive.
    return pd.DataFrame(
        records, columns=["month", "weekday", "date_day", "generated_ts"]
    )


# NOTE(review): depends on `dataset` and `model` from the preceding training
# cell, and (inside the function) on the notebook globals `torch` and `device`.
syn_dataframe = generate_synthetic_data_for_eval(dataset, model)
# Last expression of the cell: display the generated frame.
syn_dataframe

In [1]:
from data_utils.dataset import PecanStreetDataset, split_dataset
import pandas as pd

# Two datasets built with identical arguments except for `normalize`, so the
# inverse transform of the first can be compared against the second below.
normalized_dataset = PecanStreetDataset(normalize=True, user_id=1642, include_generation=False)
original_dataset = PecanStreetDataset(normalize=False, user_id=1642, include_generation=False)

In [2]:
# Inverse-transform the normalized `grid` column for id 1642. In the recorded
# output the values agree with `original_dataset.data` up to floating-point
# noise (e.g. 1.0440000000000007 vs 1.044); the returned frame shows only
# `month`, `weekday` and `grid` with a fresh 0..N index.
normalized_dataset.inverse_transform(normalized_dataset.data, 1642, 'grid')

Unnamed: 0,month,weekday,grid
0,0,0,"[1.0440000000000007, 0.8620000000000004, 0.918..."
1,0,0,"[0.931000000000001, 0.8169999999999996, 1.1219..."
2,0,0,"[1.453, 2.5940000000000003, 2.5680000000000005..."
3,0,1,"[2.1719999999999997, 1.6219999999999997, 1.383..."
4,0,1,"[0.9550000000000001, 0.8519999999999996, 0.851..."
...,...,...,...
350,11,6,"[0.27200000000000035, 2.7509999999999994, 4.55..."
351,11,6,"[1.468, 0.38799999999999996, 0.542999999999999..."
352,11,6,"[0.4279999999999999, 0.34600000000000036, 2.33..."
353,11,6,"[0.3380000000000001, 0.6760000000000004, 0.340..."


In [3]:
# Reference frame built with normalize=False, displayed for comparison with the
# inverse-transformed output of the previous cell.
original_dataset.data

Unnamed: 0,dataid,month,date_day,weekday,grid
8,1642,0,8,0,"[1.044, 0.862, 0.918, 0.578, 0.462, 0.462, 0.4..."
15,1642,0,15,0,"[0.931, 0.817, 1.122, 1.364, 1.147, 0.853, 1.0..."
22,1642,0,22,0,"[1.453, 2.594, 2.568, 2.487, 1.982, 1.654, 1.3..."
2,1642,0,2,1,"[2.172, 1.622, 1.384, 1.473, 2.184, 2.419, 2.3..."
9,1642,0,9,1,"[0.955, 0.852, 0.851, 0.823, 0.868, 0.871, 1.1..."
...,...,...,...,...,...
333,1642,11,2,6,"[0.272, 2.751, 4.556, 4.378, 4.432, 3.79, 1.24..."
340,1642,11,9,6,"[1.468, 0.388, 0.543, 0.398, 0.466, 0.392, 0.4..."
347,1642,11,16,6,"[0.428, 0.346, 2.337, 3.963, 3.868, 4.219, 4.1..."
354,1642,11,23,6,"[0.338, 0.676, 0.341, 0.648, 0.329, 0.656, 0.3..."
