In [None]:
import os
os.environ['OMP_NUM_THREADS'] = '4'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import openai

# OpenAI credential (the original notes label this slot "4" and the disabled
# one below "3.5" -- presumably the GPT-4 and GPT-3.5 keys; confirm).
# The key is read from the environment so that it is never stored in the
# notebook; when OPENAI_API_KEY is unset this falls back to the empty string.
openai.api_key = os.environ.get('OPENAI_API_KEY', '')

# 3.5
# openai.api_key = ''

from data.serialize import SerializerSettings
from models.utils import grid_iter
from models.promptcast import get_promptcast_predictions_data
from models.darts import get_arima_predictions_data
from models.llmtime import get_llmtime_predictions_data
from models.gaussian_process import get_gp_predictions_data
from models.darts import get_TCN_predictions_data, get_NHITS_predictions_data, get_NBEATS_predictions_data
from data.small_context import get_datasets
from models.validation_likelihood_tuning import get_autotuned_predictions_data
import time
import pickle

%load_ext autoreload
%autoreload 2

from jax import vmap
import jax.numpy as jnp

# Global matplotlib style applied to every figure created after this point:
# Times New Roman font family at 15 pt.
plt.rc('font',family='Times New Roman')
plt.rcParams.update({'font.size': 15})


def plot_preds(train, test, pred_dict, model_name, var, end_time, show_samples=False,
               out_dir='/root/LLM/LLM_time/wind_power_results/'):
    """Plot a forecast against the held-out truth and save the figure as a PDF.

    The figure shows the training series, the test series ("Truth"), the
    median forecast, a shaded 5%-95% quantile band computed from the forecast
    samples and, optionally, up to ten individual sample paths. NLL/D, MSE,
    MAE and runtime are annotated on the axes. The figure is written to
    ``<out_dir>/<var>/<model_name>.pdf``; the directory is created if needed.

    Args:
        train: pandas Series with the training history.
        test: pandas Series with the ground-truth values of the forecast window.
        pred_dict: dict with the keys ``'median'`` (point forecast, one value
            per test step) and ``'samples'`` (2-D collection, one row per
            sampled path). An optional ``'NLL/D'`` entry is displayed when it
            is present and not None.
        model_name: label used in the legend and as the output file name.
            ``'gp'`` is displayed and saved as ``'Gaussian Process'``.
        var: name of the forecast variable; used as the plot title and as the
            output sub-directory.
        end_time: number shown as "Runtime" (callers pass elapsed seconds).
        show_samples: when True, overlay up to ten sample paths.
        out_dir: root directory for the saved figures.

    Returns:
        None. The figure is left open so the notebook backend can display it.
        NOTE(review): when called many times in one cell the open figures
        accumulate; close them in the caller if inline display is not needed.
    """
    if model_name == 'gp':
        model_name = 'Gaussian Process'

    n_train, n_test = len(train), len(test)
    history_x = range(n_train)
    horizon_x = range(n_train, n_train + n_test)

    pred = pd.Series(pred_dict['median'], index=test.index)

    fig, ax = plt.subplots(figsize=(8, 6), dpi=100)
    ax.plot(history_x, train.values)
    ax.plot(horizon_x, test.values, label='Truth', color='black')
    ax.plot(horizon_x, pred.values, label=model_name, color='purple')

    # Point-forecast errors of the median prediction.
    diff = test - pred
    mse = np.mean(diff**2)
    mae = np.mean(np.abs(diff))

    # Convert once so DataFrame / list / array inputs all behave the same below.
    samples = np.asarray(pred_dict['samples'])

    # Shade the central 90% interval (5% and 95% quantiles across samples).
    lower = np.quantile(samples, 0.05, axis=0)
    upper = np.quantile(samples, 0.95, axis=0)
    ax.fill_between(horizon_x, lower, upper, alpha=0.3, color='purple')

    if show_samples:
        for path in samples[:10]:
            ax.plot(horizon_x, path, color='purple', alpha=0.3, linewidth=1)

    # Tick at the start, the last training point and the last test point.
    # For 144 train + 25 test points this is the former hard-coded [0, 143, 168].
    ax.set_xticks([0, n_train - 1, n_train + n_test - 1])
    # Leave headroom above the training maximum for the legend and annotations.
    ax.set_ylim(None, max(train.values)*1.3)
    ax.legend(loc='upper left')
    ax.set_title(var)

    nll = pred_dict.get('NLL/D')
    nll_text = f'NLL/D: {nll:.2f}' if nll is not None else 'NLL/D: Null'
    annotations = (
        (0.75, nll_text),
        (0.65, f'MSE: {mse:.2f}'),
        (0.55, f'MAE: {mae:.2f}'),
        (0.45, f'Runtime: {end_time:.2f}'),
    )
    for y_pos, text in annotations:
        ax.text(0.03, y_pos, text, transform=ax.transAxes,
                bbox=dict(facecolor='white', alpha=0.5))

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    var_dir = os.path.join(out_dir, var)
    os.makedirs(var_dir, exist_ok=True)
    fig.savefig(os.path.join(var_dir, '{}.pdf'.format(model_name)), bbox_inches='tight')

## Define models ##

In [None]:
# --- LLM-based forecasters -------------------------------------------------
# Each dict holds the keyword arguments for one forecaster family; the
# 'settings' entry configures how numbers are serialized into text.

gpt4_hypers = {
    'alpha': 0.3,
    'basic': True,
    'temp': 1.0,
    'top_p': 0.8,
    'settings': SerializerSettings(
        base=10, prec=3, signed=True,
        time_sep=', ', bit_sep='', minus_sign='-',
    ),
}

gpt3_hypers = {
    'temp': 0.7,
    'alpha': 0.95,
    'beta': 0.3,
    'basic': False,
    'settings': SerializerSettings(base=10, prec=3, signed=True, half_bin_correction=True),
}

promptcast_hypers = {
    'temp': 0.7,
    'settings': SerializerSettings(
        base=10,
        prec=0,
        signed=True,
        time_sep=', ',
        bit_sep='',
        plus_sign='',
        minus_sign='-',
        half_bin_correction=False,
        decimal_point='',
    ),
}

llama_hypers = {
    'temp': 1.0,
    'alpha': 0.99,
    'beta': 0.3,
    'basic': False,
    'settings': SerializerSettings(
        base=10, prec=3, time_sep=',', bit_sep='',
        plus_sign='', minus_sign='-', signed=True,
    ),
}

# --- Classical / neural baselines -------------------------------------------
# Every value is a list of candidates; the grids are expanded with grid_iter
# before being handed to the tuning routine.

arima_hypers = {'p': [12, 30], 'd': [1, 2], 'q': [0]}

gp_hypers = {'lr': [5e-3, 1e-2, 5e-2, 1e-1]}

TCN_hypers = {
    'in_len': [10, 100, 400],
    'out_len': [1],
    'kernel_size': [3, 5],
    'num_filters': [1, 3],
    'likelihood': ['laplace', 'gaussian'],
}

NHITS_hypers = {
    'in_len': [10, 100, 400],
    'out_len': [1],
    'layer_widths': [64, 16],
    'num_layers': [1, 2],
    'likelihood': ['laplace', 'gaussian'],
}

NBEATS_hypers = {
    'in_len': [10, 100, 400],
    'out_len': [1],
    'layer_widths': [64, 16],
    'num_layers': [1, 2],
    'likelihood': ['laplace', 'gaussian'],
}


# Hyper-parameters per display name. The LLM entries prepend the backend
# model identifier to the shared hyper-parameter dict of their family.
model_hypers = {
    'LLMTime GPT-3.5': {
        'model': 'gpt-3.5-turbo-instruct',
        **gpt3_hypers,
    },
    'LLMTime GPT-4': {
        'model': 'gpt-4',
        **gpt4_hypers,
    },
    'LLMTime GPT-3': {
        'model': 'text-davinci-003',
        **gpt3_hypers,
    },
    'PromptCast GPT-3': {
        'model': 'text-davinci-003',
        **promptcast_hypers,
    },
    # 'llama-70b': {'model': 'llama-70b', **llama_hypers},
    'ARIMA': arima_hypers,
    'gp': gp_hypers,
    'TCN': TCN_hypers,
    'N-BEATS': NBEATS_hypers,
    'N-HiTS': NHITS_hypers,
}

# Prediction function per display name (same keys as model_hypers).
model_predict_fns = {
    'LLMTime GPT-3.5': get_llmtime_predictions_data,
    'LLMTime GPT-3': get_llmtime_predictions_data,
    'LLMTime GPT-4': get_llmtime_predictions_data,
    'PromptCast GPT-3': get_promptcast_predictions_data,
    # 'llama-70b': get_llmtime_predictions_data,
    'ARIMA': get_arima_predictions_data,
    'gp': get_gp_predictions_data,
    'TCN': get_TCN_predictions_data,
    'N-BEATS': get_NBEATS_predictions_data,
    'N-HiTS': get_NHITS_predictions_data,
}

# Models are evaluated in the insertion order of model_predict_fns.
model_names = list(model_predict_fns)

## Running LLMTime and Visualizing Results ##

In [None]:
# datasets = get_datasets()
# ds_name = 'AirPassengersDataset'
# data = datasets[ds_name]
# train, test = data # or change to your own data
# print(train.shape)
# print(test.shape)
# out = {}
# c = 's'
# for model in model_names: # GPT-4 takes about a minute to run
#     model_hypers[model].update({'dataset_name': ds_name}) # for promptcast
#     hypers = list(grid_iter(model_hypers[model]))
#     num_samples = 10
#     pred_dict = get_autotuned_predictions_data(train, test, hypers, num_samples, model_predict_fns[model], verbose=False, parallel=False)
#     out[model] = pred_dict
#     plot_preds(train, test, pred_dict, model, c, show_samples=True)
#     break

In [3]:
import pandas as pd

# Load the raw turbine CSV and display it as the cell output for a quick
# look at the available columns and the missing values.
df = pd.read_csv('/root/LLM/LLM_time/datasets/Turbine_Data.csv')
df

Unnamed: 0.1,Unnamed: 0,ActivePower,AmbientTemperatue,BearingShaftTemperature,Blade1PitchAngle,Blade2PitchAngle,Blade3PitchAngle,ControlBoxTemperature,GearboxBearingTemperature,GearboxOilTemperature,...,GeneratorWinding2Temperature,HubTemperature,MainBoxTemperature,NacellePosition,ReactivePower,RotorRPM,TurbineStatus,WTG,WindDirection,WindSpeed
0,2017-12-31 00:00:00+00:00,,,,,,,,,,...,,,,,,,,G01,,
1,2017-12-31 00:10:00+00:00,,,,,,,,,,...,,,,,,,,G01,,
2,2017-12-31 00:20:00+00:00,,,,,,,,,,...,,,,,,,,G01,,
3,2017-12-31 00:30:00+00:00,,,,,,,,,,...,,,,,,,,G01,,
4,2017-12-31 00:40:00+00:00,,,,,,,,,,...,,,,,,,,G01,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118219,2020-03-30 23:10:00+00:00,70.044465,27.523741,45.711129,1.515669,1.950088,1.950088,0.0,59.821165,55.193793,...,58.148777,39.008931,36.476562,178.0,13.775785,9.234004,2.0,G01,178.0,3.533445
118220,2020-03-30 23:20:00+00:00,40.833474,27.602882,45.598573,1.702809,2.136732,2.136732,0.0,59.142038,54.798545,...,57.550367,39.006759,36.328125,178.0,8.088928,9.229370,2.0,G01,178.0,3.261231
118221,2020-03-30 23:30:00+00:00,20.777790,27.560925,45.462045,1.706214,2.139664,2.139664,0.0,58.439439,54.380456,...,57.099335,39.003815,36.131944,178.0,4.355978,9.236802,2.0,G01,178.0,3.331839
118222,2020-03-30 23:40:00+00:00,62.091039,27.810472,45.343827,1.575352,2.009781,2.009781,0.0,58.205413,54.079014,...,56.847239,39.003815,36.007805,190.0,12.018077,9.237374,2.0,G01,190.0,3.284468


In [None]:
# Load the turbine data, replace missing values with 0 and drop the first
# column by position (the remaining columns are selected by name below).
df = pd.read_csv('/root/LLM/LLM_time/datasets/Turbine_Data.csv')
df = df.fillna(0)
df = df.rename(columns={'Unnamed: 0': 'Time'})
print(df.columns)
df = df.iloc[:, 1:]

# Experiment configuration.
RESULTS_DIR = '/root/LLM/LLM_time/wind_power_results/'
TRAIN_START = 110000   # first row of the training window
TRAIN_END = 110144     # end of training / start of test (144 training rows)
TEST_END = 110169      # end of the test window (25 test rows)
PAUSE_SECONDS = 10     # pause after every model run (presumably for API rate limits)
num_samples = 5        # forecast samples requested from each model

# Column names exactly as they appear in the CSV (including the dataset's
# own spelling 'AmbientTemperatue').
TARGET_COLUMNS = [
    'ActivePower', 'ReactivePower',
    'AmbientTemperatue', 'MainBoxTemperature', 'GearboxBearingTemperature',
    'GeneratorWinding1Temperature', 'HubTemperature',
    'Blade1PitchAngle', 'NacellePosition', 'GeneratorRPM', 'RotorRPM',
    'WindDirection', 'WindSpeed',
]

# Make sure the pickle destination exists instead of relying on plot_preds
# having created it as a side effect.
os.makedirs(RESULTS_DIR, exist_ok=True)

for c in TARGET_COLUMNS:
    train = df[c].iloc[TRAIN_START:TRAIN_END]
    test = df[c].iloc[TRAIN_END:TEST_END]
    out = {}
    for model in model_names: # GPT-4 takes about a minute to run
        start_time = time.time()
        hypers = list(grid_iter(model_hypers[model]))
        pred_dict = get_autotuned_predictions_data(
            train, test, hypers, num_samples, model_predict_fns[model],
            verbose=False, parallel=False,
        )
        out[model] = pred_dict
        # Elapsed seconds for tuning + prediction only (plotting excluded).
        end_time = time.time() - start_time
        plot_preds(train, test, pred_dict, model, c, end_time, show_samples=True)
        time.sleep(PAUSE_SECONDS)
    # One pickle per variable: {model name -> prediction dict}.
    with open(os.path.join(RESULTS_DIR, 'WPF_output_{}.pkl'.format(c)), 'wb') as f:
        pickle.dump(out, f)

# with open('my_dict.pkl', 'rb') as f:
#     loaded_dict = pickle.load(f)