In [1]:
import numpy as np
import pandas as pd
import datetime as dt
from google.cloud import aiplatform

In [14]:
# Handle to the Vertex AI endpoint that a later cell calls for predictions.
ENDPOINT_RESOURCE_NAME = (
    "projects/119551364473/locations/us-central1/endpoints/4193614314151608320"
)
endpoint = aiplatform.Endpoint(endpoint_name=ENDPOINT_RESOURCE_NAME)


In [15]:
# Raw input table, read directly from a CSV object in Cloud Storage (gs://).
SOLAR_DATA_URI = 'gs://forecast_proj_resources/SolarMonthlyData_2657Custs.csv'
solar_data = pd.read_csv(SOLAR_DATA_URI)

In [16]:

# Data preprocessing
# ======================================================================================
def _month_code_to_timestamp(month_code):
    """Parse a numeric ``month.year`` code into a ``datetime``.

    The value is first rendered with exactly four decimals so the year keeps
    its trailing zeros (e.g. 1.201 becomes "1.2010"), then read as ``%m.%Y``.
    """
    return dt.datetime.strptime("{:.4f}".format(month_code), '%m.%Y')


def _select_kwh_rows(frame, bill_rates):
    """Return a copy of the KWH rows of ``frame`` whose 'Netwk Bill Rate Type'
    is in ``bill_rates``, with 'Consumption Month' converted to datetime."""
    wanted = frame['Netwk Bill Rate Type'].isin(bill_rates) & (frame['Unit of me'] == 'KWH')
    subset = frame[wanted].copy()
    subset['Consumption Month'] = pd.to_datetime(
        subset['Consumption Month'].apply(_month_code_to_timestamp)
    )
    return subset


# Bill-rate codes treated as consumption and as generation, respectively.
consumption_billrate = ['PK', 'LVP', 'SH', 'LVS','OP','OP1','OP2']
generation_billrate = ['PGR', 'SGR','OGR','OGG','PGG','SGG']

solar_data_consumption = _select_kwh_rows(solar_data, consumption_billrate)
solar_data_consumption['house_type'] = 'solar'

solar_data_generation = _select_kwh_rows(solar_data, generation_billrate)

# One total per customer and month on each side.
monthly_keys = ['Customer ID', 'Consumption Month']
solar_grouped_generation = solar_data_generation.groupby(monthly_keys).agg({'Sum': 'sum'}).reset_index()
solar_grouped_consumption = solar_data_consumption.groupby(monthly_keys).agg({'Sum': 'sum'}).reset_index()

# Left merge: every consumption month is kept; months without a generation
# row end up with NaN in 'Sum_right'.
solar_net_consumption = solar_grouped_consumption.merge(
    solar_grouped_generation,
    on=monthly_keys,
    how='left',
    suffixes=('_left', '_right'),
)

# Net consumption = consumption - generation, with missing totals counted as 0.
zero_filled = solar_net_consumption.fillna(0)
solar_net_consumption['Consumption'] = zero_filled['Sum_left'] - zero_filled['Sum_right']

solar_net_consumption = solar_net_consumption.drop(['Sum_left', 'Sum_right'], axis=1)

df = solar_net_consumption.copy()

# Feature Engineering 
def create_lag_features(data, lag_steps, group_col='Customer ID', value_col='Consumption'):
    """Add per-group lagged copies of a value column as ``lag_1`` .. ``lag_<lag_steps>``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding ``group_col`` and ``value_col``. It is modified in place:
        the lag columns are written onto this very object.
    lag_steps : int
        Number of lag columns to create. Values below 1 add no columns.
    group_col : str, default 'Customer ID'
        Column that identifies each independent series; shifting never
        crosses from one group into another.
    value_col : str, default 'Consumption'
        Column whose earlier values become the lag features.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in as ``data``.

    Notes
    -----
    ``shift`` works on row position inside each group, so rows must already be
    in chronological order within a group for ``lag_k`` to mean "k rows ago".
    The first ``k`` rows of every group have no history and get NaN in ``lag_k``.
    """
    for lag in range(1, lag_steps + 1):
        data[f'lag_{lag}'] = data.groupby(group_col)[value_col].shift(lag)
    return data
# Adds lag_1..lag_3 per customer. The helper writes the columns onto `df`
# itself and returns that same frame, so `df_long` and `df` are one object.
df_long = create_lag_features(data=df, lag_steps=3)

def create_rolling_mean_features(data, window_size, group_col='Customer ID', value_col='Consumption'):
    """Add a ``rolling_mean`` column: per-group mean of the preceding ``window_size`` rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding ``group_col`` and ``value_col``. It is modified in place.
    window_size : int
        Number of preceding rows averaged for each row.
    group_col : str, default 'Customer ID'
        Column that identifies each independent series; windows never span
        two groups.
    value_col : str, default 'Consumption'
        Column being averaged.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in as ``data``.

    Notes
    -----
    The operation is positional, so rows must already be in chronological
    order within each group. The first ``window_size`` rows of every group
    have an incomplete window and get NaN.
    """
    def _mean_of_previous_rows(values):
        # shift(1) moves every value down one row, so the window ending at a
        # row covers only earlier rows and never the row's own value.
        return values.shift(1).rolling(window=window_size).mean()

    data['rolling_mean'] = data.groupby(group_col)[value_col].transform(_mean_of_previous_rows)
    return data
# Adds `rolling_mean`: per customer, the mean of the two preceding rows.
df_long = create_rolling_mean_features(df_long, window_size=2)


# The first rows of each customer have no history, so their lag_* and
# rolling_mean values are NaN; fill them from the next available row.
# DataFrame.bfill is used because fillna(method='bfill') is deprecated in
# recent pandas releases; the result is the same.
# NOTE(review): the back-fill runs over the whole frame, not per customer, so
# a customer with fewer rows than the lag/window depth is filled from the
# following customer's rows. Confirm this is intended.
df_long.bfill(inplace=True)
df_long['Consumption Month'] = pd.to_datetime(df_long['Consumption Month'])



# Splitting the data: rows before `split_date` form the training set and the
# rows dated exactly `split_date` form the test set.
split_date = '2014-12-01'
train_data = df_long[df_long['Consumption Month'] < split_date]
test_data = df_long[df_long['Consumption Month'] == split_date]

# Separate features and target: identifier, date and target columns are not features.
non_feature_columns = ['Customer ID', 'Consumption Month', 'Consumption']
X_train = train_data.drop(columns=non_feature_columns)
y_train = train_data['Consumption']
X_test = test_data.drop(columns=non_feature_columns)
y_test = test_data['Consumption']

# A stray, indented repeat of the back-fill used to follow here. Its
# indentation made the cell fail with "unexpected indent", and it could not
# change anything: `df_long` is already back-filled before the split, and the
# boolean-mask selections above are separate frames.


In [35]:
# Feature rows of the test split; later cells turn these into prediction instances.
data = X_test

In [36]:
# Bare expression: shows the feature frame as this cell's output.
data

Unnamed: 0,lag_1,lag_2,lag_3,rolling_mean
15715,1313.997,1357.796,1710.277,1335.8965
15814,521.994,710.159,1034.286,616.0765
15914,1205.124,1165.265,1127.676,1185.1945
15960,190.483,234.024,748.332,212.2535
16060,-63.965,-66.433,-68.954,-65.1990
...,...,...,...,...
211061,96.217,142.873,530.693,119.5450
211161,305.371,359.834,393.944,332.6025
211261,86.676,230.344,227.456,158.5100
211361,180.081,251.521,245.519,215.8010


In [37]:
# One dict per row of `data`, keyed by column name.
instances = data.to_dict('records')

In [39]:
# Order in which the feature values are placed inside each instance.
FEATURE_COLUMNS = ['lag_1', 'lag_2', 'lag_3', 'rolling_mean']

# Build the plain value lists straight from `data` rather than from the
# previous contents of `instances`. The earlier form rebound `instances` from
# a list of dicts to a list of lists, so running the cell a second time raised
# TypeError. This form gives the same result and can be re-run safely.
instances = [
    [record[column] for column in FEATURE_COLUMNS]
    for record in data.to_dict(orient='records')
]

In [40]:
# Send all instances to the endpoint in one request and show the returned predictions.
prediction_response = endpoint.predict(instances=instances)
prediction_response.predictions

[1258.112060546875,
 514.7933349609375,
 1161.950927734375,
 191.3374176025391,
 -50.51409149169922,
 -124.7681884765625,
 475.4295959472656,
 51.98151779174805,
 288.1329040527344,
 312.7424011230469,
 296.45068359375,
 539.65185546875,
 1184.643188476562,
 632.16064453125,
 -67.83030700683594,
 627.3519897460938,
 27.25554847717285,
 157.4530029296875,
 70.97901916503906,
 4.18696117401123,
 206.2483825683594,
 56.42861557006836,
 569.2613525390625,
 740.1498413085938,
 251.9488830566406,
 218.2422180175781,
 648.016357421875,
 234.2925109863281,
 -84.29142761230469,
 38.81754684448242,
 429.8067016601562,
 -360.5060119628906,
 258.3486328125,
 940.0026245117188,
 167.4456939697266,
 815.0634155273438,
 123.4134826660156,
 599.8372802734375,
 664.9160766601562,
 571.5523071289062,
 98.48089599609375,
 752.2933959960938,
 110.6708297729492,
 259.5294799804688,
 -95.32109832763672,
 -52.50738143920898,
 182.7959442138672,
 -402.0014038085938,
 -207.3392486572266,
 -106.1029815673828,
 

In [51]:
# Handle to the registered model resource, looked up by its full resource name.
MODEL_RESOURCE_NAME = "projects/119551364473/locations/us-central1/models/6444916049069473792"
my_model = aiplatform.Model(MODEL_RESOURCE_NAME)
