In [1]:
import numpy as np
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX
import datetime
import boto3
from io import StringIO
import gzip

In [2]:
# Where the gzipped billing export lives in S3.
# NOTE(review): the role ARN, external ID and bucket below are hard-coded in
# the notebook — consider moving them to configuration before sharing.
bucket_name = 'dtbktmonish'
data_export_file_path = 'data/dataexport/data/BILLING_PERIOD=2024-06/dataexport-00001.csv.gz'

# Ask STS for temporary credentials by assuming 'dataexportrole'
# (the request carries an external ID).
sts_client = boto3.client('sts')
assumed_role = sts_client.assume_role(
    ExternalId="VsCode",
    RoleSessionName="AssumeRoleSession1",
    RoleArn="arn:aws:iam::992382823879:role/dataexportrole",
)
credentials = assumed_role['Credentials']

# Session (and S3 client) that sign requests with those temporary credentials.
session = boto3.Session(
    region_name='us-east-1',
    aws_access_key_id=credentials['AccessKeyId'],
    aws_secret_access_key=credentials['SecretAccessKey'],
    aws_session_token=credentials['SessionToken'],
)
s3_client = session.client('s3')

# Download the object and read the whole compressed payload into memory.
response = s3_client.get_object(Key=data_export_file_path, Bucket=bucket_name)
gzipped_content = response['Body'].read()

# gunzip -> UTF-8 text -> DataFrame; the first CSV column becomes the index.
decompressed_content = gzip.decompress(gzipped_content)
data_string = StringIO(decompressed_content.decode('utf-8'))
data = pd.read_csv(data_string, index_col=0)


In [3]:
# Index the export by usage end time so it can be resampled as a time series.
#
# The cell is guarded so that re-running it is a no-op: set_index() removes
# 'line_item_usage_end_date' from the columns, so an unguarded second run
# fails with KeyError. If the column is missing for any other reason the
# KeyError still surfaces, because the guard only checks the index name.
if data.index.name != 'line_item_usage_end_date':
    # Drop a '0' that directly follows the 'T' separator before parsing.
    # NOTE(review): presumably a workaround for the export's timestamp
    # format — confirm it is still needed; kept unchanged here.
    usage_end = data['line_item_usage_end_date'].str.replace('T0', 'T', regex=False)

    # Build a new frame instead of mutating in place (no inplace=True).
    data = (
        data
        .assign(line_item_usage_end_date=pd.to_datetime(usage_end))
        .set_index('line_item_usage_end_date')
    )

In [4]:
# Hourly on-demand cost for line items whose operation is 'Rules'.
is_rules = data['line_item_operation'] == 'Rules'
filtered_data = data[is_rules]
hourly_data = filtered_data['pricing_public_on_demand_cost'].resample('h').sum()

# Seasonal ARIMA with order (1, 1, 1) and seasonal order (1, 1, 1, 12).
# NOTE(review): the series is hourly, so a seasonal period of 12 models a
# 12-hour cycle — presumably a daily cycle (24) was intended; confirm.
model = SARIMAX(hourly_data, seasonal_order=(1, 1, 1, 12), order=(1, 1, 1))
model_fit = model.fit()

# Forecast 31 * 24 hourly steps past the end of the observed series.
n_hours = 31 * 24
forecast = model_fit.forecast(steps=n_hours)

# One row per consecutive block of 24 forecast values, then total each row.
# NOTE(review): blocks start at the first forecast timestamp, so they are
# rolling 24-hour windows rather than calendar days unless the observed
# series happens to end at midnight.
forecast_daily = np.asarray(forecast).reshape(-1, 24)
daily_costs = forecast_daily.sum(axis=1)

  warn('Non-stationary starting seasonal autoregressive'


In [5]:
# Show the hourly forecast under a heading.
# sep='\n' puts the Series on its own line; the default ' ' separator
# left a stray leading space that misaligned the first printed row.
print('Hourly costs', forecast, sep='\n')

Hourly costs
 2024-06-20 09:00:00+00:00    0.000007
2024-06-20 10:00:00+00:00    0.000009
2024-06-20 11:00:00+00:00    0.000008
2024-06-20 12:00:00+00:00    0.000008
2024-06-20 13:00:00+00:00    0.000008
                               ...   
2024-07-21 04:00:00+00:00    0.000802
2024-07-21 05:00:00+00:00    0.000802
2024-07-21 06:00:00+00:00    0.000802
2024-07-21 07:00:00+00:00    0.000802
2024-07-21 08:00:00+00:00    0.000848
Freq: h, Name: predicted_mean, Length: 744, dtype: float64


In [6]:
# List each entry of daily_costs with a 1-based day number.
print('\nDaily costs')
for i in range(1, len(daily_costs) + 1):
    cost = daily_costs[i - 1]
    print(f'Daily cost for day {i}: ${cost}')


Daily costs
Daily cost for day 1: $0.0004470693135410147
Daily cost for day 2: $0.0010725700238530837
Daily cost for day 3: $0.001696874129982088
Daily cost for day 4: $0.0023211782361110575
Daily cost for day 5: $0.0029454823422400273
Daily cost for day 6: $0.0035697864483689983
Daily cost for day 7: $0.004194090554497969
Daily cost for day 8: $0.0048183946606269405
Daily cost for day 9: $0.005442698766755913
Daily cost for day 10: $0.006067002872884886
Daily cost for day 11: $0.006691306979013853
Daily cost for day 12: $0.007315611085142819
Daily cost for day 13: $0.007939915191271784
Daily cost for day 14: $0.00856421929740075
Daily cost for day 15: $0.009188523403529715
Daily cost for day 16: $0.009812827509658681
Daily cost for day 17: $0.010437131615787647
Daily cost for day 18: $0.011061435721916613
Daily cost for day 19: $0.01168573982804558
Daily cost for day 20: $0.012310043934174552
Daily cost for day 21: $0.012934348040303523
Daily cost for day 22: $0.013558652146432494
Da

In [7]:
# Add up every entry of daily_costs to get the total for the forecast horizon.
# The builtin sum() is kept so the left-to-right accumulation is unchanged.
total_monthly_cost = sum(daily_costs)

summary_line = f'\nTotal monthly cost: ${total_monthly_cost}'
print(summary_line)


Total monthly cost: $0.3041964561952361
