In [None]:
!pip install boto3 pandas seaborn geopandas

In [None]:
import boto3
import pandas as pd
import pickle

import os
import seaborn as sns
import matplotlib.pyplot as plt
import pymongo
import numpy as np
import geopandas as gpd
import itertools
import math
import predictdatascript



# Create a SageMaker runtime client (no explicit region or credentials are
# passed here, so boto3's default configuration is used).
sagemaker = boto3.client('sagemaker-runtime')

# The name of your SageMaker endpoint
endpoint_name = 'v-twac-2024'

# Location whose weather forecast is fetched and scored
district = "cau giay"
city = "Ha Noi"


# Handle request body
data = predictdatascript.append(district, city)
weather_forecast_df = pd.DataFrame.from_dict(data)
if weather_forecast_df.empty:
    raise ValueError(f"Empty dataframe for district {district!r} ({city!r})")
transform_cities = weather_forecast_df.copy()

####### Feat Engineering #######
# Fail before the endpoint call if an expected column is absent.
# NOTE(review): 'location' is not read below; presumably the model consumes
# it — TODO confirm.
required_columns = ['location', 'datetime', 'weather_description']
missing_columns = [name for name in required_columns if name not in transform_cities.columns]
if missing_columns:
    raise KeyError(f"Forecast data is missing required columns: {missing_columns}")

# Every row belongs to the same requested city, so broadcast the constant.
transform_cities['city'] = city

weather_descriptions = transform_cities['weather_description']
# Case-insensitive substring test; a missing description counts as "no rain",
# consistent with the equality test below, which is also False for missing values.
transform_cities['is_rain'] = (
    weather_descriptions.str.lower().str.contains('rain', regex=False, na=False)
)
# NOTE(review): exact, case-sensitive match on 'Fog' (unlike the rain test) —
# confirm this mirrors the feature engineering used when the model was trained.
transform_cities['is_fog'] = weather_descriptions.eq('Fog')


# Convert the DataFrame to a pickle string
payload = pickle.dumps(transform_cities)


# Send the prediction request
response = sagemaker.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=payload,
    ContentType='application/python-pickle',
    Accept='application/python-pickle'
)

# Load the prediction result from the response.
# SECURITY: pickle.loads can execute arbitrary code contained in the bytes it
# reads. NOTE(review): confirm the endpoint is trusted, or switch to a
# data-only format such as JSON/CSV.
result = pickle.loads(response['Body'].read())


# Put the model output (columns labelled 0, 1, 2 are used below) next to the
# features that produced it; both sides are aligned on a fresh 0..n-1 index.
forecast_result_df_ = pd.concat(
    [transform_cities.reset_index(drop=True), pd.DataFrame(result)],
    axis=1,
)
# Second ':'-separated field of `datetime`; the plotting helpers convert its
# first value with int() and treat it as the hour of the first row.
forecast_result_df_["timeframe"] = forecast_result_df_["datetime"].str.split(":").str[1]
# NOTE(review): only output columns 1 and 2 are averaged here; a later cell
# recomputes the average over columns 0, 1 and 2 — confirm which is intended.
forecast_result_df_['avg_delay'] = forecast_result_df_[[1, 2]].mean(axis=1)

forecast_result_df_
# predictdatascript.writeToCollection(forecast_result_df_)


In [None]:
def plot_result(result):
    """Draw three stacked bar charts, one per output column 0, 1 and 2.

    Each panel shows one bar per row of ``result`` and overlays the
    ``precip`` column as a line on a secondary y-axis. All panels share the
    same y-range, from 0 to the largest value found in columns 0, 1 and 2.

    Bars are coloured by day: rows are grouped into blocks of 24 counted
    from ``start_hour`` (the first ``timeframe`` value converted with
    ``int``), and the blocks cycle through red, green, orange. Tick labels
    show ``(position + start_hour) % 24``; labels equal to 0 are enlarged
    and bold.

    Args:
        result: DataFrame with columns ``0``, ``1``, ``2``, ``timeframe``
            and ``precip``. Rows are presumably consecutive hourly
            forecasts — TODO confirm against the data source.

    Raises:
        ValueError: if ``result`` has no rows.
    """
    if len(result) == 0:
        raise ValueError("plot_result needs at least one row")

    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(22, 15))
    start_hour = int(result.timeframe.iloc[0])
    positions = list(range(len(result)))
    hours = [(pos + start_hour) % 24 for pos in positions]

    # Exactly one colour per bar, so the palette length always equals the
    # number of bars, including a partial first or last day.
    day_colors = ['r', 'g', 'orange']
    bar_palette = [
        day_colors[((pos + start_hour) // 24) % len(day_colors)]
        for pos in positions
    ]

    # Shared upper limit so the three panels are directly comparable.
    y_max = result[[0, 1, 2]].to_numpy().max()

    for k in range(3):
        panel = axes[k]
        sns.barplot(x=[pos + start_hour for pos in positions],
                    y=result[k], ax=panel, palette=bar_palette)

        panel.set_xticklabels(hours)
        tick_texts = panel.get_xticklabels()
        for pos, hour in enumerate(hours):
            if hour == 0:
                # Emphasise the first hour of each day.
                tick_texts[pos].set_fontsize(16)
                tick_texts[pos].set_fontweight("bold")

        # Precipitation on its own y-axis, drawn over the bars.
        ax = panel.twinx()
        ax.grid(False)
        sns.lineplot(x=positions, y=result.precip,
                     color='navy', marker='o', ax=ax, alpha=0.4)

        panel.set_title(f"Magnitude of delay: {k+1}")
        panel.set_ylim(0, y_max)

    plt.show()

def plot_avg(result):
    """Draw the ``avg_delay`` column as bars with precipitation overlaid.

    One bar is drawn per row of ``result``; the ``precip`` column is drawn
    as a line on a secondary y-axis.

    Bars are coloured by day: rows are grouped into blocks of 24 counted
    from ``start_hour`` (the first ``timeframe`` value converted with
    ``int``), and the blocks cycle through red, green, orange. Tick labels
    show ``(position + start_hour) % 24``, except where that value is 0:
    there the label is the text after the last ``-`` in the part of
    ``datetime`` before the first ``:`` (presumably the day of month —
    TODO confirm the datetime format), enlarged and bold.

    Args:
        result: DataFrame with columns ``avg_delay``, ``timeframe``,
            ``precip`` and ``datetime`` (strings). Rows are presumably
            consecutive hourly forecasts — TODO confirm.

    Raises:
        ValueError: if ``result`` has no rows.
    """
    if len(result) == 0:
        raise ValueError("plot_avg needs at least one row")

    fig, axes = plt.subplots(figsize=(22, 5))
    start_hour = int(result.timeframe.iloc[0])
    positions = list(range(len(result)))
    hours = [(pos + start_hour) % 24 for pos in positions]

    # Exactly one colour per bar, so the palette length always equals the
    # number of bars, including a partial first or last day.
    day_colors = ['r', 'g', 'orange']
    bar_palette = [
        day_colors[((pos + start_hour) // 24) % len(day_colors)]
        for pos in positions
    ]

    sns.barplot(x=positions, y=result['avg_delay'], ax=axes, palette=bar_palette)

    # Precipitation on its own y-axis, drawn over the bars.
    ax = axes.twinx()
    ax.grid(False)
    sns.lineplot(x=positions, y=result.precip,
                 color='navy', marker='o', ax=ax, alpha=0.4)

    # Hour of day everywhere, except at hour 0 where the date part is shown.
    ticks_label = [
        result['datetime'].iloc[pos].split(":")[0].split("-")[-1] if hour == 0 else hour
        for pos, hour in enumerate(hours)
    ]
    axes.set_xticklabels(ticks_label)

    tick_texts = axes.get_xticklabels()
    for pos, hour in enumerate(hours):
        if hour == 0:
            # Emphasise the first hour of each day.
            tick_texts[pos].set_fontsize(16)
            tick_texts[pos].set_fontweight("bold")

    axes.set_ylim(0)
    axes.set_title("AVG magnitude of delay")
    plt.show()

In [None]:
# Show rows 72 up to (not including) 192 by position — presumably days 3-7
# of an hourly forecast; TODO confirm the row frequency.
forecast_result_df_.iloc[24 * 3:24 * 8]

In [None]:
# NOTE(review): every module below is already imported in the first code cell
# of this notebook; the repeat is redundant after a top-to-bottom run.
import os
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math

# Switch seaborn to its "darkgrid" style for the figures drawn in later cells.
sns.set(style="darkgrid")

In [None]:
# Plot the average delay from row 72 onward (by position), i.e. skipping the
# first 24*3 rows — presumably the first three days of hourly data.
plot_avg(forecast_result_df_.iloc[24 * 3:])

In [None]:
# plot_result(forecast_result_df_[24*3:])

In [None]:
# NOTE(review): same call as an earlier cell in this notebook — confirm
# whether this duplicate is still needed.
plot_avg(forecast_result_df_.iloc[24 * 3:])

In [None]:
# Experimental variant: average over all three output columns (0, 1, 2)
# instead of the [1, 2] subset used for the main `avg_delay`. `assign`
# returns a new frame, so `forecast_result_df_` itself is left untouched.
forecast_result_df_exp = forecast_result_df_.assign(
    avg_delay=forecast_result_df_[[0, 1, 2]].mean(axis=1)
)
plot_avg(forecast_result_df_exp.iloc[24 * 3:])