In [None]:
import pandas as pd 
import numpy as np
import pyarrow.parquet as pq
import matplotlib as mpl
import matplotlib.pyplot as plt
from typing import List 
import sklearn.preprocessing as preproc 

#from streamlit_searchbox import st_searchbox

In [None]:
# Load the bilateral trade table from Parquet into a pandas DataFrame.
# NOTE(review): the file name ends in "year_2020" although the variable is
# called "all_years" -- confirm which years the file actually contains.
parquet_path = '../data/country_partner_sitcproduct4digit_year_2020.parquet'
trade_dataset = pq.ParquetDataset(parquet_path)
trade_data_all_years = trade_dataset.read_pandas().to_pandas()
trade_data_all_years

In [None]:
# (column prefix, scaling function) pairs. Looping scaler-first appends the new
# columns as minmax_import, minmax_export, standardized_import, standardized_export.
scalers = [
    ('minmax', preproc.minmax_scale),
    # A fresh StandardScaler is fitted for every column it is applied to.
    ('standardized', lambda values: preproc.StandardScaler().fit_transform(values)),
]
for prefix, scale in scalers:
    for flow in ('import', 'export'):
        # Double brackets select a one-column DataFrame, i.e. 2-D input for the scaler.
        source = trade_data_all_years[[f'{flow}_value']]
        trade_data_all_years[f'{prefix}_{flow}'] = scale(source)

In [None]:
# Display the frame again to inspect the newly added scaled columns.
trade_data_all_years

In [None]:
# Side-by-side histograms of the min-max scaled import and export values.
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
for axis, column in zip(ax, ['minmax_import', 'minmax_export']):
    trade_data_all_years[column].hist(ax=axis)
    # Use a log-scaled count axis on BOTH panels so the two distributions can
    # be compared directly (a log scale on only one panel is misleading).
    axis.set_yscale('log')
    # Label each panel so the figure stands on its own.
    axis.set(title=column, xlabel='min-max scaled value', ylabel='count (log scale)')

In [None]:
# Modelling imports used by the cells below.
from sklearn import linear_model 
from sklearn.model_selection import train_test_split
# `preproc` is already imported in the first cell; this re-import is redundant.
import sklearn.preprocessing as preproc
# List the available columns before choosing features and target.
trade_data_all_years.columns

In [None]:
# Predictor columns: the raw trade values plus their min-max scaled and
# standardized counterparts.
features = [
    'import_value', 'export_value',
    'minmax_import', 'minmax_export',
    'standardized_import', 'standardized_export',
]
# X holds the predictors; y is kept as a one-column DataFrame with the target.
X = trade_data_all_years.loc[:, features]
y = trade_data_all_years.loc[:, ['sitc_eci']]

In [None]:
# Expand X into polynomial/interaction terms (no constant bias column);
# the degree is left at the PolynomialFeatures default.
poly_expander = preproc.PolynomialFeatures(include_bias=False)
X2 = poly_expander.fit_transform(X)

In [None]:
# Split the plain features, the polynomial features and the target together so
# all three share the same 80/20 row partition (fixed seed for reproducibility).
split_parts = train_test_split(X, X2, y, test_size=0.2, random_state=42)
X1_train, X1_test, X2_train, X2_test, y_train, y_test = split_parts

In [None]:
def evaluate_feature(X_train, X_test, y_train, y_test):
    """Train a linear regression on one feature set and score it on held-out data.

    Parameters:
        X_train, y_train: features and target used to fit the model.
        X_test, y_test: features and target used to score the fitted model.

    Returns:
        A ``(model, score)`` tuple: the fitted ``LinearRegression`` and the
        value of its ``score`` method on the test data.
    """
    regressor = linear_model.LinearRegression()
    regressor.fit(X_train, y_train)
    test_score = regressor.score(X_test, y_test)
    return regressor, test_score

In [None]:
# Fit and score the same linear model on the plain features (X1) and on the
# polynomial expansion (X2), then report both test scores.
m1, r1 = evaluate_feature(
    X1_train, X1_test, y_train, y_test
)
m2, r2 = evaluate_feature(
    X2_train, X2_test, y_train, y_test
)
print("R squared for linear model: ", r1)
print("R squared for polynomial model: ", r2)

In [None]:
# Show the test score of the model fitted on the plain (non-polynomial) features.
r1

In [None]:
# Text feature extraction
# NOTE(review): json, CountVectorizer and js are not used in the cells visible
# here -- presumably a placeholder for later work; confirm or remove.
import json
from sklearn.feature_extraction.text import CountVectorizer
# Empty list; nothing is appended to it in this cell.
js =[]

In [None]:
import sagemaker
import boto3
# Initial session; used below to look up the default bucket.
sess = sagemaker.Session()
# SageMaker session bucket -> used for uploading data, models and logs.
# SageMaker will automatically create this bucket if it does not exist.
sagemaker_session_bucket=None
if sagemaker_session_bucket is None and sess is not None:
    # fall back to the session's default bucket when no bucket name is given
    sagemaker_session_bucket = sess.default_bucket()

try:
    # Resolve the IAM execution role via the SageMaker SDK.
    role = sagemaker.get_execution_role()
except ValueError:
    # get_execution_role raised ValueError: look the role up through IAM by name instead.
    # NOTE(review): assumes a role named 'sagemaker_execution_role' exists in the
    # account -- confirm.
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Re-create the session, this time passing the chosen bucket explicitly.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Report the resolved role, bucket and region.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

In [None]:
from sagemaker.huggingface.model import HuggingFaceModel
from sagemaker.serverless import ServerlessInferenceConfig

# Hub model configuration: model id and task passed to the container as
# environment variables. <https://huggingface.co/models>
hub = {
    'HF_MODEL_ID':'distilbert-base-uncased-finetuned-sst-2-english',
    'HF_TASK':'text-classification'
}

# Create the Hugging Face model object (uses `role` resolved in the previous cell).
huggingface_model = HuggingFaceModel(
   env=hub,                      # configuration for loading model from Hub
   role=role,                    # iam role with permissions to create an Endpoint
   transformers_version="4.17",  # transformers version used
   pytorch_version="1.10",        # pytorch version used
   py_version='py38',            # python version used
)

# Specify MemorySizeInMB and MaxConcurrency in the serverless config object
serverless_config = ServerlessInferenceConfig(
    memory_size_in_mb=4096, max_concurrency=10,
)

# Deploy the model using the serverless inference configuration above;
# the returned predictor is kept for later use.
predictor = huggingface_model.deploy(
    serverless_inference_config=serverless_config
)