In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import requests

from sklearn.preprocessing import StandardScaler

def apply_pretreatment(dataframe, pretreatment_attrs=None,
                       url="http://172.31.39.13:8000/graphql", timeout=60):
    """Run the ``featuresTargetsPretreatment`` GraphQL query on a DataFrame.

    The frame's values, index and columns are serialised with ``json.dumps``
    and embedded as literals in the query text, which is POSTed to ``url``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data to pretreat. Its values, index labels and column labels must be
        JSON-serialisable (e.g. string dates rather than Timestamps).
    pretreatment_attrs : optional
        Passed through ``json.dumps`` before being embedded in the query, so
        ``None`` is sent as ``null`` and a ``str`` is sent as a quoted string
        literal. Presumably the previously returned ``pretreatment_info`` used
        to re-apply an already fitted pretreatment -- confirm against the
        service schema.
    url : str
        GraphQL endpoint. Defaults to the address the notebook originally
        hard-coded.
    timeout : float or None
        Seconds to wait for the HTTP request before ``requests`` gives up.
        ``None`` waits indefinitely.

    Returns
    -------
    (pandas.DataFrame, dict)
        The pretreated frame rebuilt from ``pretreated_values`` / ``columns``
        / ``index`` in the reply, and the full decoded JSON reply.

    Raises
    ------
    RuntimeError
        If the reply has no ``featuresTargetsPretreatment`` payload (for
        example a GraphQL-level error) or the payload reports
        ``success == False``.
    """
    # NOTE(review): json.dumps emits NaN/inf cells as bare NaN/Infinity
    # tokens -- confirm the server accepts them before sending frames with gaps.
    values = json.dumps(dataframe.values.tolist())
    index = json.dumps(dataframe.index.to_list())
    columns = json.dumps(dataframe.columns.to_list())
    pretreatment_attrs = json.dumps(pretreatment_attrs)
    query = f"""
        query {{
            featuresTargetsPretreatment(values: {values}, index: {index}, columns: {columns}, pretreatment_attrs: {pretreatment_attrs}) {{
                success,
                error,
                pretreated_values,
                index,
                columns,
                pretreatment_info
            }}
        }}
    """

    # `json=` already sets the JSON content type; the timeout keeps an
    # unreachable service from blocking the kernel forever.
    response = requests.post(url, json={"query": query}, timeout=timeout).json()

    # A GraphQL-level failure comes back with `data` null/missing, so guard
    # before indexing into it.
    result = (response.get("data") or {}).get("featuresTargetsPretreatment")
    if result is None:
        raise RuntimeError(
            "GraphQL reply has no featuresTargetsPretreatment payload; "
            f"errors: {response.get('errors')!r}"
        )

    # Fail loudly with the server's message instead of falling through to a
    # return of a variable that was never assigned.
    if not result["success"]:
        raise RuntimeError(
            f"featuresTargetsPretreatment failed: {result['error']!r}; "
            f"payload: {result!r}"
        )

    pretreated_dataframe = pd.DataFrame(
        data=result["pretreated_values"],
        columns=result["columns"],
        index=result["index"],
    )

    return pretreated_dataframe, response

In [40]:
# Load only the needed columns of the raw file and use DATE as the row index.
dataframe = pd.read_csv(
    '../data/msft_raw.csv',
    usecols=['DATE', 'OPCP', 'HPCP', 'LPCP', 'CPCP', 'ACPCP', 'VTCP', 'MPN5P'],
    index_col='DATE',
)

# NOTE(review): the index is left as date strings (no pd.to_datetime).
# apply_pretreatment json.dumps the index, which would presumably fail on
# Timestamp labels -- confirm before converting.
dataframe.head()

Unnamed: 0_level_0,OPCP,HPCP,LPCP,CPCP,ACPCP,VTCP,MPN5P
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-02-29,45.875,45.875,44.4375,44.6875,27.880445,58437400,46.765625
2000-03-01,44.8125,47.046875,44.46875,45.40625,28.32888,106889800,47.070312
2000-03-02,45.90625,47.6875,45.5625,46.6875,29.128248,106932600,47.65625
2000-03-03,47.375,49.4375,46.9375,48.0625,29.98611,101435200,48.0
2000-03-06,48.0,48.6875,45.0625,45.3125,28.27039,93609400,48.75


In [41]:
# Last date (inclusive) of the training window; later rows form the test set.
train_end = '2022-10-28'

# Boolean masks instead of label slicing followed by .drop(train_end): the
# drop raises KeyError whenever the cut-off date is not itself a row label
# (e.g. a non-trading day). The DATE labels are ISO 'YYYY-MM-DD' strings, so
# string comparison orders them chronologically.
train = dataframe.loc[dataframe.index <= train_end, :]
test = dataframe.loc[dataframe.index > train_end, :]

#### Pretreat Train

In [42]:
# No pretreatment_attrs are passed for the training window; the reply's
# pretreatment_info (a JSON string) is decoded so it can be reused later.
pretreated_train, ftp_response = apply_pretreatment(train)
pretreatment_object = json.loads(
    ftp_response['data']['featuresTargetsPretreatment']['pretreatment_info']
)

pretreated_train.head()

Unnamed: 0,OPCP,HPCP,LPCP,CPCP,ACPCP,VTCP,MPN5P
2000-02-29,0.133424,0.704556,0.359598,0.447105,0.218614,0.103702,0.497695
2000-03-01,0.120456,0.713322,0.360765,0.463347,0.234207,0.341638,0.504029
2000-03-02,0.133797,0.717927,0.400573,0.491035,0.2609,0.341794,0.515977
2000-03-03,0.150723,0.729885,0.447946,0.519075,0.288081,0.321113,0.522848
2000-03-06,0.157606,0.724868,0.382619,0.461258,0.232199,0.289609,0.537496


In [43]:
# Inspect the decoded pretreatment_info: one entry per column, each holding
# JSON-encoded parameter strings under the 'T' and 'S' keys.
pretreatment_object

{'OPCP': {'T': '{"method": "box-cox", "standardize": true, "copy": true, "n_features_in_": 1, "lambdas_": -1.0411631296968533, "_scaler": {"with_mean": true, "with_std": true, "copy": false, "n_features_in_": 1, "n_samples_seen_": 5705, "mean_": 0.9380337656403179, "var_": 0.0001306305565661163, "scale_": 0.01142937253597573}}',
  'S': '{"copy": true, "n_features_in_": 1, "n_samples_seen_": 5705, "max_abs_": 2.980196915882384, "scale_": 2.980196915882384}'},
 'HPCP': {'T': '{"method": "box-cox", "standardize": true, "copy": true, "n_features_in_": 1, "lambdas_": -1.0509376502664403, "_scaler": {"with_mean": true, "with_std": true, "copy": false, "n_features_in_": 1, "n_samples_seen_": 5705, "mean_": 0.9303049320557474, "var_": 0.00011768459833065035, "scale_": 0.010848253238685495}}',
  'S': '{"feature_range": [0, 1], "copy": true, "clip": false, "n_features_in_": 1, "n_samples_seen_": 5705, "scale_": 0.2129631062592233, "min_": 0.6229450977175159, "data_min_": -2.9251315340942377, "da

#### Pretreat Test (reusing the parameters fitted on Train)

In [44]:
# Send the test window together with the parameters obtained from the train
# call, re-encoded as a JSON string. Note that ftp_response is overwritten
# with the reply for the test window.
pretreated_test, ftp_response = apply_pretreatment(
    test,
    pretreatment_attrs=json.dumps(pretreatment_object),
)

pretreated_test.head()

Unnamed: 0,OPCP,HPCP,LPCP,CPCP,ACPCP,VTCP,MPN5P
2022-10-31,0.562155,0.979429,1.675193,1.256052,1.185767,-0.185046,1.250554
2022-11-01,0.562515,0.979649,1.670101,1.252841,1.182926,-0.154553,1.250554
2022-11-02,0.560274,0.978439,1.659886,1.245933,1.176846,-0.063394,1.25291
2022-11-03,0.555916,0.975257,1.650855,1.240594,1.172175,-0.082299,1.255457
2022-11-04,0.554668,0.975617,1.65001,1.247072,1.177845,-0.080608,1.256603
