In [82]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import requests

from sklearn.preprocessing import StandardScaler

def apply_pretreatment(dataframe,pretreatment_attrs=None,url="http://172.31.39.13:8000/graphql",timeout=None):
    """Run the ``featuresTargetsPretreatment`` GraphQL query on a DataFrame.

    The frame's values, index and columns are JSON-encoded straight into the
    query text, posted to ``url``, and the pretreated values that come back
    are rebuilt into a new DataFrame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to pretreat.
    pretreatment_attrs : optional
        JSON-encoded into the query as the ``pretreatment_attrs`` argument.
        ``None`` (the default) is sent as ``null``; a ``str`` is sent as a
        quoted string literal.
    url : str, optional
        GraphQL endpoint. Defaults to the address this notebook has always used.
    timeout : float or tuple, optional
        Forwarded to ``requests.post``. ``None`` (the default) waits
        indefinitely, which is the previous behaviour.

    Returns
    -------
    tuple
        ``(pretreated_dataframe, response)`` where ``response`` is the decoded
        JSON body. ``pretreated_dataframe`` is ``None`` when the service
        reports ``success == False`` or the response carries no
        ``featuresTargetsPretreatment`` payload; the reason is printed.
    """
    values = json.dumps(dataframe.values.tolist())
    index = json.dumps(dataframe.index.to_list())
    columns = json.dumps(dataframe.columns.to_list())
    pretreatment_attrs = json.dumps(pretreatment_attrs)
    query = f"""
        query {{
            featuresTargetsPretreatment(values: {values}, index: {index}, columns: {columns}, pretreatment_attrs: {pretreatment_attrs}) {{
                success,
                error,
                pretreated_values,
                index,
                columns,
                pretreatment_info
            }}
        }}
    """

    # `json=` makes requests serialise the payload and set the JSON Content-Type itself.
    response = requests.post(url, json={"query": query}, timeout=timeout).json()

    # A GraphQL-level failure (e.g. a malformed query) comes back with `data`
    # missing or null, so guard before subscripting into it.
    result = (response.get('data') or {}).get('featuresTargetsPretreatment')
    if result is None:
        print(response.get('errors'))
        return None, response

    if not result['success']:
        print(result['error'])
        print(result)
        # There is no frame to build; hand back None rather than failing on an
        # unassigned local.
        return None, response

    pretreated_dataframe = pd.DataFrame(
        data=result['pretreated_values'],
        columns=result['columns'],
        index=result['index']
    )

    return pretreated_dataframe, response

In [71]:
# Load the selected columns of the raw MSFT CSV, using DATE as the row index.
# The index is kept exactly as read from the file (no pd.to_datetime conversion).
dataframe = pd.read_csv(
    '../data/msft_raw.csv',
    index_col='DATE',
    usecols=['DATE', 'OPCP', 'HPCP', 'LPCP', 'CPCP', 'ACPCP', 'VTCP', 'MPN5P'],
)

dataframe.head()

Unnamed: 0_level_0,OPCP,HPCP,LPCP,CPCP,ACPCP,VTCP,MPN5P
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-02-29,45.875,45.875,44.4375,44.6875,27.880445,58437400,46.765625
2000-03-01,44.8125,47.046875,44.46875,45.40625,28.32888,106889800,47.070312
2000-03-02,45.90625,47.6875,45.5625,46.6875,29.128248,106932600,47.65625
2000-03-03,47.375,49.4375,46.9375,48.0625,29.98611,101435200,48.0
2000-03-06,48.0,48.6875,45.0625,45.3125,28.27039,93609400,48.75


In [72]:
# Rows up to and including `train_end` form the training set; later rows form the test set.
train_end = '2022-10-28'
train = dataframe.loc[:train_end,:]
# The label slice includes `train_end` itself, so it is dropped from the test side.
# errors='ignore' keeps the split working when `train_end` is not a row label
# (on a sorted index the slice then already starts after it, while a plain
# drop would raise KeyError).
test = dataframe.loc[train_end:,:].drop(train_end,axis=0,errors='ignore')

#### Scale Train

In [86]:
# Pretreat the training rows (no pretreatment_attrs supplied), then decode the
# returned `pretreatment_info` JSON so it can be reused for the test rows below.
scaled_train, ftp_response = apply_pretreatment(train)
scaler_objects = json.loads(
    ftp_response['data']['featuresTargetsPretreatment']['pretreatment_info']
)

scaled_train.head()

Unnamed: 0,OPCP,HPCP,LPCP,CPCP,ACPCP,VTCP,MPN5P
2000-02-29,-0.292855,-0.299276,-0.305834,-0.308935,-0.435561,0.197057,-0.282037
2000-03-01,-0.307132,-0.283689,-0.305409,-0.299278,-0.429645,1.759482,-0.277948
2000-03-02,-0.292435,-0.275168,-0.290547,-0.282064,-0.4191,1.760862,-0.270085
2000-03-03,-0.272699,-0.251891,-0.271864,-0.26359,-0.407782,1.58359,-0.265471
2000-03-06,-0.2643,-0.261867,-0.297341,-0.300537,-0.430417,1.331234,-0.255406


#### Scale Test

In [100]:
# scaler_objects is serialised here so it travels as one JSON string argument;
# apply_pretreatment JSON-encodes that string again into a quoted literal for the query.
scaled_test, ftp_response = apply_pretreatment(
    dataframe=test,
    pretreatment_attrs=json.dumps(scaler_objects),
)

In [65]:
def transform_test(test,scaler_objects):
    """Scale columns of ``test`` using stored per-column scaler attributes.

    Parameters
    ----------
    test : pandas.DataFrame
        Frame holding a column for every key of ``scaler_objects``.
    scaler_objects : dict
        Maps a column name to a mapping whose ``'S'`` entry is a JSON string
        with the keys ``with_mean``, ``with_std``, ``copy``,
        ``n_features_in_``, ``n_samples_seen_``, ``mean_``, ``var_`` and
        ``scale_``.

    Returns
    -------
    pandas.DataFrame
        One scaled column per key of ``scaler_objects``, in key order,
        labelled with that key and indexed like ``test``.

    Raises
    ------
    KeyError
        If a key of ``scaler_objects`` is not a column of ``test``, or a
        scaler entry lacks one of the expected attributes.
    """
    scaled_columns = {}
    for col, col_info in scaler_objects.items():
        scaler_attrs = json.loads(col_info['S'])

        # Rebuild a fitted scaler by restoring the stored attributes on a
        # fresh, unfitted instance.
        scaler = StandardScaler()
        scaler.with_mean = scaler_attrs['with_mean']
        scaler.with_std = scaler_attrs['with_std']
        scaler.copy = scaler_attrs['copy']
        scaler.n_features_in_ = scaler_attrs['n_features_in_']
        scaler.n_samples_seen_ = np.int64(scaler_attrs['n_samples_seen_'])
        scaler.mean_ = np.array([scaler_attrs['mean_']])
        scaler.var_ = np.array([scaler_attrs['var_']])
        scaler.scale_ = np.array([scaler_attrs['scale_']])

        # transform() is given a 2-D (n_rows, 1) array; flatten back to 1-D.
        column_values = test[col].values.reshape(-1,1)
        # Key each result by the column it was computed from, so labels stay
        # correct even if the key order differs from test.columns.
        scaled_columns[col] = scaler.transform(column_values).flatten()

    return pd.DataFrame(scaled_columns, index=test.index)

# Scale the test rows locally from the scaler attributes decoded out of the training response.
scaled_test = transform_test(test=test, scaler_objects=scaler_objects)
scaled_test.head()

Unnamed: 0_level_0,OPCP,HPCP,LPCP,CPCP,ACPCP,VTCP,MPN5P
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-10-31,2.231828,2.215267,2.231213,2.209451,2.24277,-0.772923,2.064217
2022-11-01,2.243115,2.226174,2.179307,2.156247,2.190805,-0.700852,2.064217
2022-11-02,2.174047,2.167116,2.080251,2.047822,2.084906,-0.448855,2.10099
2022-11-03,2.048139,2.022265,1.997908,1.969224,2.008139,-0.506031,2.141789
2022-11-04,2.014008,2.03796,1.990434,2.065154,2.101834,-0.501027,2.160511


In [66]:
# Scratch check: None serialises to the JSON literal `null`, which is what
# apply_pretreatment embeds in the query when pretreatment_attrs is left at its default.
json.dumps(None)

'null'