In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import requests

from sklearn.preprocessing import StandardScaler

def apply_pretreatment(dataframe, pretreatment_attrs=None,
                       url="http://127.0.0.1:4000/graphql", timeout=None):
    """Run the ``featuresTargetsPretreatment`` GraphQL query on a DataFrame.

    The frame's values, index and columns are JSON-encoded and embedded
    directly in the query text, the query is POSTed to ``url``, and on success
    a new DataFrame is built from the ``pretreated_values``, ``columns`` and
    ``index`` fields of the reply.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data to send. Values and labels must be JSON-serializable.
    pretreatment_attrs : optional
        Passed through ``json.dumps`` and embedded as the ``pretreatment_attrs``
        argument. ``None`` (the default) is sent as ``null``.
    url : str, optional
        GraphQL endpoint. Defaults to the local service address.
    timeout : float or tuple, optional
        Forwarded to ``requests.post``. ``None`` (the default) means no timeout.

    Returns
    -------
    tuple
        ``(pretreated_dataframe, response)`` where ``response`` is the decoded
        JSON reply. ``pretreated_dataframe`` is ``None`` when the reply carries
        no ``featuresTargetsPretreatment`` payload or reports
        ``success == False``; the error details are printed in that case.

    Raises
    ------
    requests.exceptions.RequestException
        If the HTTP request itself fails (e.g. the service is not running).
    """
    values = json.dumps(dataframe.values.tolist())
    index = json.dumps(dataframe.index.to_list())
    columns = json.dumps(dataframe.columns.to_list())
    # json.dumps(None) yields `null`, so the default needs no special casing.
    pretreatment_attrs = json.dumps(pretreatment_attrs)
    # NOTE(review): the JSON text is spliced into the query as literals; NaN or
    # infinite values would serialize to tokens that are not valid there.
    query = f"""
        query {{
            featuresTargetsPretreatment(values: {values}, index: {index}, columns: {columns}, pretreatment_attrs: {pretreatment_attrs}) {{
                success,
                error,
                pretreated_values,
                index,
                columns,
                pretreatment_info
            }}
        }}
    """

    response = requests.post(url, json={"query": query}, timeout=timeout).json()

    # A reply may come back without a usable `data` payload (missing or null);
    # report whatever was returned instead of failing on the subscript.
    result = (response.get('data') or {}).get('featuresTargetsPretreatment')
    if result is None:
        print(response.get('errors', response))
        return None, response

    if not result['success']:
        print(result['error'])
        print(result)
        # No frame can be built; return None rather than an unbound local.
        return None, response

    pretreated_dataframe = pd.DataFrame(
        data=result['pretreated_values'],
        columns=result['columns'],
        index=result['index'],
    )
    return pretreated_dataframe, response

In [2]:
# Read only the listed columns from the CSV and use DATE as the row index.
dataframe = pd.read_csv(
    'my_data.csv',
    index_col='DATE',
    usecols=['DATE', 'OPCP', 'HPCP', 'LPCP', 'CPCP', 'ACPCP', 'VTCP', 'MPN5P'],
)

# Datetime conversion is left disabled, so the index keeps the raw DATE strings.
#dataframe.index = pd.to_datetime(dataframe.index)
dataframe.head()

Unnamed: 0_level_0,OPCP,HPCP,LPCP,CPCP,ACPCP,VTCP,MPN5P
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
9/30/2010,1.476667,1.346,1.360667,1.360667,32937000,0,1.379333
10/1/2010,1.383333,1.354,1.373333,1.373333,8965500,0,1.376
10/4/2010,1.411333,1.353333,1.399333,1.399333,9654000,0,1.367333
10/5/2010,1.418667,1.400667,1.408,1.408,4980000,0,1.362
10/6/2010,1.417333,1.354667,1.364,1.364,4701000,0,1.362


In [3]:
# Last index label that belongs to the training set.
train_end = '4/14/2021'

# Label slicing with .loc includes both endpoints, so the train_end row lands in
# `train`; the same row is then removed from the tail slice to form `test`.
train = dataframe.loc[:train_end]
test = dataframe.loc[train_end:].drop(index=train_end)

print(f"The length of the train data is {len(train)}")
print(f"The length of the test data is {len(test)}")


The length of the train data is 2652
The length of the test data is 1056


#### Pretreat Train

In [4]:
# Pretreat the training frame; no pretreatment_attrs are supplied for this call.
pretreated_train, ftp_response = apply_pretreatment(train)

# pretreatment_info comes back as a JSON string; decode it so it can be reused later.
pretreatment_object = json.loads(
    ftp_response['data']['featuresTargetsPretreatment']['pretreatment_info']
)

pretreated_train.head()

ConnectionError: HTTPConnectionPool(host='127.0.0.1', port=4000): Max retries exceeded with url: /graphql (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000027B2742B150>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))

In [43]:
# Display the object decoded from the train response's pretreatment_info field.
pretreatment_object

{'OPCP': {'T': '{"method": "box-cox", "standardize": true, "copy": true, "n_features_in_": 1, "lambdas_": -1.0411631296968533, "_scaler": {"with_mean": true, "with_std": true, "copy": false, "n_features_in_": 1, "n_samples_seen_": 5705, "mean_": 0.9380337656403179, "var_": 0.0001306305565661163, "scale_": 0.01142937253597573}}',
  'S': '{"copy": true, "n_features_in_": 1, "n_samples_seen_": 5705, "max_abs_": 2.980196915882384, "scale_": 2.980196915882384}'},
 'HPCP': {'T': '{"method": "box-cox", "standardize": true, "copy": true, "n_features_in_": 1, "lambdas_": -1.0509376502664403, "_scaler": {"with_mean": true, "with_std": true, "copy": false, "n_features_in_": 1, "n_samples_seen_": 5705, "mean_": 0.9303049320557474, "var_": 0.00011768459833065035, "scale_": 0.010848253238685495}}',
  'S': '{"feature_range": [0, 1], "copy": true, "clip": false, "n_features_in_": 1, "n_samples_seen_": 5705, "scale_": 0.2129631062592233, "min_": 0.6229450977175159, "data_min_": -2.9251315340942377, "da

#### Pretreat Test (using the parameters fitted on train)

In [44]:
# Apply the pretreatment to the test frame, passing the object decoded from the
# train response. It is JSON-encoded here and apply_pretreatment JSON-encodes its
# pretreatment_attrs argument again, so the query receives it as a string literal.
# Note: this reassigns ftp_response, replacing the train response.
pretreated_test, ftp_response = apply_pretreatment(
    dataframe=test,
    pretreatment_attrs=json.dumps(pretreatment_object),
)

pretreated_test.head()

Unnamed: 0,OPCP,HPCP,LPCP,CPCP,ACPCP,VTCP,MPN5P
2022-10-31,0.562155,0.979429,1.675193,1.256052,1.185767,-0.185046,1.250554
2022-11-01,0.562515,0.979649,1.670101,1.252841,1.182926,-0.154553,1.250554
2022-11-02,0.560274,0.978439,1.659886,1.245933,1.176846,-0.063394,1.25291
2022-11-03,0.555916,0.975257,1.650855,1.240594,1.172175,-0.082299,1.255457
2022-11-04,0.554668,0.975617,1.65001,1.247072,1.177845,-0.080608,1.256603
