# TensorFlow Transform - EUR/USD Forex Data

Following the TensorFlow Transform tutorial in Google's documentation, I loaded a EUR/USD forex data CSV via pandas and then converted it to the format that the `preprocessing_fn` expects based on the metadata.

In [469]:
import pprint
import tempfile
from datetime import datetime

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_transform as tft

import tensorflow_transform.beam as tft_beam
from tensorflow_transform.tf_metadata import dataset_metadata
from tensorflow_transform.tf_metadata import schema_utils

In [475]:
def preprocessing_fn(inputs):
    """Scale the price and volume columns for tf.Transform.

    Args:
        inputs: mapping from raw column name to its value. The keys
            'Open', 'High', 'Low', 'Close' and 'Volume' are read.

    Returns:
        A dict with the keys 'scaledOpen', 'scaledHigh', 'scaledLow',
        'scaledClose' and 'scaledVolume', each holding the result of
        ``tft.scale_to_0_1`` applied to the matching input column.
    """
    # Pull every raw column out of `inputs` up front.
    raw = {
        name: inputs[name]
        for name in ('Open', 'High', 'Low', 'Close', 'Volume')
    }

    # The scalers are created in Close, High, Low, Open, Volume order.
    scaled = {
        name: tft.scale_to_0_1(raw[name])
        for name in ('Close', 'High', 'Low', 'Open', 'Volume')
    }

    return {
        "scaledOpen": scaled['Open'],
        "scaledHigh": scaled['High'],
        "scaledLow": scaled['Low'],
        "scaledClose": scaled['Close'],
        "scaledVolume": scaled['Volume'],
    }

In [479]:
# The first line of this CSV is a data record, not a header. With pandas'
# default `header='infer'` that record is consumed as the column labels and
# dropped from the data, so read with `header=None` and name the columns
# explicitly instead.
# NOTE(review): column order is assumed to be Date, Open, High, Low, Close,
# Volume - confirm against the file.
# NOTE(review): this absolute path is machine-specific; consider a path
# relative to the notebook or a configurable data directory.
eurUsdDf = pd.read_csv(
    '/Users/marae/source/repos/Machine-Learning-Engineering/MLOps Coursera/MLlifeProd/TensorFlow Data Transform Tutorial/EURUSD_D1.csv',
    header=None,
    names=['Date', 'Open', 'High', 'Low', 'Close', 'Volume'],
)

In [480]:
# Map value-like column labels (what pandas produces when the file's first
# record is taken as the header) to real column names. Labels that are not
# present are ignored by `rename`.
eurUsdDf.rename(
    columns={
        '2007-01-01 00:00': 'Date',
        '1.31908': 'Open',
        '1.32176': 'High',
        '1.3165': 'Low',
        '1.3212': 'Close',
        '1995282': 'Volume',
    },
    inplace=True,
)

# Parse the timestamp column, then cast every numeric column to float32.
eurUsdDf['Date'] = pd.to_datetime(eurUsdDf['Date'])
for numericColumn in ('Open', 'High', 'Low', 'Close', 'Volume'):
    eurUsdDf[numericColumn] = eurUsdDf[numericColumn].astype('float32')

## Lessons Learned

The below section represents the area of the assignment that I spent the most time on since, at first, it did not seem like I could use a dataframe at all to load the data to the preprocessing_fn. I tried so many different methods that at one point my import section was three times as long with probably four times more cells with scattered attempts at getting the right format for the data.

The method I eventually chose was one that I was aware of but did not take at first, as I felt there should be a more efficient way of doing this. For the sake of the assignment here, I stuck with the following solution, but I will ask around on Stack Overflow for something better.

In [484]:
# Convert the DataFrame into a list with one dict per row, keyed by column
# name, which is the in-memory input passed to the Beam transform further
# down.
#   * 'Date' becomes a UTF-8 byte string formatted as MM/DD/YYYY HH:MM:SS.
#   * Every other column becomes a plain Python float.
# `itertuples` walks the rows by position, so this works for any index
# (not just a default RangeIndex) and avoids a scalar `.iloc` lookup per
# cell. It also needs nothing beyond pandas.
columns = eurUsdDf.columns
eurUsdDfTransformed = [
    {
        column: (
            bytes(value.strftime("%m/%d/%Y %H:%M:%S"), 'utf-8')
            if column == "Date"
            else float(value)
        )
        for column, value in zip(columns, row)
    }
    for row in eurUsdDf.itertuples(index=False, name=None)
]
    
# Feature spec for the raw rows: 'Date' is a scalar string feature and each
# of the five numeric columns is a scalar float32 feature.
RAW_DATA_FEATURE_SPEC = {
    'Date': tf.io.FixedLenFeature([], tf.string),
    **{
        numeric_name: tf.io.FixedLenFeature([], tf.float32)
        for numeric_name in ('Open', 'High', 'Low', 'Close', 'Volume')
    },
}

# Wrap the schema derived from the feature spec in a DatasetMetadata object.
raw_schema = schema_utils.schema_from_feature_spec(RAW_DATA_FEATURE_SPEC)
raw_data_metadata = dataset_metadata.DatasetMetadata(raw_schema)

In [485]:
# Run the analyze + transform pass over the in-memory rows. tf.Transform
# writes its intermediate artifacts under a freshly created temp directory.
tft_temp_dir = tempfile.mkdtemp()
with tft_beam.Context(temp_dir=tft_temp_dir):
    raw_dataset = (eurUsdDfTransformed, raw_data_metadata)
    analyze_and_transform = tft_beam.AnalyzeAndTransformDataset(preprocessing_fn)
    # pylint: disable=unused-variable
    transformed_dataset, transform_fn = raw_dataset | analyze_and_transform










INFO:tensorflow:Assets written to: C:\Users\marae\AppData\Local\Temp\tmpyz_1wibh\tftransform_tmp\5fb0ea0df7f5439392acffa3b9ac0c4f\assets


INFO:tensorflow:Assets written to: C:\Users\marae\AppData\Local\Temp\tmpyz_1wibh\tftransform_tmp\5fb0ea0df7f5439392acffa3b9ac0c4f\assets


INFO:tensorflow:Assets written to: C:\Users\marae\AppData\Local\Temp\tmpyz_1wibh\tftransform_tmp\4865b783a887489281131ab9ddeea2f6\assets


INFO:tensorflow:Assets written to: C:\Users\marae\AppData\Local\Temp\tmpyz_1wibh\tftransform_tmp\4865b783a887489281131ab9ddeea2f6\assets


In [486]:
# Split the transformed dataset into its rows and its metadata.
# pylint: disable=unused-variable
transformed_data, transformed_metadata = transformed_dataset

# Pretty-print the raw rows and the scaled rows, one after the other.
raw_dump = pprint.pformat(eurUsdDfTransformed)
transformed_dump = pprint.pformat(transformed_data)
print('\nRaw data:\n{}\n'.format(raw_dump))
print('Transformed data:\n{}'.format(transformed_dump))


Raw data:
[{'Close': 1.3212000131607056,
  'Date': b'01/01/2007 00:00:00',
  'High': 1.3217600584030151,
  'Low': 1.316499948501587,
  'Open': 1.3190799951553345,
  'Volume': 1995282.0},
 {'Close': 1.327239990234375,
  'Date': b'01/02/2007 00:00:00',
  'High': 1.329509973526001,
  'Low': 1.3210099935531616,
  'Open': 1.321370005607605,
  'Volume': 2009661.0},
 {'Close': 1.3169000148773193,
  'Date': b'01/03/2007 00:00:00',
  'High': 1.3290300369262695,
  'Low': 1.3146100044250488,
  'Open': 1.3270200490951538,
  'Volume': 2016603.0},
 {'Close': 1.3085999488830566,
  'Date': b'01/04/2007 00:00:00',
  'High': 1.317829966545105,
  'Low': 1.3070399761199951,
  'Open': 1.316890001296997,
  'Volume': 2007990.0},
 {'Close': 1.2999500036239624,
  'Date': b'01/05/2007 00:00:00',
  'High': 1.310289978981018,
  'Low': 1.2980400323867798,
  'Open': 1.3086600303649902,
  'Volume': 1916561.0},
 {'Close': 1.3003900051116943,
  'Date': b'01/07/2007 00:00:00',
  'High': 1.3014099597930908,
  'Low': 1.


Transformed data:
[{'scaledClose': 0.5054913,
  'scaledHigh': 0.49808773,
  'scaledLow': 0.50848764,
  'scaledOpen': 0.5016931,
  'scaledVolume': 0.66976035},
 {'scaledClose': 0.5163128,
  'scaledHigh': 0.5118749,
  'scaledLow': 0.5166064,
  'scaledOpen': 0.50579613,
  'scaledVolume': 0.67458695},
 {'scaledClose': 0.49778733,
  'scaledHigh': 0.5110211,
  'scaledLow': 0.50508547,
  'scaledOpen': 0.51591927,
  'scaledVolume': 0.6769172},
 {'scaledClose': 0.48291665,
  'scaledHigh': 0.49109605,
  'scaledLow': 0.4914583,
  'scaledOpen': 0.4977693,
  'scaledVolume': 0.6740261},
 {'scaledClose': 0.46741912,
  'scaledHigh': 0.47768232,
  'scaledLow': 0.47525707,
  'scaledOpen': 0.48302367,
  'scaledVolume': 0.6433359},
 {'scaledClose': 0.46820745,
  'scaledHigh': 0.46188468,
  'scaledLow': 0.47889337,
  'scaledOpen': 0.4692454,
  'scaledVolume': 0.032554865},
 {'scaledClose': 0.47388694,
  'scaledHigh': 0.46629667,
  'scaledLow': 0.47378087,
  'scaledOpen': 0.46849293,
  'scaledVolume': 0.67

  'scaledVolume': 0.008024923}]
