In [1]:
import tensorflow as tf
from tensorflow.python.client import device_lib
import pandas as pd
import numpy as np

In [2]:
def get_available_gpu_list():
    """Return the names of the local devices whose ``device_type`` is ``'GPU'``.

    Devices are enumerated with ``device_lib.list_local_devices()``; the
    result is a list of their ``name`` attributes, in enumeration order.
    """
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names

In [3]:
%time
get_available_gpu_list()

Wall time: 0 ns


['/device:GPU:0', '/device:GPU:1']

In [4]:
# Directory holding the per-pair CSV files. The trailing slash is required
# because file names are appended to it with plain string concatenation.
DATA_PATH = "D:/Data/Forex/"

In [5]:
# Columns that are loaded as 32-bit floats.
float32_cols = [
    "Open",
    "High",
    "Low",
    "Close",
]

# Columns that are loaded as 32-bit integers.
int32_cols = [
    "Volume",
    "NumberOfTrades",
    "BidVolume",
    "AskVolume",
]

In [6]:
# Load the USD/CAD CSV. `header=0` together with `names` replaces the file's
# own header row with the column labels listed here; the price and volume
# columns are given explicit 32-bit dtypes.
df_usdcad = pd.read_csv(
    DATA_PATH + "USDCAD.csv",
    header=0,
    names=["Date", "Time"] + float32_cols + int32_cols,
    dtype={
        **dict.fromkeys(float32_cols, np.float32),
        **dict.fromkeys(int32_cols, np.int32),
    },
)

In [7]:
# Load the EUR/USD CSV. `header=0` together with `names` replaces the file's
# own header row with the column labels listed here; the price and volume
# columns are given explicit 32-bit dtypes.
df_eurusd = pd.read_csv(
    DATA_PATH + "EURUSD.csv",
    header=0,
    names=["Date", "Time"] + float32_cols + int32_cols,
    dtype={
        **dict.fromkeys(float32_cols, np.float32),
        **dict.fromkeys(int32_cols, np.int32),
    },
)

In [8]:
# Show (rows, columns) for each frame, one per line.
print(df_usdcad.shape, df_eurusd.shape, sep="\n")

(72274072, 10)
(82332324, 10)


In [9]:
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() only appended a stray "None".
df_usdcad.info()
df_eurusd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72274072 entries, 0 to 72274071
Data columns (total 10 columns):
Date              object
Time              object
Open              float32
High              float32
Low               float32
Close             float32
Volume            int32
NumberOfTrades    int32
BidVolume         int32
AskVolume         int32
dtypes: float32(4), int32(4), object(2)
memory usage: 3.2+ GB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 82332324 entries, 0 to 82332323
Data columns (total 10 columns):
Date              object
Time              object
Open              float32
High              float32
Low               float32
Close             float32
Volume            int32
NumberOfTrades    int32
BidVolume         int32
AskVolume         int32
dtypes: float32(4), int32(4), object(2)
memory usage: 3.7+ GB
None


In [10]:
def _combine_date_time(frame):
    """Return ``frame`` with ``Date``/``Time`` replaced by a parsed ``DateTime``.

    The two text columns are stripped of surrounding whitespace and joined
    with a single explicit space, so the result matches the
    ``'%Y/%m/%d %H:%M:%S'`` format regardless of whether the raw text carries
    leading or trailing blanks. The new ``DateTime`` column is appended last.

    A frame that already has a ``DateTime`` column is returned unchanged,
    which makes the cell safe to re-run. The input frame is not modified.
    """
    if "DateTime" in frame.columns:
        return frame

    stamps = frame["Date"].str.strip() + " " + frame["Time"].str.strip()
    # Drop the text columns first so they are not carried along while parsing.
    converted = frame.drop(columns=["Date", "Time"])
    converted["DateTime"] = pd.to_datetime(stamps, format="%Y/%m/%d %H:%M:%S")
    return converted


df_usdcad = _combine_date_time(df_usdcad)
df_eurusd = _combine_date_time(df_eurusd)

In [11]:
# Preview the first five rows of the USD/CAD frame.
df_usdcad.head(n=5)

Unnamed: 0,Open,High,Low,Close,Volume,NumberOfTrades,BidVolume,AskVolume,DateTime
0,1.00261,1.00264,1.00261,1.00264,2,2,0,0,2007-12-03 09:17:13
1,1.00258,1.00258,1.00249,1.0025,3,3,0,0,2007-12-03 09:17:21
2,1.00264,1.00264,1.00251,1.00251,3,3,0,0,2007-12-03 09:17:24
3,1.0025,1.00264,1.0025,1.00264,5,5,0,0,2007-12-03 09:17:28
4,1.00267,1.00267,1.00267,1.00267,1,1,0,0,2007-12-03 09:17:35


In [12]:
# Preview the first five rows of the EUR/USD frame.
df_eurusd.head(n=5)

Unnamed: 0,Open,High,Low,Close,Volume,NumberOfTrades,BidVolume,AskVolume,DateTime
0,1.46351,1.46354,1.46351,1.46354,2,2,0,0,2007-12-03 09:16:54
1,1.46351,1.46351,1.46347,1.46347,3,3,0,0,2007-12-03 09:16:56
2,1.46354,1.46354,1.46354,1.46354,1,1,0,0,2007-12-03 09:17:01
3,1.46346,1.46347,1.46344,1.46344,3,3,0,0,2007-12-03 09:17:04
4,1.46347,1.46347,1.46338,1.46344,5,5,0,0,2007-12-03 09:17:08


In [13]:
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() only appended a stray "None".
df_usdcad.info()
df_eurusd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72274072 entries, 0 to 72274071
Data columns (total 9 columns):
Open              float32
High              float32
Low               float32
Close             float32
Volume            int32
NumberOfTrades    int32
BidVolume         int32
AskVolume         int32
DateTime          datetime64[ns]
dtypes: datetime64[ns](1), float32(4), int32(4)
memory usage: 2.7 GB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 82332324 entries, 0 to 82332323
Data columns (total 9 columns):
Open              float32
High              float32
Low               float32
Close             float32
Volume            int32
NumberOfTrades    int32
BidVolume         int32
AskVolume         int32
DateTime          datetime64[ns]
dtypes: datetime64[ns](1), float32(4), int32(4)
memory usage: 3.1 GB
None


In [14]:
# Write each frame to its own HDF5 file (mode="w" overwrites an existing
# file). The paths are built from DATA_PATH instead of repeating the
# directory literal, so the data location is configured in one place.
df_usdcad.to_hdf(
    DATA_PATH + "USDCAD_modified.h5",
    key="USDCAD_modified",
    mode="w",
    format="fixed",
)
df_eurusd.to_hdf(
    DATA_PATH + "EURUSD_modified.h5",
    key="EURUSD_modified",
    mode="w",
    format="fixed",
)