In [None]:
#Import necessary libraries

import pandas as pd
import numpy as np
import numba as nb

In [None]:
#Load the dataset
from google.colab import drive
# Mount Google Drive so the CSV stored under MyDrive is reachable from the runtime.
drive.mount('/content/drive')

# The file is read as tab-separated with no header row, so the six column
# labels are attached explicitly right after parsing.
Data = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/EURUSD1.csv',
    sep="\t",
    header=None,
).set_axis(["Time", "Open", "High", "Low", "Close", "Volume"], axis=1)

Mounted at /content/drive


In [None]:
#Initial check
# First rows, column dtypes, structural summary, missing-value counts and
# the number of fully duplicated rows.
print(Data.head(10))
print('\n')
print(Data.dtypes)
print('\n')
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
Data.info()
print('\n')
print(Data.isnull().sum())
print('\n')
print(Data.duplicated().sum())

               Time     Open     High      Low    Close  Volume
0  2024-06-12 10:22  1.07518  1.07519  1.07511  1.07518     146
1  2024-06-12 10:23  1.07517  1.07521  1.07515  1.07519     104
2  2024-06-12 10:24  1.07518  1.07524  1.07517  1.07522      83
3  2024-06-12 10:25  1.07521  1.07524  1.07516  1.07517     115
4  2024-06-12 10:26  1.07519  1.07535  1.07519  1.07529     110
5  2024-06-12 10:27  1.07530  1.07530  1.07524  1.07530      50
6  2024-06-12 10:28  1.07532  1.07537  1.07529  1.07534      53
7  2024-06-12 10:29  1.07534  1.07537  1.07534  1.07536      44
8  2024-06-12 10:30  1.07537  1.07543  1.07537  1.07541      47
9  2024-06-12 10:31  1.07540  1.07546  1.07537  1.07540      33


Time       object
Open      float64
High      float64
Low       float64
Close     float64
Volume      int64
dtype: object


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195544 entries, 0 to 195543
Data columns (total 6 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  ---------

In [None]:
#Preprocessing - Convert Time column type
# Parse the textual timestamps into datetime values (replaces the column in place).
Data['Time'] = pd.to_datetime(Data['Time'])
# info() prints its own report and returns None, so it is not wrapped in print().
Data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195544 entries, 0 to 195543
Data columns (total 6 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   Time    195544 non-null  datetime64[ns]
 1   Open    195544 non-null  float64       
 2   High    195544 non-null  float64       
 3   Low     195544 non-null  float64       
 4   Close   195544 non-null  float64       
 5   Volume  195544 non-null  int64         
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 9.0 MB
None


In [None]:
#Implement RSI calculation on CPU

def RSI_CPU_1(Data, period=14):
  """Compute an RSI from the 'Close' column using simple rolling means.

  The result is written to a new 'RSI' column on ``Data`` itself (the frame
  passed by the caller is modified) and a three-column selection is returned.

  Args:
    Data: pandas DataFrame containing at least 'Time' and 'Close'.
    period: length of the rolling window used for the gain/loss averages.

  Returns:
    DataFrame with the columns 'Time', 'Close' and 'RSI'.
  """
  close_change = Data['Close'].diff()

  # Split the bar-to-bar change into an upward and a downward part; every row
  # that fails the condition (including the undefined first change) becomes 0.
  up_move = close_change.where(close_change > 0, 0)
  down_move = -close_change.where(close_change < 0, 0)

  # min_periods=1 lets the first rows average over fewer than `period` bars.
  window_kwargs = {'window': period, 'min_periods': 1}
  mean_up = up_move.rolling(**window_kwargs).mean()
  mean_down = down_move.rolling(**window_kwargs).mean()

  # Relative strength (RS) and the RSI derived from it.
  relative_strength = mean_up / mean_down
  Data['RSI'] = 100 - (100 / (1 + relative_strength))

  return Data[['Time', 'Close', 'RSI']]

# Run the CPU implementation (it also writes the 'RSI' column onto Data) and
# show the first rows of the returned Time/Close/RSI selection.
RSI_CPU_Preview = RSI_CPU_1(Data, period=14)
print(RSI_CPU_Preview.head(10).round(2))

                 Time  Close     RSI
0 2024-06-12 10:22:00   1.08     NaN
1 2024-06-12 10:23:00   1.08  100.00
2 2024-06-12 10:24:00   1.08  100.00
3 2024-06-12 10:25:00   1.08   44.44
4 2024-06-12 10:26:00   1.08   76.19
5 2024-06-12 10:27:00   1.08   77.27
6 2024-06-12 10:28:00   1.08   80.77
7 2024-06-12 10:29:00   1.08   82.14
8 2024-06-12 10:30:00   1.08   84.85
9 2024-06-12 10:31:00   1.08   82.35


In [None]:
import cupy as cp
# Report whether CuPy can reach a CUDA device and which CuPy version is installed.
cupy_gpu_ready = cp.cuda.is_available()
print("CuPy GPU Available:", cupy_gpu_ready)
cupy_version = cp.__version__
print("CuPy Version:", cupy_version)

CuPy GPU Available: True
CuPy Version: 12.2.0


In [None]:
#Accelerate RSI calculation with GPU

import cudf

def RSI_GPU_1(Data, period=14):
  """Compute the rolling-mean RSI with cuDF and return it as a pandas frame.

  The input is converted to a cuDF DataFrame, the 'RSI' column is written to
  that cuDF frame, and the 'Time'/'Close'/'RSI' selection is converted back
  to pandas for the caller.

  Args:
    Data: frame containing at least 'Time' and 'Close'.
    period: length of the rolling window used for the gain/loss averages.

  Returns:
    pandas DataFrame with the columns 'Time', 'Close' and 'RSI'.
  """
  # Pandas -> cuDF conversion; everything below operates on this frame.
  gpu_frame = cudf.DataFrame(Data)

  close_change = gpu_frame['Close'].diff()

  # Upward and downward parts of the change; non-matching rows become 0.
  up_move = close_change.where(close_change > 0, 0)
  down_move = -close_change.where(close_change < 0, 0)

  # Simple rolling averages (min_periods=1 allows partial windows at the start).
  mean_up = up_move.rolling(window=period, min_periods=1).mean()
  mean_down = down_move.rolling(window=period, min_periods=1).mean()

  # Relative strength (RS) and the RSI derived from it.
  relative_strength = mean_up / mean_down
  gpu_frame['RSI'] = 100 - (100 / (1 + relative_strength))

  return gpu_frame[['Time', 'Close', 'RSI']].to_pandas()

# RSI_GPU_1 computes on a cuDF frame and hands the result back as its return
# value. Printing Data['RSI'] here would only re-display the column written
# earlier by the CPU run, so the returned frame is captured and shown instead.
RSI_GPU_Preview = RSI_GPU_1(Data, period=14)
print(RSI_GPU_Preview[['Time', 'Close', 'RSI']].head(10).round(2))

                 Time  Close     RSI
0 2024-06-12 10:22:00   1.08     NaN
1 2024-06-12 10:23:00   1.08  100.00
2 2024-06-12 10:24:00   1.08  100.00
3 2024-06-12 10:25:00   1.08   44.44
4 2024-06-12 10:26:00   1.08   76.19
5 2024-06-12 10:27:00   1.08   77.27
6 2024-06-12 10:28:00   1.08   80.77
7 2024-06-12 10:29:00   1.08   82.14
8 2024-06-12 10:30:00   1.08   84.85
9 2024-06-12 10:31:00   1.08   82.35


In [None]:
#Benchmark the execution time

import time
import cudf

#CPU function
def RSI_CPU_1(Data, period=14):
  """Rolling-mean RSI on pandas; adds an 'RSI' column to ``Data`` in place.

  Args:
    Data: pandas DataFrame containing at least 'Time' and 'Close'.
    period: rolling window length for the gain/loss averages.

  Returns:
    DataFrame with the columns 'Time', 'Close' and 'RSI'.
  """
  step = Data['Close'].diff()
  # Rows failing the condition (including the undefined first step) become 0.
  rises = step.where(step > 0, 0)
  falls = -step.where(step < 0, 0)
  # min_periods=1 allows partial windows for the first rows.
  rise_mean = rises.rolling(window=period, min_periods=1).mean()
  fall_mean = falls.rolling(window=period, min_periods=1).mean()
  strength = rise_mean / fall_mean
  Data['RSI'] = 100 - (100 / (1 + strength))
  return Data[['Time', 'Close', 'RSI']]

#GPU function
def RSI_GPU_1(Data, period=14):
  """Rolling-mean RSI computed on a cuDF frame, returned as pandas.

  Args:
    Data: frame containing at least 'Time' and 'Close'.
    period: rolling window length for the gain/loss averages.

  Returns:
    pandas DataFrame with the columns 'Time', 'Close' and 'RSI'.
  """
  gpu_frame = cudf.DataFrame(Data)
  step = gpu_frame['Close'].diff()
  # Rows failing the condition become 0.
  rises = step.where(step > 0, 0)
  falls = -step.where(step < 0, 0)
  rise_mean = rises.rolling(window=period, min_periods=1).mean()
  fall_mean = falls.rolling(window=period, min_periods=1).mean()
  strength = rise_mean / fall_mean
  gpu_frame['RSI'] = 100 - (100 / (1 + strength))
  return gpu_frame[['Time', 'Close', 'RSI']].to_pandas()

#NumPy copy of the frame; note that nothing in the benchmark below reads it
DataNP = Data.to_numpy()

#CPU benchmark - the measured call has to sit between the two clock reads,
#otherwise only the cost of reading the clock twice is reported
Start_Time = time.perf_counter()
CPUfunction = RSI_CPU_1(Data)
End_Time = time.perf_counter()
CPU_Time = End_Time - Start_Time
print(f"CPU RSI calculation time: {CPU_Time:.6f} seconds")

#GPU benchmark - timed the same way; the figure covers the whole RSI_GPU_1 call
Start_Time = time.perf_counter()
GPUfunction = RSI_GPU_1(Data)
End_Time = time.perf_counter()
GPU_Time = End_Time - Start_Time
print(f"GPU RSI calculation time: {GPU_Time:.6f} seconds")

CPU RSI calculation time: 0.000033 seconds
GPU RSI calculation time: 0.000035 seconds


In [None]:
#Load the dataset
from google.colab import drive
drive.mount('/content/drive')

# Every file is read as tab-separated with no header row; the same six labels
# are attached to each frame right after parsing.
OHLCV_Columns = ["Time", "Open", "High", "Low", "Close", "Volume"]

EURUSD1 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/EURUSD1.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

EURUSD5 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/EURUSD5.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

EURUSD15 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/EURUSD15.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

EURUSD30 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/EURUSD30.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#Initial check
# Show the first five rows of each timeframe, each followed by a blank separator.
for Loaded_Frame in (EURUSD1, EURUSD5, EURUSD15, EURUSD30):
    print(Loaded_Frame.head(5))
    print('\n')

               Time     Open     High      Low    Close  Volume
0  2024-06-12 10:22  1.07518  1.07519  1.07511  1.07518     146
1  2024-06-12 10:23  1.07517  1.07521  1.07515  1.07519     104
2  2024-06-12 10:24  1.07518  1.07524  1.07517  1.07522      83
3  2024-06-12 10:25  1.07521  1.07524  1.07516  1.07517     115
4  2024-06-12 10:26  1.07519  1.07535  1.07519  1.07529     110


               Time     Open     High      Low    Close  Volume
0  2022-04-25 18:35  1.07175  1.07180  1.07153  1.07174     328
1  2022-04-25 18:40  1.07174  1.07175  1.07157  1.07159     608
2  2022-04-25 18:45  1.07159  1.07185  1.07152  1.07154     396
3  2022-04-25 18:50  1.07153  1.07159  1.07132  1.07154     285
4  2022-04-25 18:55  1.07157  1.07163  1.07093  1.07094     856


               Time     Open     High      Low    Close  Volume
0  2016-12-19 16:00  1.04350  1.04435  1.04335  1.04380    3269
1  2016-12-19 16:15  1.04382  1.04500  1.04344  1.04493    3327
2  2016-12-19 16:30  1.04493  1.0451

In [None]:
#Initial check - EURUSD1
# Column dtypes, structural summary, missing-value counts, duplicated-row count.
print(EURUSD1.dtypes)
print('\n')
# info() prints its own report and returns None, so it is not wrapped in print().
EURUSD1.info()
print('\n')
print(EURUSD1.isnull().sum())
print('\n')
print(EURUSD1.duplicated().sum())

Time       object
Open      float64
High      float64
Low       float64
Close     float64
Volume      int64
dtype: object


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195544 entries, 0 to 195543
Data columns (total 6 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    195544 non-null  object 
 1   Open    195544 non-null  float64
 2   High    195544 non-null  float64
 3   Low     195544 non-null  float64
 4   Close   195544 non-null  float64
 5   Volume  195544 non-null  int64  
dtypes: float64(4), int64(1), object(1)
memory usage: 9.0+ MB
None


Time      0
Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64


0


In [None]:
#Initial check - EURUSD5
# Column dtypes, structural summary, missing-value counts, duplicated-row count.
print(EURUSD5.dtypes)
print('\n')
# info() prints its own report and returns None, so it is not wrapped in print().
EURUSD5.info()
print('\n')
print(EURUSD5.isnull().sum())
print('\n')
print(EURUSD5.duplicated().sum())

Time       object
Open      float64
High      float64
Low       float64
Close     float64
Volume      int64
dtype: object


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195578 entries, 0 to 195577
Data columns (total 6 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    195578 non-null  object 
 1   Open    195578 non-null  float64
 2   High    195578 non-null  float64
 3   Low     195578 non-null  float64
 4   Close   195578 non-null  float64
 5   Volume  195578 non-null  int64  
dtypes: float64(4), int64(1), object(1)
memory usage: 9.0+ MB
None


Time      0
Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64


0


In [None]:
#Initial check - EURUSD15
# Column dtypes, structural summary, missing-value counts, duplicated-row count.
print(EURUSD15.dtypes)
print('\n')
# info() prints its own report and returns None, so it is not wrapped in print().
EURUSD15.info()
print('\n')
print(EURUSD15.isnull().sum())
print('\n')
print(EURUSD15.duplicated().sum())

Time       object
Open      float64
High      float64
Low       float64
Close     float64
Volume      int64
dtype: object


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195604 entries, 0 to 195603
Data columns (total 6 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    195604 non-null  object 
 1   Open    195604 non-null  float64
 2   High    195604 non-null  float64
 3   Low     195604 non-null  float64
 4   Close   195604 non-null  float64
 5   Volume  195604 non-null  int64  
dtypes: float64(4), int64(1), object(1)
memory usage: 9.0+ MB
None


Time      0
Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64


0


In [None]:
#Initial check - EURUSD30
# Column dtypes, structural summary, missing-value counts, duplicated-row count.
print(EURUSD30.dtypes)
print('\n')
# info() prints its own report and returns None, so it is not wrapped in print().
EURUSD30.info()
print('\n')
print(EURUSD30.isnull().sum())
print('\n')
print(EURUSD30.duplicated().sum())

Time       object
Open      float64
High      float64
Low       float64
Close     float64
Volume      int64
dtype: object


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195585 entries, 0 to 195584
Data columns (total 6 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    195585 non-null  object 
 1   Open    195585 non-null  float64
 2   High    195585 non-null  float64
 3   Low     195585 non-null  float64
 4   Close   195585 non-null  float64
 5   Volume  195585 non-null  int64  
dtypes: float64(4), int64(1), object(1)
memory usage: 9.0+ MB
None


Time      0
Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64


0


In [None]:
#Preprocessing - Convert Time column type
# Parse the textual timestamps of every timeframe into datetime values.
EURUSD1['Time'] = pd.to_datetime(EURUSD1['Time'])
EURUSD5['Time'] = pd.to_datetime(EURUSD5['Time'])
EURUSD15['Time'] = pd.to_datetime(EURUSD15['Time'])
EURUSD30['Time'] = pd.to_datetime(EURUSD30['Time'])

# info() prints its own report and returns None; calling it directly avoids
# the stray "None" line that print(frame.info()) adds after each report.
EURUSD1.info()
print('\n')
EURUSD5.info()
print('\n')
EURUSD15.info()
print('\n')
EURUSD30.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195544 entries, 0 to 195543
Data columns (total 6 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   Time    195544 non-null  datetime64[ns]
 1   Open    195544 non-null  float64       
 2   High    195544 non-null  float64       
 3   Low     195544 non-null  float64       
 4   Close   195544 non-null  float64       
 5   Volume  195544 non-null  int64         
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 9.0 MB
None


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195578 entries, 0 to 195577
Data columns (total 6 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   Time    195578 non-null  datetime64[ns]
 1   Open    195578 non-null  float64       
 2   High    195578 non-null  float64       
 3   Low     195578 non-null  float64       
 4   Close   195578 non-null  float64       
 5   Volume  195578 non-null 

In [None]:
#Implement RSI on CPU

def RSI_CPU_2(Data, period=14):
  """Compute a rolling-mean RSI and store it in a period-specific column.

  The values are written to ``Data`` itself under the name 'RSI_<period>', so
  calling the function for several periods accumulates one column per period.

  Args:
    Data: pandas DataFrame containing at least 'Time' and 'Close'.
    period: rolling window length for the gain/loss averages.

  Returns:
    DataFrame with the columns 'Time', 'Close' and 'RSI_<period>'.
  """
  rsi_column = f'RSI_{period}'

  price_step = Data['Close'].diff()
  # Rows failing the condition (including the undefined first step) become 0.
  rises = price_step.where(price_step > 0, 0)
  falls = -price_step.where(price_step < 0, 0)

  # min_periods=1 allows partial windows for the first rows.
  rise_mean = rises.rolling(window=period, min_periods=1).mean()
  fall_mean = falls.rolling(window=period, min_periods=1).mean()

  strength = rise_mean / fall_mean
  Data[rsi_column] = 100 - (100 / (1 + strength))

  return Data[['Time', 'Close', rsi_column]]

#Define the lists
Datasets = [EURUSD1, EURUSD5, EURUSD15, EURUSD30]
Datasets_names = ['EURUSD1', 'EURUSD5', 'EURUSD15', 'EURUSD30']
Periods = [7, 14, 21, 28]

#Calculate RSI (RSI_CPU_2 writes each RSI_<period> column onto the dataset itself)
for i, Dataset in enumerate(Datasets):
  print(f"Calculating RSI for {Datasets_names[i]}...")

  for period in Periods:
    RSI_CPU_2(Dataset, period)

#Show the results - the columns already exist after the loop above, so they
#are only displayed here instead of being computed a second time
for i, dataset in enumerate(Datasets):
    print(f"CPU Processing {Datasets_names[i]}")
    print(dataset[['Time', 'Close'] + [f'RSI_{p}' for p in Periods]].head(10).round(2))

Calculating RSI for EURUSD1...
Calculating RSI for EURUSD5...
Calculating RSI for EURUSD15...
Calculating RSI for EURUSD30...
CPU Processing EURUSD1
                 Time  Close   RSI_7  RSI_14  RSI_21  RSI_28
0 2024-06-12 10:22:00   1.08     NaN     NaN     NaN     NaN
1 2024-06-12 10:23:00   1.08  100.00  100.00  100.00  100.00
2 2024-06-12 10:24:00   1.08  100.00  100.00  100.00  100.00
3 2024-06-12 10:25:00   1.08   44.44   44.44   44.44   44.44
4 2024-06-12 10:26:00   1.08   76.19   76.19   76.19   76.19
5 2024-06-12 10:27:00   1.08   77.27   77.27   77.27   77.27
6 2024-06-12 10:28:00   1.08   80.77   80.77   80.77   80.77
7 2024-06-12 10:29:00   1.08   82.14   82.14   82.14   82.14
8 2024-06-12 10:30:00   1.08   84.37   84.85   84.85   84.85
9 2024-06-12 10:31:00   1.08   80.00   82.35   82.35   82.35
CPU Processing EURUSD5
                 Time  Close  RSI_7  RSI_14  RSI_21  RSI_28
0 2022-04-25 18:35:00   1.07    NaN     NaN     NaN     NaN
1 2022-04-25 18:40:00   1.07   0.00  

In [None]:
#Accelerate with GPU
import cudf

def RSI_GPU_2(Data, period):
    """Compute a rolling-mean RSI with cuDF and return the full frame as pandas.

    The input is converted to a cuDF DataFrame, an 'RSI_<period>' column is
    written to that cuDF frame, and the whole frame is converted back.

    Args:
        Data: frame containing at least a 'Close' column.
        period: rolling window length for the gain/loss averages.

    Returns:
        pandas DataFrame holding every input column plus 'RSI_<period>'.
    """
    gpu_frame = cudf.DataFrame(Data)

    price_step = gpu_frame['Close'].diff()
    # Rows failing the condition become 0.
    rises = price_step.where(price_step > 0, 0)
    falls = -price_step.where(price_step < 0, 0)

    # min_periods=1 allows partial windows for the first rows.
    rise_mean = rises.rolling(window=period, min_periods=1).mean()
    fall_mean = falls.rolling(window=period, min_periods=1).mean()

    strength = rise_mean / fall_mean
    gpu_frame[f'RSI_{period}'] = 100 - (100 / (1 + strength))

    return gpu_frame.to_pandas()

#Define the lists
Datasets = [EURUSD1, EURUSD5, EURUSD15, EURUSD30]
Datasets_names = ['EURUSD1', 'EURUSD5', 'EURUSD15', 'EURUSD30']
Periods = [7, 14, 21, 28]

# Columns shown for every dataset once all periods have been computed
Display_Columns = ['Time', 'Close'] + [f'RSI_{p}' for p in Periods]

# Calculate RSI for all datasets and all periods
for i in range(len(Datasets)):
    print(f"GPU Processing {Datasets_names[i]}")

    # Each call returns a new pandas frame carrying one more RSI column,
    # so the result is fed back in for the next period.
    dataset = Datasets[i]
    for period in Periods:
        dataset = RSI_GPU_2(dataset, period)

    print(dataset[Display_Columns].head(10).round(2))

GPU Processing EURUSD1
                 Time  Close   RSI_7  RSI_14  RSI_21  RSI_28
0 2024-06-12 10:22:00   1.08     NaN     NaN     NaN     NaN
1 2024-06-12 10:23:00   1.08  100.00  100.00  100.00  100.00
2 2024-06-12 10:24:00   1.08  100.00  100.00  100.00  100.00
3 2024-06-12 10:25:00   1.08   44.44   44.44   44.44   44.44
4 2024-06-12 10:26:00   1.08   76.19   76.19   76.19   76.19
5 2024-06-12 10:27:00   1.08   77.27   77.27   77.27   77.27
6 2024-06-12 10:28:00   1.08   80.77   80.77   80.77   80.77
7 2024-06-12 10:29:00   1.08   82.14   82.14   82.14   82.14
8 2024-06-12 10:30:00   1.08   84.37   84.85   84.85   84.85
9 2024-06-12 10:31:00   1.08   80.00   82.35   82.35   82.35
GPU Processing EURUSD5
                 Time  Close  RSI_7  RSI_14  RSI_21  RSI_28
0 2022-04-25 18:35:00   1.07    NaN     NaN     NaN     NaN
1 2022-04-25 18:40:00   1.07   0.00    0.00    0.00    0.00
2 2022-04-25 18:45:00   1.07   0.00    0.00    0.00    0.00
3 2022-04-25 18:50:00   1.07   0.00    0.00

In [None]:
#Benchmark - Just Computation Time
import time
import cudf

datasets = {'EURUSD1': EURUSD1, 'EURUSD5': EURUSD5, 'EURUSD15': EURUSD15, 'EURUSD30': EURUSD30}
periods = [7, 14, 21, 28]

#Benchmark for CPU and GPU
for name, dataset in datasets.items():
    print(f"Processing {name}...")

    #CPU
    for period in periods:
        #Copy to prevent side-effects; done before the clock starts so the
        #copy itself is not counted as computation time
        work_frame = dataset.copy()
        start_time = time.perf_counter()
        RSI_CPU_2(work_frame, period=period)
        end_time = time.perf_counter()
        cpu_time = end_time - start_time
        print(f"  CPU (Period {period}) RSI time: {cpu_time:.6f} seconds")

    #GPU
    for period in periods:
        work_frame = dataset.copy()
        #The timed region covers the whole RSI_GPU_2 call, i.e. the
        #pandas -> cuDF -> pandas conversions it performs as well
        start_time = time.perf_counter()
        RSI_GPU_2(work_frame, period=period)
        end_time = time.perf_counter()
        gpu_time = end_time - start_time
        print(f"  GPU (Period {period}) RSI time: {gpu_time:.6f} seconds")

Processing EURUSD1...
  CPU (Period 7) RSI time: 0.026169 seconds
  CPU (Period 14) RSI time: 0.024878 seconds
  CPU (Period 21) RSI time: 0.025715 seconds
  CPU (Period 28) RSI time: 0.026216 seconds
  GPU (Period 7) RSI time: 0.066848 seconds
  GPU (Period 14) RSI time: 0.068110 seconds
  GPU (Period 21) RSI time: 0.066772 seconds
  GPU (Period 28) RSI time: 0.065874 seconds
Processing EURUSD5...
  CPU (Period 7) RSI time: 0.026691 seconds
  CPU (Period 14) RSI time: 0.023197 seconds
  CPU (Period 21) RSI time: 0.025265 seconds
  CPU (Period 28) RSI time: 0.023038 seconds
  GPU (Period 7) RSI time: 0.068308 seconds
  GPU (Period 14) RSI time: 0.066545 seconds
  GPU (Period 21) RSI time: 0.067980 seconds
  GPU (Period 28) RSI time: 0.067851 seconds
Processing EURUSD15...
  CPU (Period 7) RSI time: 0.027473 seconds
  CPU (Period 14) RSI time: 0.024474 seconds
  CPU (Period 21) RSI time: 0.023873 seconds
  CPU (Period 28) RSI time: 0.031920 seconds
  GPU (Period 7) RSI time: 0.070185 se

In [None]:
#Load the dataset
from google.colab import drive
drive.mount('/content/drive')

# Every file is read as tab-separated with no header row; the same six labels
# are attached to each frame right after parsing.
OHLCV_Columns = ["Time", "Open", "High", "Low", "Close", "Volume"]

EURUSD1_2 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/EURUSD1.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

EURUSD5_2 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/EURUSD5.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

EURUSD15_2 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/EURUSD15.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

EURUSD30_2 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/EURUSD30.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

##### GBPUSD files

GBPUSD1 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/GBPUSD1.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

GBPUSD5 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/GBPUSD5.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

GBPUSD15 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/GBPUSD15.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

GBPUSD30 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/GBPUSD30.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Stack the four EURUSD frames row-wise under a fresh 0..n-1 index.
EURUSD = pd.concat([EURUSD1_2, EURUSD5_2, EURUSD15_2, EURUSD30_2], ignore_index=True)

# Same for the four GBPUSD frames.
GBPUSD = pd.concat([GBPUSD1, GBPUSD5, GBPUSD15, GBPUSD30], ignore_index=True)

# Append both pairs, parse the timestamps and order the rows by time.
# The index labels from the concatenation are kept by the sort.
Merged = (
    pd.concat([EURUSD, GBPUSD], ignore_index=True)
    .assign(Time=lambda frame: pd.to_datetime(frame['Time']))
    .sort_values(by=['Time'])
)

In [None]:
#Initial check
# First rows and column dtypes of the merged frame, separated by a blank block.
for Merged_Report in (Merged.head(10), '\n', Merged.dtypes):
    print(Merged_Report)

                       Time     Open     High      Low    Close  Volume
1369117 2008-12-16 06:30:00  1.52810  1.52940  1.52720  1.52850    2074
1369118 2008-12-16 07:00:00  1.52850  1.52940  1.52450  1.52530    3769
1369119 2008-12-16 07:30:00  1.52525  1.53120  1.52410  1.52940    6964
1369120 2008-12-16 08:00:00  1.52925  1.53010  1.52580  1.52665   12089
1369121 2008-12-16 08:30:00  1.52660  1.52740  1.52105  1.52170   16300
1369122 2008-12-16 09:00:00  1.52170  1.52560  1.52105  1.52520   12405
1369123 2008-12-16 09:30:00  1.52480  1.52850  1.52045  1.52350   11497
1369124 2008-12-16 10:00:00  1.52350  1.52465  1.52055  1.52165   13134
1369125 2008-12-16 10:30:00  1.52160  1.52370  1.52030  1.52295    8341
1369126 2008-12-16 11:00:00  1.52295  1.52545  1.52260  1.52510   11960


Time      datetime64[ns]
Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
dtype: object


In [None]:
#Initial check
# Structural summary, missing-value counts and duplicated-row count.
# info() prints its own report and returns None, so it is not wrapped in print().
Merged.info()
print('\n')
print(Merged.isnull().sum())
print('\n')
print(Merged.duplicated().sum())

<class 'pandas.core.frame.DataFrame'>
Index: 1564707 entries, 1369117 to 195543
Data columns (total 6 columns):
 #   Column  Non-Null Count    Dtype         
---  ------  --------------    -----         
 0   Time    1564707 non-null  datetime64[ns]
 1   Open    1564707 non-null  float64       
 2   High    1564707 non-null  float64       
 3   Low     1564707 non-null  float64       
 4   Close   1564707 non-null  float64       
 5   Volume  1564707 non-null  int64         
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 83.6 MB
None


Time      0
Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64


57


In [None]:
#Implement RSI on CPU
def RSI_CPU_3(Data, period=14):
    """Compute a rolling-mean RSI and store it as 'RSI_<period>' on ``Data``.

    The frame passed by the caller is modified: one column per requested
    period is added (or overwritten) on it.

    Args:
        Data: pandas DataFrame containing at least 'Time' and 'Close'.
        period: rolling window length for the gain/loss averages.

    Returns:
        DataFrame with the columns 'Time', 'Close' and 'RSI_<period>'.
    """
    target_column = f'RSI_{period}'
    roll = {'window': period, 'min_periods': 1}  # partial windows allowed at the start

    change = Data['Close'].diff()
    # Rows failing the condition (including the undefined first change) become 0.
    gains = change.where(change > 0, 0)
    losses = -change.where(change < 0, 0)

    ratio = gains.rolling(**roll).mean() / losses.rolling(**roll).mean()
    Data[target_column] = 100 - (100 / (1 + ratio))

    return Data[['Time', 'Close', target_column]]

#Define the periods
Periods_3 = [7, 14, 21, 28, 35, 42, 49, 56]

#Calculate RSI (each call adds an RSI_<period> column to Merged)
for period in Periods_3:
    RSI_CPU_3(Merged, period)

#Show the results - the column list is built from Periods_3, the list that was
#actually computed above, rather than from a list defined in another cell
print("Displaying RSI Results:")
print('\n')
print(Merged[['Time', 'Close'] + [f'RSI_{p}' for p in Periods_3]].head(10).round(2))

Displaying RSI Results:


                       Time  Close  RSI_7  RSI_14  RSI_21  RSI_28
1369117 2008-12-16 06:30:00   1.53    NaN     NaN     NaN     NaN
1369118 2008-12-16 07:00:00   1.53   0.00    0.00    0.00    0.00
1369119 2008-12-16 07:30:00   1.53  56.16   56.16   56.16   56.16
1369120 2008-12-16 08:00:00   1.53  40.80   40.80   40.80   40.80
1369121 2008-12-16 08:30:00   1.52  27.33   27.33   27.33   27.33
1369122 2008-12-16 09:00:00   1.53  41.08   41.08   41.08   41.08
1369123 2008-12-16 09:30:00   1.52  37.62   37.62   37.62   37.62
1369124 2008-12-16 10:00:00   1.52  34.47   34.47   34.47   34.47
1369125 2008-12-16 10:30:00   1.52  44.17   38.12   38.12   38.12
1369126 2008-12-16 11:00:00   1.53  38.19   43.33   43.33   43.33


In [None]:
#Accelerate with GPU for Batch Processing
import cudf

def RSI_GPU_Batch(Data, periods):
    """Compute rolling-mean RSI columns for several periods on one cuDF frame.

    The input is converted to a cuDF DataFrame once; the gain/loss series are
    derived once and reused for every period, each of which adds an
    'RSI_<period>' column. The full frame is converted back to pandas.

    Args:
        Data: frame containing at least a 'Close' column.
        periods: iterable of rolling window lengths.

    Returns:
        pandas DataFrame holding every input column plus one RSI column per period.
    """
    gpu_frame = cudf.DataFrame(Data)

    change = gpu_frame['Close'].diff()
    # Rows failing the condition become 0.
    gains = change.where(change > 0, 0)
    losses = -change.where(change < 0, 0)

    for window_length in periods:
        # min_periods=1 allows partial windows for the first rows.
        gain_mean = gains.rolling(window=window_length, min_periods=1).mean()
        loss_mean = losses.rolling(window=window_length, min_periods=1).mean()

        ratio = gain_mean / loss_mean
        gpu_frame[f'RSI_{window_length}'] = 100 - (100 / (1 + ratio))

    return gpu_frame.to_pandas()

#Define the periods
Periods_3 = [7, 14, 21, 28, 35, 42, 49, 56]

#Calculate RSI (the returned pandas frame replaces Merged)
Merged = RSI_GPU_Batch(Merged, Periods_3)

#Display the results - the column list is built from Periods_3, the list that
#was actually computed above, rather than from a list defined in another cell
print("Displaying RSI Results:")
print('\n')
print(Merged[['Time', 'Close'] + [f'RSI_{p}' for p in Periods_3]].head(10).round(2))

Displaying RSI Results:


                       Time  Close  RSI_7  RSI_14  RSI_21  RSI_28
1369117 2008-12-16 06:30:00   1.53    NaN     NaN     NaN     NaN
1369118 2008-12-16 07:00:00   1.53   0.00    0.00    0.00    0.00
1369119 2008-12-16 07:30:00   1.53  56.16   56.16   56.16   56.16
1369120 2008-12-16 08:00:00   1.53  40.80   40.80   40.80   40.80
1369121 2008-12-16 08:30:00   1.52  27.33   27.33   27.33   27.33
1369122 2008-12-16 09:00:00   1.53  41.08   41.08   41.08   41.08
1369123 2008-12-16 09:30:00   1.52  37.62   37.62   37.62   37.62
1369124 2008-12-16 10:00:00   1.52  34.47   34.47   34.47   34.47
1369125 2008-12-16 10:30:00   1.52  44.17   38.12   38.12   38.12
1369126 2008-12-16 11:00:00   1.53  38.19   43.33   43.33   43.33


In [None]:
#Benchmark - Just Computation Time
import time
import cudf

Periods_3 = [7, 14, 21, 28, 35, 42, 49, 56]

#CPU
print("CPU Benchmarking:")
cpu_total_time = 0.0
for period in Periods_3:
    #Copy before the clock starts so the copy is not counted as computation time
    work_frame = Merged.copy()
    start_time = time.perf_counter()
    RSI_CPU_3(work_frame, period=period)
    end_time = time.perf_counter()
    cpu_time = end_time - start_time
    cpu_total_time += cpu_time
    print(f"  CPU (Period {period}) RSI time: {cpu_time:.6f} seconds")
#Sum over all periods, directly comparable with the single GPU batch figure below
print(f"  CPU (Total for periods {Periods_3}) RSI time: {cpu_total_time:.6f} seconds")
print('\n')

#GPU
print("GPU Benchmarking (Batch Processing):")
work_frame = Merged.copy()
#The timed region covers the whole RSI_GPU_Batch call for all periods at once
start_time = time.perf_counter()
RSI_GPU_Batch(work_frame, Periods_3)  #Process all periods in a single batch
end_time = time.perf_counter()
gpu_time = end_time - start_time
print(f"  GPU (Batch for periods {Periods_3}) RSI time: {gpu_time:.6f} seconds")

CPU Benchmarking:
  CPU (Period 7) RSI time: 0.146483 seconds
  CPU (Period 14) RSI time: 0.158025 seconds
  CPU (Period 21) RSI time: 0.146217 seconds
  CPU (Period 28) RSI time: 0.145086 seconds
  CPU (Period 35) RSI time: 0.148029 seconds
  CPU (Period 42) RSI time: 0.149757 seconds
  CPU (Period 49) RSI time: 0.145589 seconds
  CPU (Period 56) RSI time: 0.145944 seconds


GPU Benchmarking (Batch Processing):
  GPU (Batch for periods [7, 14, 21, 28, 35, 42, 49, 56]) RSI time: 0.471885 seconds


In [None]:
#Load the dataset
from google.colab import drive
drive.mount('/content/drive')

# Every file is read as tab-separated with no header row; the same six labels
# are attached to each frame right after parsing.
OHLCV_Columns = ["Time", "Open", "High", "Low", "Close", "Volume"]

EURUSD1_3 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/EURUSD1.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

EURUSD5_3 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/EURUSD5.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

EURUSD15_3 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/EURUSD15.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

EURUSD30_3 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/EURUSD30.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

##### GBPUSD files

GBPUSD1_2 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/GBPUSD1.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

GBPUSD5_2 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/GBPUSD5.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

GBPUSD15_2 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/GBPUSD15.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

GBPUSD30_2 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/GBPUSD30.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

#### USDCAD files

USDCAD1 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/USDCAD1.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

USDCAD5 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/USDCAD5.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

USDCAD15 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/USDCAD15.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

USDCAD30 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/USDCAD30.csv', sep="\t", header=None).set_axis(OHLCV_Columns, axis=1)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Stack the four frames of each pair row-wise under a fresh 0..n-1 index.
EURUSDx = pd.concat([EURUSD1_3, EURUSD5_3, EURUSD15_3, EURUSD30_3], ignore_index=True)
GBPUSDx = pd.concat([GBPUSD1_2, GBPUSD5_2, GBPUSD15_2, GBPUSD30_2], ignore_index=True)
USDCADx = pd.concat([USDCAD1, USDCAD5, USDCAD15, USDCAD30], ignore_index=True)

# Append all three pairs, parse the timestamps and order the rows by time.
# The index labels from the concatenation are kept by the sort.
Big = (
    pd.concat([EURUSDx, GBPUSDx, USDCADx], ignore_index=True)
    .assign(Time=lambda frame: pd.to_datetime(frame['Time']))
    .sort_values(by=['Time'])
)

In [None]:
#Initial check
# First rows, column dtypes and (rows, columns) shape, separated by blank blocks.
for Big_Report in (Big.head(15), '\n', Big.dtypes, '\n', Big.shape):
    print(Big_Report)

                       Time     Open     High      Low    Close  Volume
1369117 2008-12-16 06:30:00  1.52810  1.52940  1.52720  1.52850    2074
1369118 2008-12-16 07:00:00  1.52850  1.52940  1.52450  1.52530    3769
1369119 2008-12-16 07:30:00  1.52525  1.53120  1.52410  1.52940    6964
1369120 2008-12-16 08:00:00  1.52925  1.53010  1.52580  1.52665   12089
1369121 2008-12-16 08:30:00  1.52660  1.52740  1.52105  1.52170   16300
1369122 2008-12-16 09:00:00  1.52170  1.52560  1.52105  1.52520   12405
1369123 2008-12-16 09:30:00  1.52480  1.52850  1.52045  1.52350   11497
1369124 2008-12-16 10:00:00  1.52350  1.52465  1.52055  1.52165   13134
1369125 2008-12-16 10:30:00  1.52160  1.52370  1.52030  1.52295    8341
1369126 2008-12-16 11:00:00  1.52295  1.52545  1.52260  1.52510   11960
1369127 2008-12-16 11:30:00  1.52500  1.52550  1.52280  1.52480   10905
1369128 2008-12-16 12:00:00  1.52460  1.52830  1.52275  1.52480   13987
1369129 2008-12-16 12:30:00  1.52480  1.53430  1.52480  1.53040 

In [None]:
#Initial check
# Structural summary, missing-value counts and duplicated-row count.
# info() prints its own report and returns None, so it is not wrapped in print().
Big.info()
print('\n')
print(Big.isnull().sum())
print('\n')
print(Big.duplicated().sum())

<class 'pandas.core.frame.DataFrame'>
Index: 2347158 entries, 1369117 to 1760293
Data columns (total 6 columns):
 #   Column  Dtype         
---  ------  -----         
 0   Time    datetime64[ns]
 1   Open    float64       
 2   High    float64       
 3   Low     float64       
 4   Close   float64       
 5   Volume  int64         
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 125.4 MB
None


Time      0
Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64


89


In [None]:
#Implement Complex RSI on CPU

def RSI_CPU_Complex(Data, periods=[14, 28], use_ema=True, smoothing=True):
    """Compute RSI columns with pandas for one or more look-back periods.

    Parameters
    ----------
    Data : pandas.DataFrame
        Frame with at least 'Time' and 'Close' columns. It is NOT modified;
        all new columns are written to a separate copy.
    periods : iterable of int
        Look-back lengths. One 'RSI_<period>' column is produced per entry.
        The default list is only read, never mutated.
    use_ema : bool
        True  -> average gains/losses with an exponentially weighted mean
                 (span=period, min_periods=1).
        False -> average with a simple rolling mean
                 (window=period, min_periods=1).
    smoothing : bool
        If True, each RSI series is additionally averaged over a 3-row
        rolling window (min_periods=1).

    Returns
    -------
    pandas.DataFrame
        Columns 'Time', 'Close', then 'RSI_<p>' for every p in `periods`,
        in that order, on the same index as `Data`.

    Notes
    -----
    The first row has no previous close, so its gain and loss are both zero
    and its RSI is NaN (0 / 0).
    """
    #Work on a narrow copy so the caller's frame never gains RSI_* columns
    Result = Data[['Time', 'Close']].copy()

    #Price changes do not depend on the period, so derive them once
    Delta = Result['Close'].diff()
    Gain = Delta.where(Delta > 0, 0)
    Loss = -Delta.where(Delta < 0, 0)

    #Loop over all periods
    for period in periods:
        #Using Exponential Moving Average (EMA) instead of Simple Moving Average (SMA)
        if use_ema:
            AverageGain = Gain.ewm(span=period, min_periods=1).mean()
            AverageLoss = Loss.ewm(span=period, min_periods=1).mean()
        else:
            AverageGain = Gain.rolling(window=period, min_periods=1).mean()
            AverageLoss = Loss.rolling(window=period, min_periods=1).mean()

        RS = AverageGain / AverageLoss
        RSI = 100 - (100 / (1 + RS))

        #Apply smoothing to RSI
        if smoothing:
            RSI = RSI.rolling(window=3, min_periods=1).mean()

        Result[f'RSI_{period}'] = RSI

    #Explicit selection keeps the column order (and any repeated period) as requested
    return Result[['Time', 'Close'] + [f'RSI_{p}' for p in periods]]

#Look-back windows: every multiple of 7 from 7 through 56
Periodsx = list(range(7, 57, 7))

#Run the CPU RSI over all windows; Big is replaced by the returned frame
Big = RSI_CPU_Complex(Big, periods=Periodsx)

#Show the first ten rows of Time, Close and every RSI column, rounded to two decimals
print("Displaying RSI Results with Complexity:")
print('\n')
print(Big[['Time', 'Close', *(f'RSI_{p}' for p in Periodsx)]].head(10).round(2))

Displaying RSI Results with Complexity:


                       Time  Close  RSI_7  RSI_14  RSI_21  RSI_28  RSI_35  \
1369117 2008-12-16 06:30:00   1.53    NaN     NaN     NaN     NaN     NaN   
1369118 2008-12-16 07:00:00   1.53   0.00    0.00    0.00    0.00    0.00   
1369119 2008-12-16 07:30:00   1.53  31.54   29.83   29.25   28.96   28.78   
1369120 2008-12-16 08:00:00   1.53  34.47   33.49   33.12   32.93   32.81   
1369121 2008-12-16 08:30:00   1.52  41.67   41.70   41.65   41.61   41.58   
1369122 2008-12-16 09:00:00   1.53  35.80   36.17   36.27   36.31   36.33   
1369123 2008-12-16 09:30:00   1.52  35.02   35.19   35.24   35.27   35.29   
1369124 2008-12-16 10:00:00   1.52  38.04   37.94   37.88   37.85   37.83   
1369125 2008-12-16 10:30:00   1.52  36.64   36.70   36.72   36.73   36.73   
1369126 2008-12-16 11:00:00   1.53  42.67   40.34   39.70   39.41   39.24   

         RSI_42  RSI_49  RSI_56  
1369117     NaN     NaN     NaN  
1369118    0.00    0.00    0.00  
1369119 

In [None]:
#Implement Complex RSI on GPU with Batch Processing (Fixed for EMA)
import cudf

def RSI_GPU_Complex_Batch(Data, periods=[14, 28], use_ema=True, smoothing=True):
    """Compute RSI columns with cuDF for several look-back periods in one call.

    The input is converted to a cuDF DataFrame, one 'RSI_<period>' column is
    added per entry of `periods`, and the whole frame (all original columns
    plus the new ones) is converted back to pandas and returned.

    use_ema selects an exponentially weighted mean (span=period) over a plain
    rolling mean (window=period) for the average gain/loss. smoothing applies
    an extra 3-row rolling mean to each RSI series. None of the rolling/ewm
    calls pass min_periods, so the library defaults apply.
    """
    gpu_frame = cudf.DataFrame(Data)

    #Row-to-row change in Close, split into upward and downward moves (computed once)
    close_change = gpu_frame['Close'].diff()
    up_moves = close_change.where(close_change > 0, 0)
    down_moves = -close_change.where(close_change < 0, 0)

    for period in periods:
        if not use_ema:
            #Simple moving average of the moves
            mean_up = up_moves.rolling(window=period).mean()
            mean_down = down_moves.rolling(window=period).mean()
        else:
            #Exponentially weighted average of the moves (no min_periods argument)
            mean_up = up_moves.ewm(span=period).mean()
            mean_down = down_moves.ewm(span=period).mean()

        #Relative strength folded straight into the RSI formula
        rsi = 100 - (100 / (1 + mean_up / mean_down))

        #Optional 3-row smoothing of the RSI series
        if smoothing:
            rsi = rsi.rolling(window=3).mean()

        gpu_frame[f'RSI_{period}'] = rsi

    return gpu_frame.to_pandas()

#Look-back windows: every multiple of 7 from 7 through 56
Periodsx = list(range(7, 57, 7))

#Run the batched GPU RSI over all windows; Big is replaced by the returned frame
Big = RSI_GPU_Complex_Batch(Big, periods=Periodsx, use_ema=True, smoothing=True)

#Show the first ten rows of Time, Close and every RSI column, rounded to two decimals
print("Displaying RSI Results with Complexity (GPU Batch Processing):")
print('\n')
print(Big[['Time', 'Close', *(f'RSI_{p}' for p in Periodsx)]].head(10).round(2))

Displaying RSI Results with Complexity (GPU Batch Processing):


                       Time  Close  RSI_7  RSI_14  RSI_21  RSI_28  RSI_35  \
1369117 2008-12-16 06:30:00   1.53    NaN     NaN     NaN     NaN     NaN   
1369118 2008-12-16 07:00:00   1.53    NaN     NaN     NaN     NaN     NaN   
1369119 2008-12-16 07:30:00   1.53    NaN     NaN     NaN     NaN     NaN   
1369120 2008-12-16 08:00:00   1.53  34.47   33.49   33.12   32.93   32.81   
1369121 2008-12-16 08:30:00   1.52  41.67   41.70   41.65   41.61   41.58   
1369122 2008-12-16 09:00:00   1.53  35.80   36.17   36.27   36.31   36.33   
1369123 2008-12-16 09:30:00   1.52  35.02   35.19   35.24   35.27   35.29   
1369124 2008-12-16 10:00:00   1.52  38.04   37.94   37.88   37.85   37.83   
1369125 2008-12-16 10:30:00   1.52  36.64   36.70   36.72   36.73   36.73   
1369126 2008-12-16 11:00:00   1.53  42.67   40.34   39.70   39.41   39.24   

         RSI_42  RSI_49  RSI_56  
1369117     NaN     NaN     NaN  
1369118     NaN    

In [None]:
#Benchmark - Just Computation Time
import time
import cudf

#Define periods
periods = [7, 14, 21, 28, 35, 42, 49, 56]

#CPU: one timed call per period.
#Big is never reassigned here, so every run is measured on the same input
#(previously each call replaced Big with its own narrower result frame).
print("CPU Benchmarking:")
for period in periods:
    cpu_input = Big.copy()  #Fresh copy, made outside the timed region
    start_time = time.perf_counter()  #Monotonic high-resolution clock for intervals
    RSI_CPU_Complex(cpu_input, periods=[period], use_ema=True, smoothing=True)  #Result discarded: only the duration matters
    end_time = time.perf_counter()
    cpu_time = end_time - start_time
    print(f"  CPU (Period {period}) RSI time: {cpu_time:.6f} seconds")
print('\n')

#GPU: all periods in one batched call.
#The measured time includes the pandas -> cuDF conversion and the conversion
#back to pandas, because both happen inside RSI_GPU_Complex_Batch.
print("GPU Benchmarking (Batch Processing):")
gpu_input = Big.copy()  #Fresh copy, made outside the timed region
start_time = time.perf_counter()
RSI_GPU_Complex_Batch(gpu_input, periods=periods, use_ema=True, smoothing=True)  #Process all periods in a single batch
end_time = time.perf_counter()
gpu_time = end_time - start_time
print(f"  GPU (Batch for periods {periods}) RSI time: {gpu_time:.6f} seconds")

CPU Benchmarking:
  CPU (Period 7) RSI time: 0.251997 seconds
  CPU (Period 14) RSI time: 0.199165 seconds
  CPU (Period 21) RSI time: 0.205727 seconds
  CPU (Period 28) RSI time: 0.206572 seconds
  CPU (Period 35) RSI time: 0.195340 seconds
  CPU (Period 42) RSI time: 0.216275 seconds
  CPU (Period 49) RSI time: 0.199567 seconds
  CPU (Period 56) RSI time: 0.209800 seconds


GPU Benchmarking (Batch Processing):
  GPU (Batch for periods [7, 14, 21, 28, 35, 42, 49, 56]) RSI time: 0.478138 seconds


In [None]:
#Benchmark - Just Computation Time (5 trials for more accuracy)
import time
import cudf


periods = [7, 14, 21, 28, 35, 42, 49, 56]
num_trials = 5


#CPU Benchmarking
#Big is never reassigned in this cell, so every trial and every period is
#measured on the same input frame (previously each call replaced Big with
#its own narrower result frame).
print("CPU Benchmarking:")
cpu_times = []  #One inner list of per-period times for each trial
for trial in range(num_trials):
    trial_cpu_times = []
    for period in periods:
        cpu_input = Big.copy()  #Fresh copy, made outside the timed region
        start_time = time.perf_counter()  #Monotonic high-resolution clock for intervals
        RSI_CPU_Complex(cpu_input, periods=[period], use_ema=True, smoothing=True)  #Result discarded: only the duration matters
        end_time = time.perf_counter()
        cpu_time = end_time - start_time
        trial_cpu_times.append(cpu_time)
        print(f"  CPU (Trial {trial + 1}, Period {period}) RSI time: {cpu_time:.6f} seconds")
    cpu_times.append(trial_cpu_times)
print('\n')

#Calculate average CPU time
#zip(*cpu_times) regroups the per-trial lists into one tuple of times per period
avg_cpu_times = [sum(times) / num_trials for times in zip(*cpu_times)]
for period, avg_time in zip(periods, avg_cpu_times):
    print(f"  Average CPU time for Period {period}: {avg_time:.6f} seconds")
print('\n')

#GPU Benchmarking using Batch Processing
#The measured time includes the pandas -> cuDF conversion and the conversion
#back to pandas, because both happen inside RSI_GPU_Complex_Batch.
print("GPU Benchmarking (Batch Processing):")
gpu_times = []  #One batch time per trial
for trial in range(num_trials):
    gpu_input = Big.copy()  #Fresh copy, made outside the timed region
    start_time = time.perf_counter()
    RSI_GPU_Complex_Batch(gpu_input, periods=periods, use_ema=True, smoothing=True)
    end_time = time.perf_counter()
    gpu_time = end_time - start_time
    gpu_times.append(gpu_time)
    print(f"  GPU (Trial {trial + 1}) Batch processing time: {gpu_time:.6f} seconds")

print('\n')

#Calculate average GPU time
avg_gpu_time = sum(gpu_times) / num_trials
print(f"  Average GPU time for Batch (Periods {periods}): {avg_gpu_time:.6f} seconds")

CPU Benchmarking:
  CPU (Trial 1, Period 7) RSI time: 0.322603 seconds
  CPU (Trial 1, Period 14) RSI time: 0.295823 seconds
  CPU (Trial 1, Period 21) RSI time: 0.286363 seconds
  CPU (Trial 1, Period 28) RSI time: 0.318377 seconds
  CPU (Trial 1, Period 35) RSI time: 0.317549 seconds
  CPU (Trial 1, Period 42) RSI time: 0.355678 seconds
  CPU (Trial 1, Period 49) RSI time: 0.303830 seconds
  CPU (Trial 1, Period 56) RSI time: 0.279147 seconds
  CPU (Trial 2, Period 7) RSI time: 0.195302 seconds
  CPU (Trial 2, Period 14) RSI time: 0.197526 seconds
  CPU (Trial 2, Period 21) RSI time: 0.201938 seconds
  CPU (Trial 2, Period 28) RSI time: 0.234758 seconds
  CPU (Trial 2, Period 35) RSI time: 0.206884 seconds
  CPU (Trial 2, Period 42) RSI time: 0.211177 seconds
  CPU (Trial 2, Period 49) RSI time: 0.203572 seconds
  CPU (Trial 2, Period 56) RSI time: 0.215446 seconds
  CPU (Trial 3, Period 7) RSI time: 0.212074 seconds
  CPU (Trial 3, Period 14) RSI time: 0.218301 seconds
  CPU (Trial 