In [41]:
# Forex Prediction Data Analysis
# Objectives
# 1. Load and preprocess the dataset
# 2. Inspect the data for empty values, wrong data types, wrong formats, duplicates and outliers
# 3. Analyze the data to find patterns and relationships
# 4. Visualize the data to understand trends and distributions
# 5. Evaluate the model performance using appropriate metrics
# 6. Identify patterns and correlations in EUR/USD exchange rates movements
# 7. Save the cleaned and processed data for future use

In [42]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

forex_data = pd.read_csv("../../forex_predictions_data.csv")
forex_data

Unnamed: 0,Date,Open,High,Low,Close,Volume,Predicted_Close,Currency_Pair,Signal,Confidence
0,2024-01-01,1.18727,1.92461,0.85312,1.18154,2201,1.22984,EUR/USD,Hold,0.90
1,2024-01-02,1.47536,1.82881,0.54067,1.32296,error,1.03797,EUR/USD,Sell,
2,2024-01-03,1.36600,1.78415,0.54242,1.28539,4420,1.03888,EUR/USD,Sell,
3,2024-01-04,1.29933,1.54684,0.99332,1.17805,4079,1.00117,EUR/USD,Sell,0.64
4,2024-01-05,1.07801,1.68386,0.68714,,1832,1.48385,EUR/USD,Sell,0.68
...,...,...,...,...,...,...,...,...,...,...
224,2024-04-14,1.45378,1.63997,0.61432,1.15586,error,1.11528,EUR/USD,Hold,0.83
225,2024-06-20,1.00829,1.99525,0.84520,,4850,1.23274,EUR/USD,Hold,0.96
226,2024-07-27,1.00253,1.94385,,1.27982,2524,1.49507,EUR/USD,Sell,0.93
227,2024-03-16,1.36450,1.56353,0.93303,1.33260,2757,1.04585,EUR/USD,Sell,0.76


In [43]:
forex_data.describe()

Unnamed: 0,Open,High,Low,Close,Predicted_Close,Confidence
count,224.0,220.0,225.0,212.0,222.0,218.0
mean,1.239946,1.753113,0.75003,1.245072,1.250415,0.756468
std,0.148956,0.147816,0.150434,0.140594,0.156102,0.135125
min,1.00253,1.50542,0.50568,1.00232,1.00012,0.5
25%,1.11186,1.638832,0.61432,1.122535,1.11528,0.6525
50%,1.248105,1.7634,0.74631,1.23591,1.259605,0.76
75%,1.3651,1.877682,0.87757,1.368805,1.392158,0.87
max,1.49344,1.99525,0.99986,1.49844,1.49968,1.0


In [44]:
forex_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 229 entries, 0 to 228
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Date             226 non-null    object 
 1   Open             224 non-null    float64
 2   High             220 non-null    float64
 3   Low              225 non-null    float64
 4   Close            212 non-null    float64
 5   Volume           226 non-null    object 
 6   Predicted_Close  222 non-null    float64
 7   Currency_Pair    229 non-null    object 
 8   Signal           227 non-null    object 
 9   Confidence       218 non-null    float64
dtypes: float64(6), object(4)
memory usage: 18.0+ KB


In [45]:
forex_data.shape

(229, 10)

In [46]:
#Handling bad data: empty cells, wrong data, wrong formats, duplicates, and outliers
#Empty values
print("Missing values in each column:")
forex_data.isnull().sum()

Missing values in each column:


Date                3
Open                5
High                9
Low                 4
Close              17
Volume              3
Predicted_Close     7
Currency_Pair       0
Signal              2
Confidence         11
dtype: int64

In [47]:
#Handling missing values
#For numerical columns, we can fill the missing values with the mean or median
#0789354028

numeric_cols=['Open','High','Low','Close','Volume','Predicted_Close','Confidence']

for col in numeric_cols:
    if col in forex_data.columns:
        pd.to_numeric(forex_data[col],errors='coerce')
        forex_data[col]=forex_data[col].fillna(forex_data[col].median)

forex_data.isnull().sum()

Date               3
Open               0
High               0
Low                0
Close              0
Volume             0
Predicted_Close    0
Currency_Pair      0
Signal             2
Confidence         0
dtype: int64

In [48]:
#For categorical columns we can fill missing values with the mode
categorical_cols=['Signal']
for col in categorical_cols:
    forex_data[col]=forex_data[col].fillna(forex_data[col].mode)

forex_data.isnull().sum()

Date               3
Open               0
High               0
Low                0
Close              0
Volume             0
Predicted_Close    0
Currency_Pair      0
Signal             0
Confidence         0
dtype: int64

In [49]:
#Wrong formats
forex_data['Date']=pd.to_datetime(forex_data['Date'],errors='coerce')
#Fill NaN values in the volume with the median
forex_data['Volume']=pd.to_numeric(forex_data["Volume"], errors='coerce')
forex_data['Volume']=forex_data['Volume'].fillna(forex_data['Volume'].median)
#Clean the confidence column
forex_data['Confidence']=pd.to_numeric(forex_data['Confidence'],errors='coerce')
#replace null
forex_data['Confidence']=forex_data['Confidence'].fillna(forex_data['Confidence'].median)


In [50]:
forex_data.isnull().sum()

Date               3
Open               0
High               0
Low                0
Close              0
Volume             0
Predicted_Close    0
Currency_Pair      0
Signal             0
Confidence         0
dtype: int64

In [51]:
#Handling outliers


In [52]:
#Analysis of the data
#Correlation analysis
corr_matrix=forex_data[['Open', 'High', 'Low', 'Close', 'Volume', 'Predicted_Close', 'Confidence']].corr()

#Plot the correlation heatmap
plt.figure(figsize=(10,8))
sns.heatmap(corr_matrix,annot=True,cmap='coolwarm',fmt='.0f')
plt.title("Correlation Heatmap")
plt.show()

TypeError: float() argument must be a string or a real number, not 'method'

In [None]:
#Actual vs Predicted Close prices


In [None]:
#Signal Analysis