!pip install pandas
!pip install numpy
!pip install scikit-learn
!pip install yfinance

In [70]:
import pandas as pd
import numpy as np
import yfinance as yf
import random

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LinearRegression

In [77]:
stock ='EURUSD=X'
forex_data = yf.download(stock, start='2016-01-02', end='2022-07-03')

[*********************100%***********************]  1 of 1 completed


In [7]:
forex_data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-01-04,1.085505,1.094600,1.080600,1.085399,1.085399,0
2016-01-05,1.082603,1.084105,1.071201,1.082755,1.082755,0
2016-01-06,1.075199,1.077203,1.072200,1.075199,1.075199,0
2016-01-07,1.077795,1.087600,1.077400,1.077900,1.077900,0
2016-01-08,1.092705,1.092896,1.080801,1.092598,1.092598,0
...,...,...,...,...,...,...
2022-06-27,1.056613,1.061425,1.055086,1.056613,1.056613,0
2022-06-28,1.058089,1.060502,1.050575,1.058089,1.058089,0
2022-06-29,1.052355,1.053630,1.046803,1.052355,1.052355,0
2022-06-30,1.044659,1.047878,1.038389,1.044659,1.044659,0


In [45]:
fx_data = ['Open', 'High', 'Low', 'Close']

In [46]:
for col in fx_data:
    forex_data[col] = forex_data[col].replace(0, np.NaN)
    mean = int(forex_data[col].mean(skipna=True))
    forex_data[col] = forex_data[col].replace(np.NaN, mean)

In [34]:
#Calculate EMA
def calculate_ema(data, period):
    close_prices = data['Close']
    ema_values = close_prices.ewm(span=period, adjust=False).mean()
    return ema_values

In [36]:
# Specify the window or period for calculating EMA
ema_period= 12
# Calculate EMA using the function and add it as a new column 'EMA'
forex_data['EMA'] = calculate_ema(forex_data, ema_period)

In [38]:
#Calculate RSI


def calculate_rsi(data, period=14):
    close_prices = data['Close']
    price_diff = close_prices.diff()
    positive_diff = price_diff.where(price_diff > 0, 0)
    negative_diff = -price_diff.where(price_diff < 0, 0)
    
    avg_gain = positive_diff.rolling(period).mean()
    avg_loss = negative_diff.rolling(period).mean()
    
    relative_strength = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + relative_strength))
    
    return rsi

In [47]:
# Calculate RSI using the function and add it as a new column 'RSI'
forex_data['RSI'] = calculate_rsi(forex_data)

# Print the updated DataFrame with the RSI column
print(forex_data)


                Open      High       Low     Close  Adj Close  Volume  \
Date                                                                    
2016-01-04  1.085505  1.094600  1.080600  1.085399   1.085399       0   
2016-01-05  1.082603  1.084105  1.071201  1.082755   1.082755       0   
2016-01-06  1.075199  1.077203  1.072200  1.075199   1.075199       0   
2016-01-07  1.077795  1.087600  1.077400  1.077900   1.077900       0   
2016-01-08  1.092705  1.092896  1.080801  1.092598   1.092598       0   
...              ...       ...       ...       ...        ...     ...   
2022-06-27  1.056613  1.061425  1.055086  1.056613   1.056613       0   
2022-06-28  1.058089  1.060502  1.050575  1.058089   1.058089       0   
2022-06-29  1.052355  1.053630  1.046803  1.052355   1.052355       0   
2022-06-30  1.044659  1.047878  1.038389  1.044659   1.044659       0   
2022-07-01  1.047768  1.047790  1.036989  1.047768   1.047768       0   

                 EMA        RSI  
Date            

In [50]:
# Calculate macd momentum indicator

def calculate_macd(data, short_period=12, long_period=26, signal_period=9):
    close_prices = data['Close']
    ema_short = close_prices.ewm(span=short_period, adjust=False).mean()
    ema_long = close_prices.ewm(span=long_period, adjust=False).mean()
    
    macd_line = ema_short - ema_long
    signal_line = macd_line.ewm(span=signal_period, adjust=False).mean()
    macd_histogram = macd_line - signal_line
    
    return macd_line, signal_line, macd_histogram

In [51]:
# Calculate MACD using the function and add it as new columns 'MACD Line', 'Signal Line', 'MACD Histogram'
forex_data['MACD Line'], forex_data['Signal Line'], forex_data['MACD Histogram'] = calculate_macd(forex_data)
# Print the updated DataFrame with the MACD columns
print(forex_data)

                Open      High       Low     Close  Adj Close  Volume  \
Date                                                                    
2016-01-04  1.085505  1.094600  1.080600  1.085399   1.085399       0   
2016-01-05  1.082603  1.084105  1.071201  1.082755   1.082755       0   
2016-01-06  1.075199  1.077203  1.072200  1.075199   1.075199       0   
2016-01-07  1.077795  1.087600  1.077400  1.077900   1.077900       0   
2016-01-08  1.092705  1.092896  1.080801  1.092598   1.092598       0   
...              ...       ...       ...       ...        ...     ...   
2022-06-27  1.056613  1.061425  1.055086  1.056613   1.056613       0   
2022-06-28  1.058089  1.060502  1.050575  1.058089   1.058089       0   
2022-06-29  1.052355  1.053630  1.046803  1.052355   1.052355       0   
2022-06-30  1.044659  1.047878  1.038389  1.044659   1.044659       0   
2022-07-01  1.047768  1.047790  1.036989  1.047768   1.047768       0   

                 EMA        RSI  MACD Line  Signal

In [52]:
#Calculate ATR

def calculate_atr(data, period=14):
    high_prices = data['High']
    low_prices = data['Low']
    close_prices = data['Close']
    
    high_minus_low = high_prices - low_prices
    high_minus_close = np.abs(high_prices - close_prices.shift())
    low_minus_close = np.abs(low_prices - close_prices.shift())
    
    true_range = pd.concat([high_minus_low, high_minus_close, low_minus_close], axis=1).max(axis=1)
    atr = true_range.rolling(period).mean()
    
    return atr

In [75]:
# Calculate ATR using the function and add it as a new column 'ATR'
forex_data['ATR'] = calculate_atr(forex_data)

# Print the updated DataFrame with the ATR column
print(forex_data)

          Open      High       Low     Close  Adj Close  Volume       EMA  \
0     1.087701  1.092200  1.078200  1.087701   1.087701       0  1.087880   
1     1.086095  1.086400  1.079599  1.086201   1.086201       0  1.087622   
2     1.079902  1.085000  1.079203  1.079902   1.079902       0  1.086434   
3     1.085376  1.088000  1.082000  1.085305   1.085305       0  1.086261   
4     1.086401  1.091200  1.085305  1.086496   1.086496       0  1.086297   
...        ...       ...       ...       ...        ...     ...       ...   
1674  1.056613  1.061425  1.055086  1.056613   1.056613       0  1.054791   
1675  1.058089  1.060502  1.050575  1.058089   1.058089       0  1.055298   
1676  1.052355  1.053630  1.046803  1.052355   1.052355       0  1.054846   
1677  1.044659  1.047878  1.038389  1.044659   1.044659       0  1.053278   
1678  1.047768  1.047790  1.036989  1.047768   1.047768       0  1.052431   

            RSI  MACD Line  Signal Line  MACD Histogram       ATR  Trend  


In [55]:
# #Calculate day feature

# def calculate_day_feature(data):
#     date_index = pd.to_datetime(data['Date'])
#     day_of_week = date_index.dt.dayofweek
#     day_feature = pd.get_dummies(day_of_week, prefix='Day')
    
#     return day_feature

In [57]:
# # Calculate day feature using the function and add it as new columns prefixed with 'Day_'
# day_feature = calculate_day_feature(forex_data)
# forex_data = pd.concat([forex_data, day_feature], axis=1)

# # Print the updated DataFrame with the day feature columns
# print(forex_data)

In [63]:
#Trend determination 

def add_trend_column(data):
    # Calculate percentage change in the 'Close' price
    data['Close_pct_change'] = data['Close'].pct_change()

    # Define the condition for an uptrend
    uptrend_condition = data['Close_pct_change'] > 0

    # Create the 'Trend' column
    data['Trend'] = -1  # Default value is '-1'
    data.loc[uptrend_condition, 'Trend'] = 1

    # Drop the 'Close_pct_change' column
    data.drop(columns=['Close_pct_change'], inplace=True)

    return data

In [81]:
# Add the trend column using the function
forex_data = add_trend_column(forex_data)

# Print the updated DataFrame with the trend column
print(forex_data)
print(forex_data.columns)

                Open      High       Low     Close  Adj Close  Volume  Trend
Date                                                                        
2016-01-04  1.085505  1.094600  1.080600  1.085399   1.085399       0     -1
2016-01-05  1.082603  1.084105  1.071201  1.082755   1.082755       0     -1
2016-01-06  1.075199  1.077203  1.072200  1.075199   1.075199       0     -1
2016-01-07  1.077795  1.087600  1.077400  1.077900   1.077900       0      1
2016-01-08  1.092705  1.092896  1.080801  1.092598   1.092598       0      1
...              ...       ...       ...       ...        ...     ...    ...
2022-06-27  1.056613  1.061425  1.055086  1.056613   1.056613       0      1
2022-06-28  1.058089  1.060502  1.050575  1.058089   1.058089       0      1
2022-06-29  1.052355  1.053630  1.046803  1.052355   1.052355       0     -1
2022-06-30  1.044659  1.047878  1.038389  1.044659   1.044659       0     -1
2022-07-01  1.047768  1.047790  1.036989  1.047768   1.047768       0      1

In [66]:
# Remove rows with NaN values
forex_data = forex_data.dropna()

# Reset the index
forex_data = forex_data.reset_index(drop=True)

In [67]:
#Training the model

# Split the data into features (X) and target variable (y)
X = forex_data.drop(columns=['Open', 'High', 'Low', 'Close', 'EMA', 'RSI', 'MACD Line', 'Signal Line', 'MACD Histogram'])# Features
y = forex_data['Trend']  # Target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create a KNN classifier and train the model
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled, y_train)

# Evaluate the model
accuracy = knn.score(X_test_scaled, y_test)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [69]:
# Define the features and target variables
features = ['EMA', 'RSI', 'ATR']
target = 'Final_Signal'

# Split the data into training and testing sets
X = forex_data[features]
y = forex_data[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create a KNN classifier and train the model
k = 5  # Number of neighbors
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred = knn.predict(X_test_scaled)

# Calculate the accuracy of the predictions
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

KeyError: 'Final_Signal'