<a href="https://colab.research.google.com/github/Gsingh225/CurrencyRegression/blob/main/LinearRegPredictingCurrency.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
import kagglehub

# Fetch the most recent version of the Kaggle dataset; the call returns the
# local directory the files were placed in.
dataset_handle = "brunotly/foreign-exchange-rates-per-dollar-20002019"
path = kagglehub.dataset_download(dataset_handle)
print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/brunotly/foreign-exchange-rates-per-dollar-20002019?dataset_version_number=6...


100%|██████████| 1.21M/1.21M [00:00<00:00, 1.72MB/s]

Extracting files...
Path to dataset files: /root/.cache/kagglehub/datasets/brunotly/foreign-exchange-rates-per-dollar-20002019/versions/6





In [None]:
# Read the exchange-rate CSV from the downloaded dataset directory
data_path = path + '/Foreign_Exchange_Rates.csv'
data = pd.read_csv(data_path)

# Quick look at the first rows and at the column dtypes / non-null counts
print(data.head())
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() only appended a stray "None" line to the output.
data.info()

   Unnamed: 0  Time Serie AUSTRALIA - AUSTRALIAN DOLLAR/US$  \
0           0  2000-01-03                            1.5172   
1           1  2000-01-04                            1.5239   
2           2  2000-01-05                            1.5267   
3           3  2000-01-06                            1.5291   
4           4  2000-01-07                            1.5272   

  EURO AREA - EURO/US$ NEW ZEALAND - NEW ZELAND DOLLAR/US$  \
0               0.9847                              1.9033   
1                 0.97                              1.9238   
2               0.9676                              1.9339   
3               0.9686                              1.9436   
4               0.9714                               1.938   

  UNITED KINGDOM - UNITED KINGDOM POUND/US$ BRAZIL - REAL/US$  \
0                                    0.6146             1.805   
1                                    0.6109            1.8405   
2                                    0.6092          

In [None]:
# Keep only the date column and the Thai baht column, under shorter names
column_labels = {'Time Serie': 'Date', 'THAILAND - BAHT/US$': 'Exchange_Rate'}
thb_data = data[list(column_labels)].copy().rename(columns=column_labels)
thb_data.head()

Unnamed: 0,Date,Exchange_Rate
0,2000-01-03,36.97
1,2000-01-04,37.13
2,2000-01-05,37.1
3,2000-01-06,37.62
4,2000-01-07,37.3


In [None]:
# Parse dates and rates into datetime / float columns. Rates that cannot be
# parsed become NaN (errors='coerce'); those rows are then dropped and the
# remaining rows are ordered by date.
thb_data = (
    thb_data
    .assign(
        Date=lambda frame: pd.to_datetime(frame['Date']),
        Exchange_Rate=lambda frame: pd.to_numeric(frame['Exchange_Rate'], errors='coerce'),
    )
    .dropna()
    .sort_values('Date')
)

thb_data.head()

Unnamed: 0,Date,Exchange_Rate
0,2000-01-03,36.97
1,2000-01-04,37.13
2,2000-01-05,37.1
3,2000-01-06,37.62
4,2000-01-07,37.3


In [None]:
# Add the previous one and two rows' rates as predictor columns.
# The first two rows have no complete lag history, so dropna() removes them.
rate = thb_data['Exchange_Rate']
thb_data = thb_data.assign(Lag_1=rate.shift(1), Lag_2=rate.shift(2)).dropna()

thb_data.head()

Unnamed: 0,Date,Exchange_Rate,Lag_1,Lag_2
2,2000-01-05,37.1,37.13,36.97
3,2000-01-06,37.62,37.1,37.13
4,2000-01-07,37.3,37.62,37.1
5,2000-01-10,37.27,37.3,37.62
6,2000-01-11,37.61,37.27,37.3


In [None]:
# Features are the two lagged rates; the target is the same row's rate
feature_columns = ['Lag_1', 'Lag_2']
x = thb_data[feature_columns]
y = thb_data['Exchange_Rate']

# Preview both, separated by the same run of blank lines as three separate prints
print(x.head(), y.head(), sep='\n' * 4)

   Lag_1  Lag_2
2  37.13  36.97
3  37.10  37.13
4  37.62  37.10
5  37.30  37.62
6  37.27  37.30



2    37.10
3    37.62
4    37.30
5    37.27
6    37.61
Name: Exchange_Rate, dtype: float64


In [None]:
# Split into train and test sets without shuffling, so the rows keep their
# existing order and the last 20% of them form the test set.
split_parts = train_test_split(x, y, test_size=0.2, shuffle=False)
x_train, x_test, y_train, y_test = split_parts

(x_train.head(), y_train.head())

(   Lag_1  Lag_2
 2  37.13  36.97
 3  37.10  37.13
 4  37.62  37.10
 5  37.30  37.62
 6  37.27  37.30,
 2    37.10
 3    37.62
 4    37.30
 5    37.27
 6    37.61
 Name: Exchange_Rate, dtype: float64)

In [None]:
# Fit a linear regression on the training rows (fit() returns the estimator itself)
model = LinearRegression().fit(x_train, y_train)

# Predict the held-out rows
y_pred = model.predict(x_test)

# Compare the predictions with the held-out targets
mse, mae = mean_squared_error(y_test, y_pred), mean_absolute_error(y_test, y_pred)
r_squared = model.score(x_test, y_test)

print(
    f"Mean Squared Error: {mse}",
    f"Mean Absolute Error: {mae}",
    f"R-squared: {r_squared}",
    sep="\n",
)

Mean Squared Error: 0.0086492570072786
Mean Absolute Error: 0.06790474134533907
R-squared: 0.9971741217154321


In [None]:
"""

Overall a very simple linear regression model that aims to predict the value of the Thai Baht compared to the US Dollar

According to the R^2 and MSE, this model performs very well, but I wouldn't use it for anything serious: it wasn't tested for overfitting.
This model is only for education, just a fun beginner ml project.

"""