In [1]:
# Import required libraries
import numpy as np
import pandas as pd
import hvplot.pandas
from pathlib import Path
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

## Data Loading and Visualization

In [2]:
# Read salary data
file_path = Path("Resources/fx_interest.csv")
df = pd.read_csv(file_path)

# Display sample data
df.head()

Unnamed: 0,Date,Relative_interest_rate,Open,High,Low,Close,Adj Close,AU_interest_rate,US_interest_rate
0,Jan-13,0.96,1.038799,1.059165,1.038217,1.041884,1.041884,3.0,0.13
1,Feb-13,0.96,1.043297,1.045828,1.018558,1.023227,1.023227,3.0,0.13
2,Mar-13,0.96,1.021826,1.049803,1.011757,1.040875,1.040875,3.0,0.13
3,Apr-13,0.96,1.041309,1.058391,1.022275,1.034404,1.034404,3.0,0.13
4,May-13,0.95,1.037215,1.038314,0.953016,0.966931,0.966931,2.75,0.13


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 131 entries, 0 to 130
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Date                    131 non-null    object 
 1   Relative_interest_rate  131 non-null    float64
 2   Open                    131 non-null    float64
 3   High                    131 non-null    float64
 4   Low                     131 non-null    float64
 5   Close                   131 non-null    float64
 6   Adj Close               131 non-null    float64
 7   AU_interest_rate        131 non-null    float64
 8   US_interest_rate        130 non-null    float64
dtypes: float64(8), object(1)
memory usage: 9.3+ KB


In [4]:
# Create a scatter plot of years_experience versus the salary information
fx_plot = df.hvplot.scatter(
    #x="AU_interest_rate",
     x="Relative_interest_rate",
   #y="Close",
    y="Open",
    title="Exchange Rate vs Interest Rate "
)
fx_plot

## Data Preparation

In [5]:
# Reformat data of the independent variable X as a single-column array
X = df["Relative_interest_rate"].values.reshape(-1, 1)
#X = df["AU_interest_rate"].values.reshape(-1, 1)
# Display sample data
X[:5]

array([[0.96],
       [0.96],
       [0.96],
       [0.96],
       [0.95]])

In [6]:
# The shape of X is 30 samples, with a single feature (column)
X.shape

(131, 1)

In [7]:
# Create an array for the dependent variable y
y = df["Open"]

## Building the Linear Regression Model

In [8]:
# Create a model with scikit-learn
model = LinearRegression()

In [9]:
# Fit the data into the model
model.fit(X, y)

In [10]:
# Display the slope
print(f"Model's slope: {model.coef_}")

Model's slope: [0.03604583]


In [11]:
# Display the y-intercept
print(f"Model's y-intercept: {model.intercept_}")

Model's y-intercept: 0.761321744535376


In [12]:
# Display the model's best fit line formula
print(f"Model's formula: y = {model.intercept_} + {model.coef_[0]}X")

Model's formula: y = 0.761321744535376 + 0.03604582752818702X


In [13]:
# Display the formula to predict the AU_interest_rate is 0.5
print(f"Model's formula: y = {model.intercept_} + {model.coef_[0]} * 0.5")

# Predict the exchange rate when  the AU_interest_rate is 5.0
y_5 = model.intercept_ + model.coef_[0] * 5

# Display the prediction
print(f"Predicted AUD/USD when the Relative_interest_rate is 0.5: ${y_5:.2f}")

Model's formula: y = 0.761321744535376 + 0.03604582752818702 * 0.5
Predicted AUD/USD when the Relative_interest_rate is 0.5: $0.94


In [14]:
# Make predictions using the X set
predicted_y_values = model.predict(X)

In [15]:
# Create a copy of the original data
df_fx_predicted = df.copy()

# Add a column with the predicted salary values
df_fx_predicted["AUD/USD_predicted"] = predicted_y_values

# Display sample data
df_fx_predicted.head()

Unnamed: 0,Date,Relative_interest_rate,Open,High,Low,Close,Adj Close,AU_interest_rate,US_interest_rate,AUD/USD_predicted
0,Jan-13,0.96,1.038799,1.059165,1.038217,1.041884,1.041884,3.0,0.13,0.795926
1,Feb-13,0.96,1.043297,1.045828,1.018558,1.023227,1.023227,3.0,0.13,0.795926
2,Mar-13,0.96,1.021826,1.049803,1.011757,1.040875,1.040875,3.0,0.13,0.795926
3,Apr-13,0.96,1.041309,1.058391,1.022275,1.034404,1.034404,3.0,0.13,0.795926
4,May-13,0.95,1.037215,1.038314,0.953016,0.966931,0.966931,2.75,0.13,0.795565


In [16]:
# Create a line plot of years_experience versus the predicted salary values
best_fit_line = df_fx_predicted.hvplot.line(
    x = "Relative_interest_rate",
    y = "AUD/USD_predicted",
    color = "red"
)
best_fit_line

In [17]:
# Superpose the original data and the best fit line
fx_plot * best_fit_line

## Linear Regression Model Assessment

In [18]:
# Import relevant metrics from scikit-learn
from sklearn.metrics import mean_squared_error, r2_score

In [19]:
# Compute metrics for the linear regression model: score, r2, mse, rmse, std
score = model.score(X, y, sample_weight=None)
r2 = r2_score(y, predicted_y_values)
mse = mean_squared_error(y, predicted_y_values)
rmse = np.sqrt(mse)
std = np.std(y)

# Print relevant metrics.
print(f"The score is {score}.")
print(f"The r2 is {r2}.")
print(f"The mean squared error is {mse}.")
print(f"The root mean squared error is {rmse}.")
print(f"The standard deviation is {std}.")

The score is 0.17379318076108718.
The r2 is 0.17379318076108718.
The mean squared error is 0.007334500668597201.
The root mean squared error is 0.08564169935608004.
The standard deviation is 0.09421952027334059.
