In [1]:
# Import required libraries
import numpy as np
import pandas as pd
import hvplot.pandas
from pathlib import Path
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

## Data Loading and Visualization

In [2]:
# Read currency data. 'Date' is parsed at load time so that every later cell
# (including the first plot) works with real datetimes rather than strings,
# which would otherwise be treated as categorical labels on the x-axis.
df_currency = pd.read_csv(
    'Resources/Price-Data/PKR_Pakistani-Rupee.csv',
    parse_dates=['Date'],
)

# Display sample data
df_currency.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2002-04-01,60.153999,60.18,59.849998,60.154999,0
1,2003-01-02,58.276501,58.32,58.25,58.275002,0
2,2003-04-21,57.811001,57.830002,57.77,57.814999,0
3,2003-12-01,57.0,57.0,57.0,57.0,0
4,2003-12-02,55.159,55.159,55.159,57.0,0


In [3]:
# Create a line plot of the 'Open' price over time (x = 'Date', y = 'Open')
df_currency.hvplot.line(
    x='Date',
    y= 'Open',
    xlabel='Date',
    ylabel='Currency',
    title='Currency vs Time'
)

## Data Preparation

In [4]:
# Parse the 'Date' column into pandas datetimes
df_currency['Date'] = pd.to_datetime(df_currency['Date'])

# Express every date as the whole number of days elapsed since the earliest date
start_date = df_currency['Date'].min()
df_currency['Days'] = df_currency['Date'].sub(start_date).dt.days

# scikit-learn expects a 2-D feature matrix, so keep 'Days' as a single column
X = df_currency[['Days']].to_numpy()

# Display sample data
X[:5]

array([[  0],
       [276],
       [385],
       [609],
       [610]], dtype=int64)

In [5]:
# The shape of X is (n_samples, 1): one row per observation, with a single feature (column)
X.shape

(5179, 1)

In [6]:
# Select the 'Close' column (a pandas Series) as the dependent variable y
y = df_currency.loc[:, "Close"]

## Building the Linear Regression Model

In [7]:
# Create a linear regression model with scikit-learn, using its default settings
model = LinearRegression()

In [8]:
# Fit the model: X holds the day offsets, y holds the 'Close' values
model.fit(X, y)

In [9]:
# Display the slope. coef_ is an array with one entry per feature, so with the
# single 'Days' feature it prints as a one-element array.
print(f"Model's slope: {model.coef_}")

Model's slope: [0.01870428]


In [10]:
# Display the y-intercept, i.e. the fitted value at Days = 0 (the start date)
print(f"Model's y-intercept: {model.intercept_}")

Model's y-intercept: 47.67864589288652


In [11]:
# Display the model's best fit line formula (X is the number of days since the start date)
print(f"Model's formula: y = {model.intercept_} + {model.coef_[0]}X")

Model's formula: y = 47.67864589288652 + 0.018704275697959426X


In [12]:
# Predict the closing value for a future date
future_date = pd.Timestamp('2024-01-01')  # Change this date to your desired future date

# The model's only feature is the number of days elapsed since start_date
future_days = (future_date - start_date).days

# predict() expects a 2-D input of shape (n_samples, n_features)
predicted_currency = model.predict(np.array([[future_days]]))

# Display the prediction for the future date
print(f"Predicted closing currency on {future_date} is: {predicted_currency[0]:.2f}")

Predicted closing currency on 2024-01-01 00:00:00 is: 196.28


In [13]:
# Make in-sample predictions: one predicted value for every row of X
predicted_y_values = model.predict(X)

In [14]:
# Build a new frame holding the original data plus a column with the model's
# predicted values; assign() returns a copy, so df_currency is left untouched
df_currency_predicted = df_currency.assign(currency_predicted=predicted_y_values)

# Display sample data
df_currency_predicted.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Days,currency_predicted
0,2002-04-01,60.153999,60.18,59.849998,60.154999,0,0,47.678646
1,2003-01-02,58.276501,58.32,58.25,58.275002,0,276,52.841026
2,2003-04-21,57.811001,57.830002,57.77,57.814999,0,385,54.879792
3,2003-12-01,57.0,57.0,57.0,57.0,0,609,59.06955
4,2003-12-02,55.159,55.159,55.159,57.0,0,610,59.088254


In [15]:
# Create a line plot of the predicted currency values over time (the best fit line)
best_fit_line = df_currency_predicted.hvplot(
    kind='line',
    x='Date',
    y='currency_predicted',
    xlabel='Date',
    ylabel='Predicted Currency',
    title='Predicted Currency Vs. Time',
)
best_fit_line

In [16]:
# Superpose the original data and the best fit line.
# The model was fitted on 'Close', so the fitted line is drawn against 'Close'
# (the series it actually predicts) rather than 'Open'.
df_currency_predicted.hvplot.line(
    x='Date',
    y=['Close', 'currency_predicted'],
    xlabel='Date',
    ylabel='Currency',
    title='Currency vs Predicted Currency'
)

## Linear Regression Model Assessment

In [17]:
# Compute metrics for the linear regression model on the data it was fitted on.
# Note: model.score() reports R^2 for a regressor, so `score` and `r2` are the
# same quantity computed two ways.
predicted_y_values = model.predict(X)
score = model.score(X, y)
r2 = r2_score(y, predicted_y_values)
mse = mean_squared_error(y, predicted_y_values)
rmse = np.sqrt(mse)
# Spread of the target itself, a baseline to compare the RMSE against
std = np.std(y)

# Print relevant metrics, one per line
print(
    f"The score is {score}.",
    f"The r2 is {r2}.",
    f"The mean squared error is {mse}.",
    f"The root mean squared error is {rmse}.",
    f"The standard deviation is {std}.",
    sep="\n",
)

The score is 0.0011415396558226432.
The r2 is 0.0011415396558226432.
The mean squared error is 1360535.8186216755.
The root mean squared error is 1166.4200866847568.
The standard deviation is 1167.0864146110218.
