# Machine learning project: simple linear regression
An analysis of how selected economic factors relate to the ZAR/USD exchange rate.

In [12]:
# Install necessary libraries
!pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-win_amd64.whl.metadata (15 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.15.2-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.6.1-cp312-cp312-win_amd64.whl (11.1 MB)
   ---------------------------------------- 0.0/11.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.1 MB ? eta -:--:--
    --------------------------------------- 0.3/11.1 MB ? eta -:--:--
   - -------------------------------------- 0.5/11.1 MB 1.1 MB/s eta 0:00:10
   -- ------------------------------------- 0.8/11.1 MB 1.5 MB/s eta 0:00:08
   ---- ----------------------------------- 1.3/11.1 MB 1.8 MB/s eta 0:00:06
   ---- ----------------------------------- 1.3/11.

In [39]:
#Import libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge

In [7]:
# Load the rand/dollar dataset; the first CSV column (month labels such as
# 2008M01) becomes the row index.
rd_df = pd.read_csv('rand-dollar.csv', index_col=0)

# Preview the first five rows
rd_df.head(n=5)

Unnamed: 0,ZAR/USD,Value of Exports (USD),Value of Exports (ZAR),Value of Imports (USD),Value of Imports (ZAR),IMF Reserve Position (USD),Foreign Exchange (USD),Claims on Non-residents (USD),Liabilities to Non-residents (USD),Savings Rate,Lending Rate,Government Bonds,"Financial Market Prices, Equities Index",Consumer Price Index
2008M01,7.01,5611.9,39356.82,8105.11,56841.94,1.92,29526.78,51547.61,37752.29,4.53,14.5,8.36,312.97,85.48
2008M02,7.66,6126.16,46946.33,8159.43,62527.78,2.01,29943.04,59702.31,45927.67,5.61,14.5,8.69,335.09,85.91
2008M03,7.97,6417.48,51150.89,8010.87,63851.12,2.05,30246.54,56448.18,42565.17,5.62,14.5,9.16,348.84,87.04
2008M04,7.79,7215.71,56174.34,9112.97,70944.49,2.03,30399.21,55534.75,43342.7,5.8,15.0,9.15,358.47,87.61
2008M05,7.62,7379.67,56240.45,9090.14,69275.97,2.1,30467.81,57682.89,46533.13,6.5,15.0,9.51,371.98,88.32


In [15]:
# Target (y): the exchange rate. Features (X): a single predictor, kept as a
# one-column DataFrame by the double brackets.
y = rd_df['ZAR/USD']
X = rd_df[['Value of Exports (ZAR)']]

# Hold out the final 20% of rows as the test set; shuffle=False keeps the rows
# in their original order instead of sampling them at random.
split_parts = train_test_split(X, y, shuffle=False, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts

# Fit a simple linear regression on the training rows
lm = LinearRegression()
lm.fit(X_train, y_train)

In [16]:
# Intercept of the fitted line: the ZAR/USD rate the model predicts when
# 'Value of Exports (ZAR)' is zero.
intercept_value = lm.intercept_
print(intercept_value)

3.2941036551619103


In [20]:
# Slope of the fitted line: the change in predicted ZAR/USD per unit of
# 'Value of Exports (ZAR)'. Note that lm.coef_ is an array (here with a single
# element, one per feature), not a scalar.
slope_value = lm.coef_
print(slope_value)

[8.67712634e-05]


In [21]:
# Predicted exchange rate for a month in which 'Value of Exports (ZAR)' is
# 100000, computed by hand from the fitted line: y = slope * x + intercept.
# slope_value is a one-element array, so the result is one as well.
exports_zar = 100000
predicted_value = slope_value * exports_zar + intercept_value
print(predicted_value)

[11.97122999]


In [24]:
# Mean Squared Error (MSE) of the model on the held-out test set

# Model predictions for the test rows
y_pred = lm.predict(X_test)

# Average squared gap between the actual and the predicted exchange rates
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(mse)

8.221852113297063


In [25]:
# R² (coefficient of determination) of the model on the held-out test set.
# R² can be negative: that means the model predicts the test data worse than
# a constant equal to the test-set mean would.
r_squared = lm.score(X_test, y_test)
print(r_squared)

-8.448868004702911


In [32]:
# Predicted ZAR/USD for August 2017: plug the exports figure 103199.17 into
# the fitted line, then unwrap the one-element array into a scalar.
exports_2017M08 = 103199.17
predicted_value_2017M08 = (slope_value * exports_2017M08 + intercept_value)[0]
print(predicted_value_2017M08)

12.24882601767889


In [34]:
# Absolute error of the August 2017 prediction.
#
# The test row is located by its 'Value of Exports (ZAR)' figure. An exact `==`
# against a float parsed from CSV is fragile (the parsed value can differ from
# the literal in its last bits), so match within 0.005 instead. The column is
# recorded to two decimals, so this tolerance still pins down a single value.
exports_2017M08 = 103199.17
is_target_row = np.isclose(
    X_test['Value of Exports (ZAR)'].to_numpy(),
    exports_2017M08,
    rtol=0,
    atol=0.005,
)

# First (expected: only) matching row; raises IndexError if nothing matches
actual_value = y_test[is_target_row].iloc[0]
absolute_error = abs(actual_value - predicted_value_2017M08)
print(absolute_error)

0.9811739823211099


In [38]:
# Rank every column by the strength of its correlation with ZAR/USD

# Pairwise correlations between all columns of the dataset
correlation_matrix = rd_df.corr()

# Magnitude (sign dropped) of each column's correlation with the target.
# The target's own entry is included, so the top row is always 1.0.
target_correlations = correlation_matrix.loc[:, 'ZAR/USD'].abs()
print(target_correlations.sort_values(ascending=False))

ZAR/USD                                    1.000000
Consumer Price Index                       0.872660
IMF Reserve Position (USD)                 0.855311
Value of Imports (ZAR)                     0.791994
Value of Exports (ZAR)                     0.784756
Claims on Non-residents (USD)              0.727278
Liabilities to Non-residents (USD)         0.472230
Value of Imports (USD)                     0.350606
Value of Exports (USD)                     0.335780
Foreign Exchange (USD)                     0.317203
Government Bonds                           0.191455
Financial Market Prices, Equities Index    0.139858
Lending Rate                               0.056806
Savings Rate                               0.041555
Name: ZAR/USD, dtype: float64
