# Import Necessary Libraries


In [None]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso


# Load and Prepare Data

In [None]:
# Read the dataset; the first CSV column is used as the row index
data = pd.read_csv('rand-dollar.csv', index_col=0)

# Target (y1) is the exchange rate; features (X1) are all remaining columns
y1 = data['ZAR/USD']
X1 = data.drop(columns='ZAR/USD')

# Hold out the last 20% of rows for testing; shuffle=False keeps the rows in
# their original order, so the test set is the tail of the dataset
X_train, X_test, y_train, y_test = train_test_split(
    X1,
    y1,
    test_size=0.2,
    shuffle=False,
)


In [None]:
# Preview the last five rows to sanity-check the load
data.tail(n=5)

Unnamed: 0,ZAR/USD,Value of Exports (USD),Value of Exports (ZAR),Value of Imports (USD),Value of Imports (ZAR),IMF Reserve Position (USD),Foreign Exchange (USD),Claims on Non-residents (USD),Liabilities to Non-residents (USD),Savings Rate,Lending Rate,Government Bonds,"Financial Market Prices, Equities Index",Consumer Price Index
2017M08,13.23,7799.19,103199.17,7724.11,102205.79,661.54,38889.0,43768.32,33965.83,4.36,10.25,9.12,167.16,146.4
2017M09,13.16,7761.13,102150.48,7810.59,102801.49,661.5,41424.0,44318.4,34351.03,4.4,10.25,9.04,168.34,147.11
2017M10,13.71,7601.5,104205.42,7661.57,105028.95,656.62,41018.0,44003.77,34206.03,4.41,10.25,9.4,175.15,147.53
2017M11,14.06,8264.55,116185.99,7704.4,108311.23,654.14,42372.0,42813.27,32917.8,4.42,10.25,9.77,178.42,147.67
2017M12,13.25,7874.08,104320.26,7068.85,93652.18,638.23,42735.0,47101.71,37918.26,4.44,10.25,9.47,170.16,148.38


# Create and Train Model

In [None]:
# Simple linear regression: a single predictor, 'Value of Exports (ZAR)'.
# Double brackets keep the selection two-dimensional (a one-column DataFrame),
# which is the shape scikit-learn estimators expect for features.
simple_features = ['Value of Exports (ZAR)']

model = LinearRegression()

# Fit on the training rows only
model.fit(X_train[simple_features], y_train)


## What is the value of the intercept of the model?



In [None]:
# Constant term of the fitted line, i.e. the prediction when the feature is 0
intercept = model.intercept_
print("Intercept:", intercept)


Intercept: 3.2941036551619067


## What is the value of the slope of this model?

In [None]:
# coef_ is an array with one entry per feature; this model has a single
# feature, so the array holds exactly one slope value
slope = model.coef_
print("Slope:", slope)


Slope: [8.67712634e-05]


## What is the predicted value of the exchange rate in a month where exports total R100 000?

In [None]:
# Ask the fitted model for the prediction instead of re-typing the printed
# intercept and (rounded) slope: the result then uses the full-precision
# parameters and stays correct if the data or the model is ever refitted.
# The column name must match the one used when the model was fitted.
exports_query = pd.DataFrame({'Value of Exports (ZAR)': [100000]})
predicted_exchange_rate = model.predict(exports_query)[0]
print(predicted_exchange_rate)


11.971229995161906


## What is the MSE of the model on the test set?

In [None]:
# Predict the exchange rate for the held-out rows using the same single feature
test_exports = X_test[['Value of Exports (ZAR)']]
y_pred = model.predict(test_exports)


In [None]:
# Mean squared error between the actual and predicted test-set exchange rates
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("MSE:", mse)


MSE: 8.221852113297055


## What is the R-squared value of the model on the test set?

In [None]:
# Coefficient of determination on the test set. A negative value means the
# model predicts the held-out data worse than always predicting its mean.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("R-squared:", r2)


R-squared: -8.448868004702902


## What is the predicted value for August 2017?

In [None]:
# Get the value of exports for August 2017 from the loaded dataset. The rows
# are indexed by period label (e.g. '2017M08'), so look the value up directly
# rather than relying on a variable that is never defined in this notebook.
exports_aug_2017 = data.loc['2017M08', 'Value of Exports (ZAR)']

# Wrap the value in a one-row DataFrame whose column name matches the feature
# the model was fitted on
august_2017_data = pd.DataFrame({'Value of Exports (ZAR)': [exports_aug_2017]})
predicted_exchange_rate = model.predict(august_2017_data)[0]

print("Predicted exchange rate for August 2017:", predicted_exchange_rate)


Predicted exchange rate for August 2017: 12.248826017678892


## What is the absolute error for this prediction?

The actual exchange rate for August 2017 is 13.23.

The predicted exchange rate for August 2017 is 12.25 (rounded from 12.2488).

Therefore, the absolute error is:

|Predicted Value - Actual Value| = |12.25 - 13.23| = 0.98

## Which variable has the weakest linear relationship with the ZAR/USD exchange rate?

In [None]:
# Pairwise Pearson correlation between every pair of columns (Pearson is the
# default method of DataFrame.corr)
correlations = data.corr(method='pearson')

# Show the full correlation matrix
print(correlations)

                                          ZAR/USD  Value of Exports (USD)  \
ZAR/USD                                  1.000000               -0.335780   
Value of Exports (USD)                  -0.335780                1.000000   
Value of Exports (ZAR)                   0.784756                0.298409   
Value of Imports (USD)                  -0.350606                0.735534   
Value of Imports (ZAR)                   0.791994                0.112544   
IMF Reserve Position (USD)               0.855311               -0.133469   
Foreign Exchange (USD)                   0.317203                0.587733   
Claims on Non-residents (USD)           -0.727278                0.137726   
Liabilities to Non-residents (USD)      -0.472230               -0.313378   
Savings Rate                             0.041555               -0.507888   
Lending Rate                            -0.056806               -0.499981   
Government Bonds                         0.191455               -0.241177   

From the correlation matrix above, the correlations of the ZAR/USD exchange rate with the variables in question are:

- Savings Rate: 0.041555
- Lending Rate: -0.056806
- Claims on Non-residents: -0.727278
- Consumer Price Index: 0.872660

The strength of a linear relationship is given by the absolute value of the correlation:

- Savings Rate: 0.041555
- Lending Rate: 0.056806
- Claims on Non-residents: 0.727278
- Consumer Price Index: 0.872660

### Savings Rate has the smallest absolute correlation, so it has the weakest linear relationship with the ZAR/USD exchange rate.

In [None]:
# Re-read the dataset for the multi-feature models
df = pd.read_csv('rand-dollar.csv', index_col=0)

# Target (y) is the exchange rate; features (X) are all remaining columns
y = df['ZAR/USD']
X = df.drop(columns='ZAR/USD')

# Unshuffled 80/20 split: the final 20% of rows form the test set
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

# Standardize the features. The scaler learns its mean/variance from the
# training rows only and the same transformation is then applied to the test
# rows. NOTE: X_train / X_test are rebound here to the scaler's output, so
# they no longer refer to the DataFrames produced by the split above.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Ridge with its default penalty strength, fitted on the scaled training data
ridge_model = Ridge(alpha=1.0).fit(X_train, y_train)

# Lasso with a small penalty, fitted on the same scaled training data
lasso_model = Lasso(alpha=0.01)
lasso_model.fit(X_train, y_train)

## What is the training MSE of the Ridge model?

In [None]:
# Ridge: predict on the (scaled) training features and score against y_train
y_train_pred_ridge = ridge_model.predict(X_train)
training_mse_ridge = mean_squared_error(y_true=y_train, y_pred=y_train_pred_ridge)
print("Training MSE for Ridge model:", training_mse_ridge)

Training MSE for Ridge model: 0.03998637919597087


## What is the training MSE of the LASSO model?



In [None]:
# Lasso: predict on the (scaled) training features and score against y_train
y_train_pred_lasso = lasso_model.predict(X_train)
training_mse_lasso = mean_squared_error(y_true=y_train, y_pred=y_train_pred_lasso)
print("Training MSE for Lasso model:", training_mse_lasso)


Training MSE for Lasso model: 0.0469446388604385


## What is the testing MSE of the Ridge model?



In [None]:
# Ridge: predict on the (scaled) test features and score against y_test
y_test_pred_ridge = ridge_model.predict(X_test)
testing_mse_ridge = mean_squared_error(y_true=y_test, y_pred=y_test_pred_ridge)
print("Testing MSE for Ridge model:", testing_mse_ridge)

Testing MSE for Ridge model: 0.6273531237788174


In [None]:
# Lasso: predict on the (scaled) test features and score against y_test
y_test_pred_lasso = lasso_model.predict(X_test)
testing_mse_lasso = mean_squared_error(y_true=y_test, y_pred=y_test_pred_lasso)
print("Testing MSE for Lasso model:", testing_mse_lasso)

Testing MSE for Lasso model: 0.6212875448834096


Model: Ridge
Training MSE: 0.03998637919597087
Testing MSE: 0.6273531237788174
------------------------------
Model: Lasso
Training MSE: 0.0469446388604385
Testing MSE: 0.6212875448834096
------------------------------


## Based on the values of the Ridge model’s variable coefficients, which indicator is the best predictor of the target variable?

Value of Imports (ZAR)
Value of Exports (ZAR)
Liabilities to Non-residents (USD)

*   Government Bonds
*   Value of Imports (ZAR)
*   Value of Exports (ZAR)
*   Liabilities to Non-residents (USD)
  



In [None]:
# Pair every Ridge coefficient with the name of the feature it belongs to
feature_names = X.columns
coefficients = ridge_model.coef_
coefficients_df = pd.DataFrame({'Feature': feature_names, 'Coefficient': coefficients})

# Show the full coefficient table
print(coefficients_df)

# The model was fitted on standardized features, so coefficient magnitudes are
# on a common scale; the largest absolute coefficient marks the best predictor
strongest_row = coefficients_df['Coefficient'].abs().idxmax()
best_predictor = coefficients_df.loc[strongest_row]

print(f"The best predictor is '{best_predictor['Feature']}' with a coefficient value of {best_predictor['Coefficient']}.")

                                    Feature  Coefficient
0                    Value of Exports (USD)    -0.639615
1                    Value of Exports (ZAR)     0.943210
2                    Value of Imports (USD)    -0.697817
3                    Value of Imports (ZAR)     0.885124
4                IMF Reserve Position (USD)    -0.038984
5                    Foreign Exchange (USD)     0.136498
6             Claims on Non-residents (USD)     0.054841
7        Liabilities to Non-residents (USD)     0.003169
8                              Savings Rate    -0.066482
9                              Lending Rate     0.558796
10                         Government Bonds     0.017588
11  Financial Market Prices, Equities Index    -0.116183
12                     Consumer Price Index     0.550911
The best predictor is 'Value of Exports (ZAR)' with a coefficient value of 0.9432099826936856.


## Based on the values of the Ridge model’s variable coefficients, which indicator is the worst predictor of the target variable?




In [None]:
# Build the Ridge coefficient table directly from the fitted model. The shared
# name `coefficients_df` is reassigned by the LASSO section further down, so
# relying on it here would report the LASSO result whenever this cell is
# re-run after that section.
ridge_coefficients_df = pd.DataFrame({'Feature': X.columns, 'Coefficient': ridge_model.coef_})

# Identify the feature with the smallest absolute coefficient value
worst_predictor = ridge_coefficients_df.loc[ridge_coefficients_df['Coefficient'].abs().idxmin()]

print(f"The worst predictor is '{worst_predictor['Feature']}' with a coefficient value of {worst_predictor['Coefficient']}.")

The worst predictor is 'Liabilities to Non-residents (USD)' with a coefficient value of 0.003169248838156524.


## Based on the values of the LASSO model’s variable coefficients, which indicator is the best predictor of the target variable?



In [None]:
# Pair every LASSO coefficient with the name of the feature it belongs to.
# NOTE: `coefficients` and `coefficients_df` are rebound here and now describe
# the LASSO model rather than the Ridge model.
feature_names = X.columns
coefficients = lasso_model.coef_
coefficients_df = pd.DataFrame({'Feature': feature_names, 'Coefficient': coefficients})

# Show the full coefficient table
print(coefficients_df)

# Largest absolute coefficient marks the best predictor under this model
strongest_row = coefficients_df['Coefficient'].abs().idxmax()
best_predictor = coefficients_df.loc[strongest_row]

print(f"The best predictor according to the LASSO model is '{best_predictor['Feature']}' with a coefficient value of {best_predictor['Coefficient']}.")

                                    Feature  Coefficient
0                    Value of Exports (USD)    -0.663943
1                    Value of Exports (ZAR)     1.028572
2                    Value of Imports (USD)    -0.703570
3                    Value of Imports (ZAR)     0.931447
4                IMF Reserve Position (USD)    -0.000000
5                    Foreign Exchange (USD)     0.029681
6             Claims on Non-residents (USD)     0.012059
7        Liabilities to Non-residents (USD)     0.022318
8                              Savings Rate     0.000000
9                              Lending Rate     0.273183
10                         Government Bonds    -0.000000
11  Financial Market Prices, Equities Index    -0.046981
12                     Consumer Price Index     0.324598
The best predictor according to the LASSO model is 'Value of Exports (ZAR)' with a coefficient value of 1.0285722610690782.


## How many variables have coefficients equal to zero in the LASSO model?

In [None]:
# Count the number of variables with coefficients equal to zero. Read the
# coefficients straight from the fitted LASSO model: the shared global
# `coefficients` is also assigned by the Ridge section, so it is only correct
# here if the cells happen to have been run in one particular order.
num_zero_coefficients = (lasso_model.coef_ == 0).sum()

print(f"Number of variables with coefficients equal to zero in the LASSO model: {num_zero_coefficients}")

Number of variables with coefficients equal to zero in the LASSO model: 3
