In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load the dataset
file_path = 'rand-dollar.csv'
df = pd.read_csv(file_path, index_col=0)

# Separate the data into features (X) and target (y)
X = df[['Value of Exports (ZAR)']]
y = df['ZAR/USD']

# Create an 80/20 split between train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Train a simple linear regression model
lm = LinearRegression()
lm.fit(X_train, y_train)

# Output the coefficients and intercept of the model
lm.coef_, lm.intercept_

(array([8.67712634e-05]), 3.2941036551619103)

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load the dataset
file_path = 'rand-dollar.csv'
df = pd.read_csv(file_path, index_col=0)

# Separate the data into features (X) and target (y)
X = df[['Value of Exports (ZAR)']]
y = df['ZAR/USD']

# Create an 80/20 split between train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Train a simple linear regression model
lm = LinearRegression()
lm.fit(X_train, y_train)

# Output the coefficients and intercept of the model
slope = lm.coef_[0]
intercept = lm.intercept_

slope, intercept

(8.677126339792249e-05, 3.2941036551619103)

In [3]:
# Recalculate the predicted value of the exchange rate given the intercept and slope
intercept = 3.2941036551619103
slope = 8.67712634e-05
value_of_exports = 100000

# Calculate the predicted value
predicted_ZAR_USD = intercept + (slope * value_of_exports)
predicted_ZAR_USD

11.97122999516191

In [4]:
from sklearn.metrics import mean_squared_error

# Predict the exchange rates on the test set
y_pred = lm.predict(X_test)

# Calculate the Mean Squared Error
mse = mean_squared_error(y_test, y_pred)
mse

8.221852113297063

In [5]:
# Calculate the R-squared value on the test set
r_squared = lm.score(X_test, y_test)
r_squared

-8.448868004702911

In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load the dataset
file_path = 'rand-dollar.csv'
df = pd.read_csv(file_path, index_col=0)

# Convert the index to datetime using the custom format
df.index = pd.to_datetime(df.index, format='%YM%m')

# Separate the data into features (X) and target (y)
X = df[['Value of Exports (ZAR)']]
y = df['ZAR/USD']

# Create an 80/20 split between train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Train a simple linear regression model
lm = LinearRegression()
lm.fit(X_train, y_train)

# Extract the Value of Exports for August 2017
august_2017_exports = df.loc['2017-08']['Value of Exports (ZAR)']

# Ensure the value is in the correct shape for prediction
august_2017_exports = np.array(august_2017_exports).reshape(1, -1)

# Predict the exchange rate for August 2017
august_2017_prediction = lm.predict(august_2017_exports)
august_2017_prediction[0]



12.24882601767889

In [10]:
# Extract the actual ZAR/USD exchange rate for August 2017
actual_august_2017 = df.loc['2017-08']['ZAR/USD']

# Calculate the absolute error
absolute_error = abs(actual_august_2017 - august_2017_prediction[0])
absolute_error

2017-08-01    0.981174
Name: ZAR/USD, dtype: float64

In [11]:
import pandas as pd

# Load the dataset
file_path = 'rand-dollar.csv'
df = pd.read_csv(file_path, index_col=0)

# Calculate the correlation matrix
correlation_matrix = df.corr()

# Extract the correlations with the target variable (ZAR/USD)
correlations_with_target = correlation_matrix['ZAR/USD']

# Find the variable with the weakest linear relationship with ZAR/USD
weakest_relationship = correlations_with_target.drop('ZAR/USD').idxmin()
weakest_relationship, correlations_with_target[weakest_relationship]

('Claims on Non-residents (USD)', -0.7272781127462906)

In [14]:
import pandas as pd

# Load the dataset
file_path = 'rand-dollar.csv'
df = pd.read_csv(file_path, index_col=0)

# Calculate the correlation matrix
correlation_matrix = df.corr()

# Extract the correlations with the target variable (ZAR/USD)
correlations_with_target = correlation_matrix['ZAR/USD']

# Find the variable with the strongest linear relationship with ZAR/USD
strongest_relationship = correlations_with_target.drop('ZAR/USD').idxmax()
strongest_relationship, correlations_with_target[strongest_relationship]

('Consumer Price Index', 0.8726596961841487)

In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso
from sklearn.preprocessing import StandardScaler

# Load the dataset
file_path = 'rand-dollar.csv'
df = pd.read_csv(file_path, index_col=0)

# Separate the data into features (X) and target (y)
X = df.drop(columns=['ZAR/USD'])
y = df['ZAR/USD']

# Standardise the entire X matrix
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Create an 80/20 split between train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, shuffle=False)

# Train a Ridge regression model
ridge_model = Ridge()
ridge_model.fit(X_train, y_train)

# Train a LASSO regression model with alpha=0.01
lasso_model = Lasso(alpha=0.01)
lasso_model.fit(X_train, y_train)

# Output the coefficients of both models
ridge_coefficients = ridge_model.coef_
lasso_coefficients = lasso_model.coef_

ridge_coefficients, lasso_coefficients

(array([-0.57865633,  1.06118319, -0.73636708,  1.00252832,  0.02048673,
         0.15562532,  0.06812484,  0.00571159, -0.08751804,  0.52560629,
         0.01877206, -0.11293656,  0.63080368]),
 array([-0.57764657,  1.13544991, -0.81671784,  1.1718194 ,  0.        ,
         0.08023244,  0.        ,  0.03039856,  0.        ,  0.20985475,
        -0.        , -0.03094424,  0.23931429]))

In [16]:
from sklearn.metrics import mean_squared_error

# Predict the target values for the training set using the Ridge model
y_train_pred_ridge = ridge_model.predict(X_train)

# Calculate the MSE for the training set
mse_train_ridge = mean_squared_error(y_train, y_train_pred_ridge)
mse_train_ridge

0.0402329882761449

In [21]:
from sklearn.metrics import mean_squared_error

# Predict the target values for the training set using the Lasso model
y_train_pred_lasso = lasso_model.predict(X_train)

# Calculate the MSE for the training set
mse_train_lasso = mean_squared_error(y_train, y_train_pred_lasso)
mse_train_lasso

0.04695493921820656

In [22]:
from sklearn.metrics import mean_squared_error

# Predict the target values for the training set using the Ridge model
y_test_pred_ridge = ridge_model.predict(X_test)

# Calculate the MSE for the training set
mse_test_ridge = mean_squared_error(y_test, y_test_pred_ridge)
mse_test_ridge

0.6323866944567456

In [24]:
from sklearn.metrics import mean_squared_error

# Predict the target values for the training set using the Lasso model
y_test_pred_lasso = lasso_model.predict(X_test)

# Calculate the MSE for the training set
mse_test_lasso = mean_squared_error(y_test, y_test_pred_lasso)
mse_test_lasso

0.5790226574339913

In [26]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# Load the dataset
file_path = 'rand-dollar.csv'
df = pd.read_csv(file_path, index_col=0)

# Separate the data into features (X) and target (y)
X = df.drop(columns=['ZAR/USD'])
y = df['ZAR/USD']

# Standardise the entire X matrix
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Create an 80/20 split between train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, shuffle=False)

# Train a Ridge regression model
ridge_model = Ridge()
ridge_model.fit(X_train, y_train)

# Retrieve the coefficients of the trained Ridge model
ridge_coefficients = ridge_model.coef_

# Create a DataFrame for the coefficients and their corresponding feature names
coefficients_df = pd.DataFrame({
    'Feature': X.columns,
    'Coefficient': ridge_coefficients
})

# Identify the predictor variable with the highest absolute coefficient value
best_predictor = coefficients_df.loc[coefficients_df['Coefficient'].abs().idxmax()]

best_predictor

Feature        Value of Exports (ZAR)
Coefficient                  1.061183
Name: 1, dtype: object

In [27]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# Load the dataset
file_path = 'rand-dollar.csv'
df = pd.read_csv(file_path, index_col=0)

# Separate the data into features (X) and target (y)
X = df.drop(columns=['ZAR/USD'])
y = df['ZAR/USD']

# Standardise the entire X matrix
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Create an 80/20 split between train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, shuffle=False)

# Train a Ridge regression model
ridge_model = Ridge()
ridge_model.fit(X_train, y_train)

# Retrieve the coefficients of the trained Ridge model
ridge_coefficients = ridge_model.coef_

# Create a DataFrame for the coefficients and their corresponding feature names
coefficients_df = pd.DataFrame({
    'Feature': X.columns,
    'Coefficient': ridge_coefficients
})

# Identify the predictor variable with the highest absolute coefficient value
worst_predictor = coefficients_df.loc[coefficients_df['Coefficient'].abs().idxmin()]

worst_predictor

Feature        Liabilities to Non-residents (USD)
Coefficient                              0.005712
Name: 7, dtype: object

In [28]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# Load the dataset
file_path = 'rand-dollar.csv'
df = pd.read_csv(file_path, index_col=0)

# Separate the data into features (X) and target (y)
X = df.drop(columns=['ZAR/USD'])
y = df['ZAR/USD']

# Standardise the entire X matrix
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Create an 80/20 split between train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, shuffle=False)

# Train a Ridge regression model
lasso_model = Lasso()
lasso_model.fit(X_train, y_train)

# Retrieve the coefficients of the trained Lasso model
lasso_coefficients = lasso_model.coef_

# Create a DataFrame for the coefficients and their corresponding feature names
coefficients_df = pd.DataFrame({
    'Feature': X.columns,
    'Coefficient': lasso_coefficients
})

# Identify the predictor variable with the highest absolute coefficient value
best_predictor = coefficients_df.loc[coefficients_df['Coefficient'].abs().idxmax()]

best_predictor

Feature        Value of Imports (ZAR)
Coefficient                  0.430798
Name: 3, dtype: object

In [31]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler

# Load the dataset
file_path = 'rand-dollar.csv'
df = pd.read_csv(file_path, index_col=0)

# Separate the data into features (X) and target (y)
X = df.drop(columns=['ZAR/USD'])
y = df['ZAR/USD']

# Standardise the entire X matrix
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Create an 80/20 split between train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, shuffle=False)

# Train a LASSO regression model with alpha=0.01
lasso_model = Lasso(alpha=0.01)
lasso_model.fit(X_train, y_train)

# Retrieve the coefficients of the trained LASSO model
lasso_coefficients = lasso_model.coef_

# Count the number of coefficients that are exactly zero
num_zero_coefficients = (lasso_coefficients == 0).sum()

num_zero_coefficients

4