# 3. WLS - local linear trend model

## Libraries

In [None]:
# Libraries

import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt

## Read data

In [None]:
# Read training data
df = pd.read_csv('DST_BIL54.csv')

# Convert the 'time' column to timezone-aware datetimes; '-01' is appended so
# every year-month string parses as the first day of that month
df['time'] = pd.to_datetime(df['time'] + '-01', format='%Y-%m-%d', utc=True)

# Express each observation time as a decimal year
# NOTE(review): month / 12 maps January to year + 1/12 and December to
# year + 1 -- confirm this offset is intended rather than (month - 1) / 12.
df['year'] = df['time'].dt.year + df['time'].dt.month / 12

# Make the output variable a floating point number and divide it by 1e6
df['total'] = df['total'].astype(float) / 1E6

# Round both the 'total' and the 'year' column to 3 decimals
df['total'] = df['total'].round(3)
df['year'] = df['year'].round(3)

# Divide into train and test set: rows dated 2024-01-01 (UTC) or later are test
test_start = pd.Timestamp('2024-01-01', tz='UTC')
df_train = df[df['time'] < test_start]
df_test = df[df['time'] >= test_start]

# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output
df.info()


In [None]:
# Dimensions check and head
print('Dimensions check')
# Shapes are reported for the full data first, then the train and test splits
for frame in (df, df_train, df_test):
    print(frame.shape)

df.head()

In [None]:
# Keep data of interest: the decimal year is the regressor, the total is the
# response; both are taken from the training split only
X, y = df_train['year'], df_train['total']

for column in (X, y):
    print(column.head())

In [None]:
# Plot the training observations against time

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(X, y, label='Total')
ax.set_xlabel('Time')
ax.set_ylabel('Total')
ax.set_title('Total over Time')
ax.legend()
ax.grid(True)
plt.show()

## Solve sub-questions

In [None]:
# 3.1 Describe the variance-covariance matrix

lambda_ = 0.9

# Diagonal entries lambda_**(N-1), ..., lambda_**1, lambda_**0: the exponent
# counts down, so the last observation gets the value 1
# NOTE(review): these values are used as the WLS weights further down; if
# Sigma is meant to be the variance-covariance matrix itself it would hold
# the reciprocals -- confirm which one is intended.
powers = np.arange(len(X))[::-1]
diagonal_values = np.power(lambda_, powers)
Sigma = np.diag(diagonal_values)

# Show only the 4x4 lower-right corner of the matrix
lower_right_part = Sigma[-4:, -4:]  # Adjust the slice as needed
print("Lower right part of the matrix:")
print(lower_right_part)

print(' ')

# OLS Sigma: constant diagonal equal to the variance of y
Sigma_ols = np.var(y) * np.eye(len(y))
lower_right_part = Sigma_ols[-4:, -4:]  # Adjust the slice as needed
print("Lower right part of the OLS matrix:")
print(lower_right_part)

In [None]:
# 3.2 plot lambdas vs time

# One point per training observation: its weight against its time value
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X, diagonal_values, label='lambdas')
ax.set_xlabel('Time')
ax.set_ylabel('Lambdas')
ax.set_title('Lambdas over Time')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# 3.3 sum of all the λ-weights

# WLS: total of the weights lambda_**power
lw = diagonal_values.sum()
print(lw)

# OLS: reported as the number of training observations
lo = len(X)
print(lo)

In [None]:
# 3.4 Estimate and present θ̂1 and θ̂2 corresponding to the WLS model with λ = 0.9.

def f(j):
    """Return the regressor column vector ``[[1], [j]]`` for the value ``j``.

    The result is a NumPy array of shape (2, 1): a constant 1 stacked on top
    of ``j``.
    """
    regressor = np.array([1, j])
    return regressor.reshape(2, 1)

# Work on plain NumPy arrays so that positional access does not depend on the
# pandas index labels of X and y (X[i] / y[i] are label-based lookups and only
# work while the index happens to be exactly 0..N-1)
x_train = X.to_numpy(dtype=float)
y_train = y.to_numpy(dtype=float)

# Design matrix whose i-th row is f(x_i)^T = [1, x_i]
design = np.column_stack((np.ones_like(x_train), x_train))

# current F = sum_i w_i * f(x_i) f(x_i)^T, with w_i = diagonal_values[i]
F = design.T @ (diagonal_values[:, None] * design)
#print(F)

# current h = sum_i w_i * f(x_i) * y_i, kept as a (2, 1) column vector
h = design.T @ (diagonal_values * y_train).reshape(-1, 1)
#print(h)

# current thetas: solve F @ thetas = h directly instead of forming inv(F)
thetas = np.linalg.solve(F, h)
print(thetas)

# linear predictor for thetas, as a flat list of Python floats
# (float() on a 1-element 1-D array is deprecated since NumPy 1.25)
thet_pred = (design @ thetas).ravel().tolist()
#print(thet_pred)

# plot the training data together with the fitted line

plt.figure(figsize=(10, 6))
plt.plot(X, y, label='Total')
# the label makes the fitted line show up in the legend
plt.plot(X, thet_pred, color='red', label='linear predictor')
plt.xlabel('Time')
plt.ylabel('Total')
plt.title('Total over Time')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# 3.5 Make a forecast for the next 12 months

X_test = df_test['year']
y_test = df_test['total']

# linear predictor theta1 + theta2 * x evaluated at every test time value.
# np.ravel accepts thetas as a (2, 1) column or a flat pair, and the
# vectorised form avoids float() on a 1-D array (deprecated since NumPy 1.25)
theta1, theta2 = np.ravel(thetas)
test_pred = (theta1 + theta2 * X_test.to_numpy(dtype=float)).tolist()

# plot train data, fitted line, test data and the forecast

plt.figure(figsize=(10, 6))
plt.plot(X, y, label='Train Total')
plt.plot(X, thet_pred, color='red', label='linear predictor')
plt.plot(X_test, y_test, color='grey', label='Test Total')
# same colour and no label: the forecast continues the fitted line without
# adding a second legend entry
plt.plot(X_test, test_pred, color='red')
plt.xlabel('Time')
plt.ylabel('Total')
plt.title('Total over Time')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# 3.6 multiple lambdas

def predgen(l, X_train=None, y_train=None, X_new=None):
    """Fit a weighted least-squares line and return fitted and new predictions.

    The model is ``y = theta1 + theta2 * x``. Observation ``i`` of ``N`` gets
    the weight ``l ** (N - 1 - i)``, so the last observation has weight 1 and
    ``l = 1`` gives every observation the same weight.

    Parameters
    ----------
    l : float
        Base of the weights (the lambda of the model).
    X_train, y_train : array-like, optional
        Regressor and response values used for the fit. When omitted, the
        notebook-level ``X`` and ``y`` are used.
    X_new : array-like, optional
        Regressor values to predict for. When omitted, the notebook-level
        ``X_test`` is used.

    Returns
    -------
    tuple of (list of float, list of float)
        Predictions at the training values and at the new values.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the 2x2 system is singular (for example a single observation).

    Notes
    -----
    The estimated ``thetas`` are printed as a (2, 1) array.
    """
    # Plain float arrays: positional access must not depend on pandas index
    # labels, which is what X[i] / y[i] relied on
    x_fit = np.asarray(X if X_train is None else X_train, dtype=float)
    y_fit = np.asarray(y if y_train is None else y_train, dtype=float)
    x_new = np.asarray(X_test if X_new is None else X_new, dtype=float)

    # Exponents count down from N-1 to 0
    weights = np.power(float(l), np.arange(len(x_fit) - 1, -1, -1))

    # Design matrix whose i-th row is [1, x_i]
    design = np.column_stack((np.ones_like(x_fit), x_fit))

    # current F = sum_i w_i * f(x_i) f(x_i)^T
    F = design.T @ (weights[:, None] * design)

    # current h = sum_i w_i * f(x_i) * y_i, kept as a (2, 1) column vector
    h = design.T @ (weights * y_fit).reshape(-1, 1)

    # current thetas: solve F @ thetas = h directly instead of forming inv(F)
    thetas = np.linalg.solve(F, h)
    print(thetas)

    # linear predictor, returned as flat lists of Python floats
    theta1, theta2 = thetas.ravel()
    thet_pred = (theta1 + theta2 * x_fit).tolist()
    test_pred = (theta1 + theta2 * x_new).tolist()

    return thet_pred, test_pred

# Fit and forecast once per lambda; every call also prints its thetas
# lambda = 1 (OLS)
thet_pred1, test_pred1 = predgen(1)
# lambda = 0.9
thet_pred09, test_pred09 = predgen(0.9)
# lambda = 0.99
thet_pred099, test_pred099 = predgen(0.99)
# lambda = 0.8
thet_pred08, test_pred08 = predgen(0.8)
# lambda = 0.7
thet_pred07, test_pred07 = predgen(0.7)


fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(X, y, label='Train Total')
ax.plot(X, thet_pred09, color='red', label='WLS predictor')
ax.plot(X, thet_pred1, color='black', label='OLS')
ax.plot(X_test, y_test, color='grey', label='Test Total')
ax.plot(X_test, test_pred09, color='red')
ax.plot(X_test, test_pred1, color='black')
# Forecasts for the remaining lambdas: only the first one carries a label so
# the legend gets a single green entry
ax.plot(X_test, test_pred099, color='green', label="different lambdas")
for unlabeled_forecast in (test_pred08, test_pred07):
    ax.plot(X_test, unlabeled_forecast, color='green')

ax.set_xlabel('Time')
ax.set_ylabel('Total')
ax.set_title('Total over Time')
ax.legend()
ax.grid(True)
plt.show()
    