In [6]:
# Run the stored data from the Data_Preparation.ipynb
%run ..//Data_Preparation.ipynb

Data types in 'indu' dataframe:
Time       datetime64[ns]
indu_Q1           float64
indu_Q2           float64
indu_Q3           float64
indu_Q4           float64
indu_Q5           float64
indu_Q7           float64
dtype: object

Data types in 'cons' dataframe:
Time        datetime64[ns]
cons_Q1            float64
cons_Q2            float64
cons_Q3            float64
cons_Q4            float64
cons_Q5            float64
cons_Q6            float64
cons_Q7            float64
cons_Q8            float64
cons_Q9            float64
cons_Q10           float64
cons_Q11           float64
cons_Q12           float64
dtype: object

Data types in 'gdp' dataframe:
Time     datetime64[ns]
Value           float64
dtype: object
indu_Q1           float64
indu_Q2           float64
indu_Q3           float64
indu_Q4           float64
indu_Q5           float64
indu_Q7           float64
Time       datetime64[ns]
dtype: object
cons_Q1            float64
cons_Q2            float64
cons_Q3            float64
con

In [7]:
#Split the data into training and testing data
indu_train = indu[(indu['Time'] >= '1990-01-01') & (indu['Time'] < '2020-01-01')]
cons_train = cons[(cons['Time'] >= '1990-01-01') & (cons['Time'] < '2020-01-01')]
gdp_train = gdp[(gdp['Time'] >= '1990-01-01') & (gdp['Time'] < '2020-01-01')]

indu_test = indu[(indu['Time'] >= '2020-01-01') & (indu['Time'] < '2024-01-01')]
cons_test = cons[(cons['Time'] >= '2020-01-01') & (cons['Time'] < '2024-01-01')]
gdp_test = gdp[(gdp['Time'] >= '2020-01-01') & (gdp['Time'] < '2024-01-01')]

In [8]:
#Importing the packages enabling the Large Language Model
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline


In [9]:
model_name = "mistralai/Mistral-7B-v0.1"

#Defining what device to use
device = torch.device("cpu")

model = AutoModelForCausalLM.from_pretrained(model_name, torchscript=True, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name)
generation_config = GenerationConfig.from_pretrained(model_name)

Loading checkpoint shards: 100%|██████████| 2/2 [00:30<00:00, 15.45s/it]


In [10]:
#Setting specific configs
generation_config.temperature = 0.0001 #Making the model deterministic

In [16]:
pre_prompt = "You are a time series forecasting model designed to predict the Danish GDP based on historical data. You will be asked what the GDP value will be for a certain date, corresponding to the related quarter."
for i, row in gdp_train.iterrows():
    pre_prompt += f"{i+1}. What was the GDP in {row['Time']}?\n Answer: {row['Value']} billion danish kroner\n\n"



print(pre_prompt)

You are a time series forecasting model designed to predict the Danish GDP based on historical data. You will be asked what the GDP value will be for a certain date, corresponding to the related quarter.1. What was the GDP in 1990-01-01 00:00:00?
 Answer: 210.2 billion danish kroner

2. What was the GDP in 1990-04-01 00:00:00?
 Answer: 218.1 billion danish kroner

3. What was the GDP in 1990-07-01 00:00:00?
 Answer: 209.5 billion danish kroner

4. What was the GDP in 1990-10-01 00:00:00?
 Answer: 217.8 billion danish kroner

5. What was the GDP in 1991-01-01 00:00:00?
 Answer: 220.2 billion danish kroner

6. What was the GDP in 1991-04-01 00:00:00?
 Answer: 226.1 billion danish kroner

7. What was the GDP in 1991-07-01 00:00:00?
 Answer: 219.6 billion danish kroner

8. What was the GDP in 1991-10-01 00:00:00?
 Answer: 224.6 billion danish kroner

9. What was the GDP in 1992-01-01 00:00:00?
 Answer: 228.1 billion danish kroner

10. What was the GDP in 1992-04-01 00:00:00?
 Answer: 231.1

In [17]:
#Creating a invididual prompt for each quarter
period_prompt = """
What is the GDP in {}?.
Answer: 
"""

print(period_prompt)


What is the GDP in {}?.
Answer: 



In [18]:
#Creating the pipeline for the model with the given model, tokenizer and generation config
predictions = pipeline("text-generation", 
                       model=model, 
                       tokenizer=tokenizer, 
                       config=generation_config, 
                       device=device
                       )

In [19]:
# Concatenate the pre-prompt with each period prompt
def generate_output(pre_prompt, period_prompt, periods):
    output = []
    for period in periods:
        full_prompt = pre_prompt + period_prompt.format(period)
        # Generate predictions for the concatenated prompt 
        period_output = predictions(full_prompt, max_length=100, num_return_sequences=1)
        output.append(period_output)
        print(f"Prediction created for {period}: {period_output} ")
    return output

# Define the periods you want to predict from gdp_test['Time']
periods_to_predict = gdp_test['Time'].dt.to_period('Q').astype(str)

# Generate output for each period
output = generate_output(pre_prompt, period_prompt, periods_to_predict)

# Print the output for each period
for period, period_output in zip(periods_to_predict, output):
    print(f"Predictions for {period}:")
    print(period_output)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Prediction created for 2020Q1: [{'generated_text': 'You are a time series forecasting model designed to predict the Danish GDP based on historical data. You will be asked what the GDP value will be for a certain date, corresponding to the related quarter.1. What was the GDP in 1990-01-01 00:00:00?\n Answer: 210.2 billion danish kroner\n\n2. What was the GDP in 1990-04-01 00:00:00?\n Answer: 218.1 billion danish kroner\n\n3. What was the GDP in 1990-07-01 00:00:00?\n Answer: 209.5 billion danish kroner\n\n4. What was the GDP in 1990-10-01 00:00:00?\n Answer: 217.8 billion danish kroner\n\n5. What was the GDP in 1991-01-01 00:00:00?\n Answer: 220.2 billion danish kroner\n\n6. What was the GDP in 1991-04-01 00:00:00?\n Answer: 226.1 billion danish kroner\n\n7. What was the GDP in 1991-07-01 00:00:00?\n Answer: 219.6 billion danish kroner\n\n8. What was the GDP in 1991-10-01 00:00:00?\n Answer: 224.6 billion danish kroner\n\n9. What was the GDP in 1992-01-01 00:00:00?\n Answer: 228.1 billi

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Prediction created for 2020Q2: [{'generated_text': 'You are a time series forecasting model designed to predict the Danish GDP based on historical data. You will be asked what the GDP value will be for a certain date, corresponding to the related quarter.1. What was the GDP in 1990-01-01 00:00:00?\n Answer: 210.2 billion danish kroner\n\n2. What was the GDP in 1990-04-01 00:00:00?\n Answer: 218.1 billion danish kroner\n\n3. What was the GDP in 1990-07-01 00:00:00?\n Answer: 209.5 billion danish kroner\n\n4. What was the GDP in 1990-10-01 00:00:00?\n Answer: 217.8 billion danish kroner\n\n5. What was the GDP in 1991-01-01 00:00:00?\n Answer: 220.2 billion danish kroner\n\n6. What was the GDP in 1991-04-01 00:00:00?\n Answer: 226.1 billion danish kroner\n\n7. What was the GDP in 1991-07-01 00:00:00?\n Answer: 219.6 billion danish kroner\n\n8. What was the GDP in 1991-10-01 00:00:00?\n Answer: 224.6 billion danish kroner\n\n9. What was the GDP in 1992-01-01 00:00:00?\n Answer: 228.1 billi