In [11]:
# Run the stored data from the Data_Preparation.ipynb
%run ..//Data_Preparation.ipynb

Data types in 'indu' dataframe:
Time       datetime64[ns]
indu_Q1           float64
indu_Q2           float64
indu_Q3           float64
indu_Q4           float64
indu_Q5           float64
indu_Q7           float64
dtype: object

Data types in 'cons' dataframe:
Time        datetime64[ns]
cons_Q1            float64
cons_Q2            float64
cons_Q3            float64
cons_Q4            float64
cons_Q5            float64
cons_Q6            float64
cons_Q7            float64
cons_Q8            float64
cons_Q9            float64
cons_Q10           float64
cons_Q11           float64
cons_Q12           float64
dtype: object

Data types in 'gdp' dataframe:
Time     datetime64[ns]
Value           float64
dtype: object
indu_Q1           float64
indu_Q2           float64
indu_Q3           float64
indu_Q4           float64
indu_Q5           float64
indu_Q7           float64
Time       datetime64[ns]
dtype: object
cons_Q1            float64
cons_Q2            float64
cons_Q3            float64
con

In [12]:
#Split the data into training and testing data
indu_train = indu[(indu['Time'] >= '1990-01-01') & (indu['Time'] < '2020-01-01')]
cons_train = cons[(cons['Time'] >= '1990-01-01') & (cons['Time'] < '2020-01-01')]
gdp_train = gdp[(gdp['Time'] >= '1990-01-01') & (gdp['Time'] < '2020-01-01')]

indu_test = indu[(indu['Time'] >= '2020-01-01') & (indu['Time'] < '2024-01-01')]
cons_test = cons[(cons['Time'] >= '2020-01-01') & (cons['Time'] < '2024-01-01')]
gdp_test = gdp[(gdp['Time'] >= '2020-01-01') & (gdp['Time'] < '2024-01-01')]

In [13]:
#Importing the packages enabling the Large Language Model
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline


In [14]:
model_name = "mistralai/Mistral-7B-v0.1"

#Defining what device to use
device = torch.device("cpu")

model = AutoModelForCausalLM.from_pretrained(model_name, torchscript=True, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name)
generation_config = GenerationConfig.from_pretrained(model_name)

Loading checkpoint shards: 100%|██████████| 2/2 [00:47<00:00, 23.57s/it]


In [15]:
#Setting specific configs
generation_config.temperature = 0.0001 #Making the model deterministic

In [16]:
# Format the training data as a string
gdp_train_str = gdp_train.to_string(index=False)

# Inserting gdp_train into the prompt string
prompt = f"""
You are a time series forecasting model designed to predict the Danish GDP based on historical data. 
Your task is to forecast the GDP from 2020Q1 until 2023Q4, using the provided GDP training data.

The provided GDP training data is as follows:
{gdp_train_str}

Please analyze the provided data, which represents the GDP values over time, and generate predictions for the GDP for the specified timeframe. Your output should be a list of predicted GDP values for each quarter from 2020Q1 to 2023Q4.

Note: Your predictions should consider the underlying patterns in the data and provide realistic forecasts.
"""

print(prompt)


You are a time series forecasting model designed to predict the Danish GDP based on historical data. 
Your task is to forecast the GDP from 2020Q1 until 2023Q4, using the provided GDP training data.

The provided GDP training data is as follows:
      Time  Value
1990-01-01  210.2
1990-04-01  218.1
1990-07-01  209.5
1990-10-01  217.8
1991-01-01  220.2
1991-04-01  226.1
1991-07-01  219.6
1991-10-01  224.6
1992-01-01  228.1
1992-04-01  231.1
1992-07-01  227.1
1992-10-01  236.7
1993-01-01  227.8
1993-04-01  232.7
1993-07-01  227.8
1993-10-01  240.2
1994-01-01  238.3
1994-04-01  252.4
1994-07-01  242.8
1994-10-01  259.8
1995-01-01  254.3
1995-04-01  261.5
1995-07-01  252.8
1995-10-01  267.8
1996-01-01  260.8
1996-04-01  275.2
1996-07-01  269.6
1996-10-01  282.5
1997-01-01  273.9
1997-04-01  291.8
1997-07-01  281.8
1997-10-01  298.6
1998-01-01  288.4
1998-04-01  294.3
1998-07-01  293.8
1998-10-01  309.5
1999-01-01  296.3
1999-04-01  310.0
1999-07-01  306.6
1999-10-01  328.6
2000-01-01  316

In [17]:
pre_prompt = f"""
You are a time series forecasting model designed to predict the Danish GDP based on historical data. 
Your task is to forecast the GDP for specified quarters, using the provided GDP training data.

The provided GDP training data includes historical GDP values from 1990 to 2019:
{gdp_train_str}

Please analyze the provided data, which represents the GDP values over time, and generate predictions for the specified quarters. Your output should be a list of predicted GDP values for each specified quarter.

Note: Your predictions should consider the underlying patterns in the data and provide realistic forecasts.
"""

print(pre_prompt)


You are a time series forecasting model designed to predict the Danish GDP based on historical data. 
Your task is to forecast the GDP for specified quarters, using the provided GDP training data.

The provided GDP training data includes historical GDP values from 1990 to 2019:
      Time  Value
1990-01-01  210.2
1990-04-01  218.1
1990-07-01  209.5
1990-10-01  217.8
1991-01-01  220.2
1991-04-01  226.1
1991-07-01  219.6
1991-10-01  224.6
1992-01-01  228.1
1992-04-01  231.1
1992-07-01  227.1
1992-10-01  236.7
1993-01-01  227.8
1993-04-01  232.7
1993-07-01  227.8
1993-10-01  240.2
1994-01-01  238.3
1994-04-01  252.4
1994-07-01  242.8
1994-10-01  259.8
1995-01-01  254.3
1995-04-01  261.5
1995-07-01  252.8
1995-10-01  267.8
1996-01-01  260.8
1996-04-01  275.2
1996-07-01  269.6
1996-10-01  282.5
1997-01-01  273.9
1997-04-01  291.8
1997-07-01  281.8
1997-10-01  298.6
1998-01-01  288.4
1998-04-01  294.3
1998-07-01  293.8
1998-10-01  309.5
1999-01-01  296.3
1999-04-01  310.0
1999-07-01  306.6


In [18]:
#Creating a invididual prompt for each quarter
period_prompt = """
You are tasked with predicting the Danish GDP for the period of {}.

Please analyze the historical GDP data and generate a prediction for the GDP value for this period.

Note: Provide a single predicted GDP value for the specified period.
"""

print(period_prompt)


You are tasked with predicting the Danish GDP for the period of {}.

Please analyze the historical GDP data and generate a prediction for the GDP value for this period.

Note: Provide a single predicted GDP value for the specified period.



In [19]:
#Creating the pipeline for the model with the given model, tokenizer and generation config
predictions = pipeline("text-generation", 
                       model=model, 
                       tokenizer=tokenizer, 
                       config=generation_config, 
                       device=device
                       )

In [20]:
# Concatenate the pre-prompt with each period prompt
def generate_output(pre_prompt, period_prompt, periods):
    output = []
    for period in periods:
        full_prompt = pre_prompt + period_prompt.format(period)
        # Generate predictions for the concatenated prompt 
        period_output = predictions(full_prompt, max_length=100, num_return_sequences=1)
        output.append(period_output)
        print(f"Prediction created for {period}: {period_output} ")
    return output

# Define the periods you want to predict from gdp_test['Time']
periods_to_predict = gdp_test['Time'].dt.to_period('Q').astype(str)

# Generate output for each period
output = generate_output(pre_prompt, period_prompt, periods_to_predict)

# Print the output for each period
for period, period_output in zip(periods_to_predict, output):
    print(f"Predictions for {period}:")
    print(period_output)


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Predictions for 2020Q1:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Predictions for 2020Q2:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Predictions for 2020Q3:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Predictions for 2020Q4:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Predictions for 2021Q1:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Predictions for 2021Q2:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Predictions for 2021Q3:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Predictions for 2021Q4:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Predictions for 2022Q1:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Predictions for 2022Q2:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Predictions for 2022Q3:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Predictions for 2022Q4:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Predictions for 2023Q1:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Predictions for 2023Q2:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Predictions for 2023Q3:
Predictions for 2023Q4:
Predictions for 2020Q1:
[{'generated_text': '\nYou are a time series forecasting model designed to predict the Danish GDP based on historical data. \nYour task is to forecast the GDP for specified quarters, using the provided GDP training data.\n\nThe provided GDP training data includes historical GDP values from 1990 to 2019:\n      Time  Value\n1990-01-01  210.2\n1990-04-01  218.1\n1990-07-01  209.5\n1990-10-01  217.8\n1991-01-01  220.2\n1991-04-01  226.1\n1991-07-01  219.6\n1991-10-01  224.6\n1992-01-01  228.1\n1992-04-01  231.1\n1992-07-01  227.1\n1992-10-01  236.7\n1993-01-01  227.8\n1993-04-01  232.7\n1993-07-01  227.8\n1993-10-01  240.2\n1994-01-01  238.3\n1994-04-01  252.4\n1994-07-01  242.8\n1994-10-01  259.8\n1995-01-01  254.3\n1995-04-01  261.5\n1995-07-01  252.8\n1995-10-01  267.8\n1996-01-01  260.8\n1996-04-01  275.2\n1996-07-01  269.6\n1996-10-01  282.5\n1997-01-01  273.9\n1997-04-01  291.8\n1997-07-01  281.8\n1997-10-01  29