# (Fine-Tuning and) Inference with Hugging Face Transformers

In [None]:
!pip install transformers datasets accelerate --quiet

import torch

# Select the compute device once: CUDA when PyTorch reports a usable GPU,
# otherwise the CPU.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")
print("GPU is available and being used." if use_gpu else "GPU not available, using CPU instead.")

## Step 1: Import Libraries and Load Dataset

In [None]:
import pandas as pd
from datasets import Dataset, load_from_disk
from sklearn.preprocessing import MinMaxScaler
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
import random

In [None]:
# Load the raw data into a pandas DataFrame
csv_url = "./Rohdaten CSV.csv"  # path (or URL) of the CSV file
data = pd.read_csv(filepath_or_buffer=csv_url)

# Feature columns used to build the prompts: the five columns at positions 1-5.
# Only a few features are used so the prompt fits the model's context window
# and stays computationally affordable. The label itself is read from the
# column named "Target" (see createPromptData).
index_columns = data.columns[slice(1, 6)]

In [None]:
# Split the data into rows used as few-shot examples ("train") and rows to
# predict ("test")

# Convert the pandas DataFrame to a Hugging Face Dataset
hf_dataset = Dataset.from_pandas(data)

# Hold out 10% of the rows as the test split. The fixed seed makes the split
# reproducible, so evaluation results are comparable between runs.
hf_dataset = hf_dataset.train_test_split(test_size=0.1, seed=42)

# Store the complete (split) dataset on disk
hf_dataset.save_to_disk("complete_hf_dataset")

In [None]:
# TODO: seed the random module (random.seed) before sampling so the randomly chosen examples are reproducible

def createPromptData(row, target=False):
    """Render one data row as a single line of prompt text.

    Args:
        row: Mapping from column name to value. It must contain every column
            listed in the module-level ``index_columns`` and, when ``target``
            is true, a "Target" entry convertible to ``int``.
        target: When true, append " Target: <int>" to the line.

    Returns:
        The feature values converted to strings and separated by single
        spaces, optionally followed by the target suffix.
    """
    # Only the selected feature columns are used, to keep the prompt short.
    feature_values = [str(row[name]) for name in index_columns]
    prompt = ' '.join(feature_values)

    if not target:
        return prompt

    label = int(row['Target'])
    return f"{prompt} Target: {label}"

def generateExamples(data, window_size=5, rng=None):
    """Draw ``window_size`` distinct random example lines from ``data``.

    Args:
        data: Indexable collection supporting ``len()`` whose items expose a
            "line_text" entry (e.g. a Hugging Face Dataset split).
        window_size: Number of example lines to draw (default 5).
        rng: Optional ``random.Random`` instance used for sampling. Pass a
            seeded instance to get reproducible examples. When ``None`` the
            module-level ``random`` generator is used, so ``random.seed()``
            still applies.

    Returns:
        A list with the "line_text" of the sampled rows, in sampled order.

    Raises:
        ValueError: If ``window_size`` is negative or larger than
            ``len(data)`` (raised by ``sample``).
    """
    # All run-to-run variation of the prompts comes from this sampling step.
    sampler = random if rng is None else rng
    random_indices = sampler.sample(range(len(data)), window_size)

    # Integer indexing works for lists and for Hugging Face Datasets alike.
    return [data[i]["line_text"] for i in random_indices]

def generatePrompts(traindata, testdata, window_size=5):
    """Add a few-shot prompt column "input_text" to every row of ``testdata``.

    Each prompt consists of ``window_size`` randomly sampled "line_text"
    entries from ``traindata`` followed by the row's own "line_text", all
    joined with newlines.

    Args:
        traindata: Dataset the example lines are sampled from; its rows must
            have a "line_text" entry.
        testdata: Dataset providing a ``map`` method; its rows must have a
            "line_text" entry.
        window_size: Number of example lines placed before each test line
            (default 5, the previously hard-coded value).

    Returns:
        The result of ``testdata.map(...)``, i.e. the dataset with the added
        "input_text" column.
    """
    def add_prompt(row):
        # Fresh examples are sampled for every test row.
        examples = generateExamples(traindata, window_size)
        return {"input_text": "\n".join(examples + [row["line_text"]])}

    # map() returns a new dataset, so the result must be returned to the caller.
    return testdata.map(add_prompt)

print(hf_dataset.column_names)

# Add a "line_text" column (feature values followed by the target) to both splits.
for split_name in ("train", "test"):
    hf_dataset[split_name] = hf_dataset[split_name].map(
        lambda row: {"line_text": createPromptData(row, True)}
    )

# Persist the dataset including the new column
hf_dataset.save_to_disk("final_hf_dataset")

# Load complete dataset

In [None]:
# OPTIONAL: reload the prepared dataset from disk and upload it to the
# Hugging Face Hub as a private dataset repository.

hf_dataset = load_from_disk("final_hf_dataset")
hf_dataset.push_to_hub("Petar-Uni-Freiburg/LLM_Time_Series", private=True)

# test output: show the prompt line of the first test row
first_test_row = hf_dataset["test"][0]
print(first_test_row["line_text"])

## Step 2: Load and run the Model

In [None]:
# Load the pre-trained tokenizer and causal language model
model_name = "openlm-research/open_llama_3b_v2"  # Replace with your model name (e.g., "facebook/llama")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the EOS token as the padding token
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

# Load the weights and move the model to the selected device
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

In [None]:
import gc
import torch
# Run Python's garbage collector, then release PyTorch's cached GPU memory
# before the generation calls below.
gc.collect()
torch.cuda.empty_cache()

# Tokenization helper used by generatePrediction
def tokenize_function(examples):
    """Tokenize ``examples['input_text']`` and move the tensors to ``device``.

    Args:
        examples: Mapping with an "input_text" entry (a string or a list of
            strings) that is passed to the module-level ``tokenizer``.

    Returns:
        A dict of PyTorch tensors as produced by the tokenizer, each moved to
        the module-level ``device``. Sequences are truncated and padded to
        512 tokens.
    """
    # NOTE(review): padding to max_length pads on the tokenizer's configured
    # padding side; confirm it is "left" before using the result for
    # generation with this decoder-only model.
    encoded = tokenizer(
        examples['input_text'],
        truncation=True,
        max_length=512,
        padding='max_length',
        return_tensors="pt",
    )

    on_device = {}
    for name, tensor in encoded.items():
        on_device[name] = tensor.to(device)
    return on_device

# Build the few-shot prompt column ("input_text") for every test row from
# randomly sampled training lines.
hf_dataset["test"] = generatePrompts(hf_dataset["train"], hf_dataset["test"])
# The dataset is used as-is; batched tokenization via
# .map(tokenize_function, batched=True) is disabled, and each row is tokenized
# inside generatePrediction instead.
tokenized_data = hf_dataset

def generatePrediction(row):
    """Predict the target for one test row with the language model.

    The row's "input_text" is a few-shot prompt whose final character is the
    row's true target digit (assuming a single-digit target). That character
    is removed and the model is asked to generate exactly one more token. No
    weights are updated; the example lines in the prompt only act as
    in-context examples.

    Args:
        row: Mapping with an "input_text" string, as produced by
            generatePrompts.

    Returns:
        The generated token parsed as an ``int``, or ``-1`` when the token is
        not a valid integer (e.g. empty or non-numeric text). Callers can
        therefore treat any value other than the expected labels as an
        unusable prediction instead of crashing.
    """
    # Drop the final character (the true target) so the model has to predict it.
    prompt = row["input_text"][:-1]
    encoded = tokenize_function({"input_text": prompt})
    prompt_length = encoded["input_ids"].shape[1]

    # generate() returns the prompt tokens followed by the new token.
    output = model.generate(**encoded, max_new_tokens=1)

    # Keep only the first generated token.
    new_token = output[:, prompt_length:prompt_length + 1]

    predicted_target = tokenizer.decode(new_token[0], skip_special_tokens=True)
    try:
        return int(predicted_target)
    except ValueError:
        # The model produced something that is not a number.
        return -1

# Smoke test on the first test row only; the full loop over
# tokenized_data['test'] runs in the evaluation cell.
sample_row = tokenized_data['test'][0]
prediction = generatePrediction(sample_row)

# test outputs: the prompt (including the true target) and the prediction
print(sample_row["input_text"])
print(prediction)

In [None]:
from os import write
import sys

# Outcome counters. "unknown" holds every prediction that cannot be classified
# as correct, false positive or false negative (previously these were printed
# as "?" but not counted, so the categories did not add up to "total").
results = {
    "correct": 0,
    "false_positive": 0,
    "false_negative": 0,
    "unknown": 0,
    "total": 0
}

for row in tokenized_data['test']:
    prediction = generatePrediction(row)
    # The true label is the last character of the row's prompt line.
    expected = int(row["line_text"][-1])

    if prediction == expected:
        outcome, marker = "correct", "."
    elif prediction == 1 and expected == 0:
        outcome, marker = "false_positive", "+"
    elif prediction == 0 and expected == 1:
        outcome, marker = "false_negative", "-"
    else:
        outcome, marker = "unknown", "?"

    results[outcome] += 1
    results["total"] += 1
    # One progress character per evaluated row.
    sys.stdout.write(marker)

print(results)


In [None]:
# Optional: not required for the results, only frees memory held by the model
del model  # drop the notebook's reference to the model
import gc
import torch
gc.collect()  # run Python's garbage collector
torch.cuda.empty_cache()  # release PyTorch's cached GPU memory

In [None]:
import matplotlib.pyplot as plt

# Plot every outcome category except the overall count. A filtered copy is
# used instead of deleting the "total" key, so this cell can be re-run without
# a KeyError and `results` keeps its total.
pie_data = {name: count for name, count in results.items() if name != "total"}
labels = list(pie_data.keys())  # Category names used as slice labels
sizes = list(pie_data.values())  # Category counts used as slice sizes

plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
plt.title('Results Pie Chart')
plt.show()


In [None]:
# as of 16.01, stop here.

# TODO: fine-tune the model (did not work so far)
# Drop the generated few-shot prompt column from the test split.
hf_dataset["test"] = hf_dataset["test"].remove_columns(["input_text"])


## Step 3: Save the Fine-Tuned Model

In [None]:
# TODO try and fine-tune the model

In [None]:

# Save the model and tokenizer to ./fine_tuned_model.
# NOTE(review): `trainer` is not defined anywhere in the visible notebook
# (fine-tuning is still a TODO), so this cell presumably fails with a
# NameError until a Trainer has been created - confirm.
trainer.save_model("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")


## Step 4: Load and Infer with Fine-Tuned Model

In [None]:

# Load the fine-tuned model and tokenizer
fine_tuned_model_path = "./fine_tuned_model"
model = AutoModelForCausalLM.from_pretrained(fine_tuned_model_path)
tokenizer = AutoTokenizer.from_pretrained(fine_tuned_model_path)

# Prepare input for inference
input_text = "0.01 0.02 0.03 ... 0.05 Target:"
tokenized_input = tokenizer(input_text, return_tensors="pt")

# Generate prediction (exactly one new token)
output = model.generate(**tokenized_input, max_new_tokens=1)

# generate() returns the prompt tokens followed by the new token. Decode only
# the newly generated part, so the printed value is the prediction itself and
# not the whole prompt (same approach as in generatePrediction).
prompt_length = tokenized_input["input_ids"].shape[1]
predicted_target = tokenizer.decode(output[0, prompt_length:], skip_special_tokens=True)
print("Predicted Target:", predicted_target)


## Step 5: Batch Inference

In [None]:
# TODO: modify to work with the complete hf_dataset test data

# Batch inference for multiple inputs
input_texts = ["0.01 0.02 0.03 ... 0.05 Target:", "0.02 0.03 0.04 ... 0.06 Target:"]

# Decoder-only models continue the text at the end of each sequence, so the
# padding must go on the left; otherwise shorter prompts would be continued
# after their pad tokens.
tokenizer.padding_side = "left"
if tokenizer.pad_token is None:
    # Padding a batch requires a pad token; fall back to EOS.
    tokenizer.pad_token = tokenizer.eos_token

# Tokenize inputs
batch_inputs = tokenizer(input_texts, return_tensors="pt", padding=True, truncation=True)

# Generate predictions (one new token per input)
outputs = model.generate(**batch_inputs, max_new_tokens=1)

# Decode predictions: skip the (padded) prompt tokens and keep only the newly
# generated token of every sequence.
prompt_length = batch_inputs["input_ids"].shape[1]
predicted_targets = [tokenizer.decode(output[prompt_length:], skip_special_tokens=True) for output in outputs]
print("Predicted Targets:", predicted_targets)

# TODO generate pie chart