In [10]:
from dotenv import load_dotenv
import os
from huggingface_hub import login

from transformers import pipeline
import torch

# HuggingFace Login

In [11]:
# get the API key from the .env file
load_dotenv()
llama_api = os.getenv("HUGGINGFACE_TOKEN")
llama_api

'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'

In [4]:
# Gated model: Login with a HF token with gated access permission
# Login with your Hugging Face token
login(llama_api)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to C:\Users\ben-s\.cache\huggingface\token
Login successful


In [14]:
# load the model
model_id = "meta-llama/Llama-3.2-3B-Instruct"
# pipe = pipeline(
#     "text-generation",
#     model=model_id,
#     torch_dtype=torch.bfloat16,
#     device_map="auto",
# )

In [None]:
# Version von datacamp
# Quelle: https://www.datacamp.com/tutorial/fine-tuning-llama-3-2

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, TextStreamer
import torch

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained( # loads tokenizer associated with the pre-trained model, which will convert text to tokens and vice versa
    model_id,
    return_dict=True, # ensures that the model returns outputs as dictionaries 
    low_cpu_mem_usage=True, # Optimizes memory usage on the CPU
    device_map="auto", # Uses 16-bit floating-point precision for faster computation
    trust_remote_code=True, # Trusts the model code from the Hugging Face Hub
)

# set pad_token_id to avaoid receiving warning messages
# ensures that padding tokens are properly defined, using the end-of-sequence token as the padding token as a fallback
# eos = end of sequence
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id


# create the text generation pipeline with model and tokenizer
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto", # automatically selectsCPU or GPU for running the model
)

# run the pipeline with custom prompt
messages = [
    {
        "role": "system",
        "content": "You are a skilled Python developer specializing in database management and optimization.",
    },
    {
        "role": "user",
        "content": "I'm experiencing a sorting issue in my database. Could you please provide Python code to help resolve this problem?",
    },
]

prompt = tokenizer.apply_chat_template( #  uses a tokenizer to format the messages into a prompt suitable for the language model
    messages, tokenize=False, add_generation_prompt=True # sets tokenize=False to keep the text as-is and add_generation_prompt=True to include any necessary prompts for text generation
)

outputs = pipe(prompt, # uses pipe to generate a response to the prompt
                max_new_tokens=256, # limit the length of the generated text
                #do_sample=True, #  enables decoding strategies such as multinomial sampling, beam-search multinomial sampling, Top-K sampling and Top-p sampling.
                ) 

print(outputs[0]["generated_text"])

In [None]:
# display the results as HTML by merging the Markdown code
# Quelle: https://www.datacamp.com/tutorial/fine-tuning-llama-3-2

from IPython.display import Markdown, display

display(
    Markdown(
            outputs[0]["generated_text"].split(
                "<|start_header_id|>assistant<|end_header_id|>"
            )[1]
        )
    )

# Prompt Template

In [2]:
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate

In [19]:
template = "You are an artificial intelligence assistent, answer the question. {question}"
prompt_template = PromptTemplate(template=template, input_variables=["question"])

print(prompt_template.invoke({"question": "What is LangChain?"}))

text='You are an artificial intelligence assistent, answer the question. What is LangChain?'


In [None]:
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", "{system_message}"),
        ("user", "{argument_text}"),
    ]
)

print(
    prompt_template.invoke(
        {
            "system_message": "You are an expert in argument mining. Extract the arguments in the following text.",
            "argument_text": "The quick brown fox jumps over the lazy dog.",
        }
    )
)


messages=[SystemMessage(content='You are an artificial intelligence assistant. Extract the arguments in the following text.', additional_kwargs={}, response_metadata={}), HumanMessage(content='The quick brown fox jumps over the lazy dog.', additional_kwargs={}, response_metadata={})]


In [15]:
from langchain_huggingface import HuggingFaceEndpoint

In [17]:
llm = HuggingFaceEndpoint(repo_id=model_id,
                          huggingfacehub_api_token=llama_api)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to C:\Users\ben-s\.cache\huggingface\token
Login successful


In [18]:
question = "What is LangChain?"
output=llm.invoke(question)
print(output)

 - A Web3 Storage Ecosystem
LangChain is a decentralized data storage and management system that aims to provide a more scalable, secure, and user-friendly alternative to existing blockchain-based storage solutions. It's a key player in the Web3 storage ecosystem, designed to store and manage data in a decentralized manner.

Key Features of LangChain:

1. **Scalability**: LangChain is designed to handle large amounts of data and scale with the growing needs of its users.
2. **Security**: It utilizes advanced cryptographic techniques to ensure the integrity and confidentiality of stored data.
3. **Interoperability**: LangChain enables seamless interaction with other Web3 applications, allowing for easy data sharing and collaboration.
4. **Flexibility**: The system supports various data formats and storage protocols, making it adaptable to diverse use cases.
5. **Decentralized Governance**: LangChain operates on a decentralized governance model, giving users control over their data and d

In [20]:
# combine the prompt template and the llm
llm_chain = prompt_template | llm

question = "What is LangChain?"
print(llm_chain.invoke({"question": question}))

 
LangChain is an open-source, Rust-based, decentralized data infrastructure framework. It allows developers to build scalable, interoperable, and modular applications for data sharing and collaboration on blockchain networks.

LangChain provides a set of tools and libraries that enable developers to:

1.  **Define and manage data assets**: LangChain allows developers to define and manage data assets, including their metadata, storage, and access controls.
2.  **Build data pipelines**: LangChain enables developers to build data pipelines that can collect, process, and transmit data across different blockchain networks.
3.  **Ensure data interoperability**: LangChain provides a framework for ensuring data interoperability across different blockchain networks, enabling seamless data sharing and collaboration.
4.  **Develop custom applications**: LangChain provides a modular architecture that allows developers to build custom applications for data sharing and collaboration on blockchain n

# viz llama

In [None]:
#pip install torchsummary
#pip install torchviz

: 

In [None]:
from torchsummary import summary
from transformers import AutoModel

# Load the model
model = AutoModel.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.FloatTensor instead (while checking arguments for embedding)

In [14]:
# Print the model summary
summary(pipe.model, input_size=(3, 224, 224))

RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.FloatTensor instead (while checking arguments for embedding)

In [7]:
# List the layers
for name, layer in model.named_children():
    print(f"Layer: {name} - {layer}")

Layer: embed_tokens - Embedding(128256, 3072)
Layer: layers - ModuleList(
  (0-27): 28 x LlamaDecoderLayer(
    (self_attn): LlamaSdpaAttention(
      (q_proj): Linear(in_features=3072, out_features=3072, bias=False)
      (k_proj): Linear(in_features=3072, out_features=1024, bias=False)
      (v_proj): Linear(in_features=3072, out_features=1024, bias=False)
      (o_proj): Linear(in_features=3072, out_features=3072, bias=False)
      (rotary_emb): LlamaRotaryEmbedding()
    )
    (mlp): LlamaMLP(
      (gate_proj): Linear(in_features=3072, out_features=8192, bias=False)
      (up_proj): Linear(in_features=3072, out_features=8192, bias=False)
      (down_proj): Linear(in_features=8192, out_features=3072, bias=False)
      (act_fn): SiLU()
    )
    (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
    (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
  )
)
Layer: norm - LlamaRMSNorm((3072,), eps=1e-05)
Layer: rotary_emb - LlamaRotaryEmbedding()


In [12]:
from torchviz import make_dot
from IPython.display import display

# Create a dummy input tensor with the correct type
dummy_input = torch.tensor([[1, 2, 3, 4]], dtype=torch.long)

# Perform a forward pass
output = pipe.model(dummy_input)

# Extract the tensor from the model's output
output_tensor = output.logits

# Visualize the model
dot = make_dot(output_tensor, params=dict(pipe.model.named_parameters()))
dot.render("model", format="png")

dot: graph is too large for cairo-renderer bitmaps. Scaling by 0.365072 to fit


'model.png'

# Prompts

In [None]:
# das hat lokal ca. 20 Minuten gedauert


messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]
outputs = pipe(
    messages,
    max_new_tokens=256,
)
print(outputs[0]["generated_text"][-1])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


{'role': 'assistant', 'content': 'Arrrr, me hearty! Yer lookin\' fer a pirate\'s tale, eh? Alright then, listen close and I\'ll spin ye a yarn \'bout meself. Me name be Blackbeak Billy, the scurviest pirate to ever sail the Seven Seas! Me ship, the "Maverick\'s Revenge", be me home, and me trusty cutlass be me best mate. I\'ve battled scurvy sea monsters, discovered hidden treasure, and outwitted the Royal Navy more times than I can count. Now, what be bringin\' ye to these fair waters?'}
