In [3]:
from huggingface_hub import login
from dotenv import load_dotenv
import os

In [4]:
# Load variables from a .env file into the process environment, then read the
# Hub access token from it (the value is None when the variable is not set).
load_dotenv()
HUGGINGFACE_HUB_TOKEN = os.environ.get('HUGGINGFACE_HUB_TOKEN')

In [5]:
# Sample text prompt; later cells assign this same string again before using it.
prompt = "Hello! How can I assist you today?"

In [6]:
# Log in to the Hugging Face Hub with the token read from the environment.
# add_to_git_credential=True additionally stores the token in the configured
# git credential helper (see the confirmation message printed below).
login(token=HUGGINGFACE_HUB_TOKEN, add_to_git_credential=True)

Token is valid (permission: write).
Your token has been saved in your configured git credential helpers (manager).
Your token has been saved to C:\Users\Administrator\.cache\huggingface\token
Login successful


In [None]:
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM

# Hub repo id of the checkpoint to download and load locally.
# NOTE(review): the repo id names a 405B-parameter variant, so the download and
# the memory needed to load it are presumably very large -- confirm the target
# hardware before running this cell.
model_name = "meta-llama/Meta-Llama-3.1-405B"

# Fetch the tokenizer first, then the causal-LM weights, from the same repo.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


In [None]:
# Text to feed to the locally loaded model.
prompt = "Hello! How can I assist you today?"

# Encode the prompt as PyTorch tensors ("pt") for the model.
inputs = tokenizer(prompt, return_tensors="pt")

# Generate with a cap of 50 newly produced tokens.
outputs = model.generate(max_new_tokens=50, **inputs)

# Decode the first returned sequence, dropping special tokens, and show it.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)


#### Without downloading the model

In [8]:
from huggingface_hub import InferenceClient

# Query the hosted model instead of downloading its weights.
# InferenceClient replaces the deprecated legacy InferenceApi client, which the
# library documents as superseded; the variable name `api` is kept unchanged.
api = InferenceClient(model="meta-llama/Meta-Llama-3.1-405B", token=HUGGINGFACE_HUB_TOKEN)

prompt = "Hello! How can I assist you today?"

# text_generation() returns the generated text as a string; the 50-token cap
# matches the limit used by the local generate() cell.
response = api.text_generation(prompt, max_new_tokens=50)
print(response)

#### Text to audio generation

In [None]:
from transformers import pipeline

# Build a pipeline for the bark-small checkpoint; no task is passed explicitly,
# so it is left to the library to determine it from the model.
pipe = pipeline(model="suno/bark-small")

# Run the pipeline on a single sentence.
output = pipe("Hey it's HuggingFace on the phone!")

# Pull the two fields read from the result: the audio data and its sampling rate.
audio, sampling_rate = output["audio"], output["sampling_rate"]

#### Using LangChain

In [None]:
from langchain import HuggingFaceHub, PromptTemplate, LLMChain

# LLM wrapper that sends requests to the model hosted on the Hugging Face Hub,
# authenticated with the token loaded earlier in the notebook.
llm = HuggingFaceHub(
    repo_id="meta-llama/Meta-Llama-3.1-405B",
    huggingfacehub_api_token=HUGGINGFACE_HUB_TOKEN,
)

# Named `prompt_template` (not `prompt`) so the plain-string `prompt` defined by
# other cells is not silently replaced by a PromptTemplate object, which would
# break any later cell that passes `prompt` as raw text.
prompt_template = PromptTemplate(
    input_variables=["input_text"],
    template="You are a helpful assistant. Answer the following: {input_text}",
)

# Chain that fills the template and forwards the rendered text to the LLM.
llm_chain = LLMChain(prompt=prompt_template, llm=llm)

input_text = "What is the capital of France?"

# The template has a single input variable, so the positional argument is
# used as the value of {input_text}.
response = llm_chain.run(input_text)
print(response)


#### Using the Inference API (`InferenceClient`)

In [None]:
from huggingface_hub import InferenceClient

# Stored as `model_id` so the transformers model object that the local-loading
# cell binds to `model` is not overwritten with a plain string.
model_id = "meta-llama/Meta-Llama-3.1-405B"
api = InferenceClient(model=model_id, token=HUGGINGFACE_HUB_TOKEN)

# Define the text in this cell: when the notebook is run top to bottom, the
# LangChain cell has rebound `prompt` to a PromptTemplate, which is not valid
# input for text generation.
prompt = "Hello! How can I assist you today?"

# An InferenceClient instance is not callable; requests go through task
# methods. text_generation() returns the generated text as a string.
response = api.text_generation(prompt, max_new_tokens=50)
print(response)
