In [None]:
!pip install langchain
!pip install langchain_community
!pip install pypdf
!pip install sentence-transformers
!pip install transformers



In [None]:
from langchain.prompts import PromptTemplate
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

# Local GPT-2 text-generation model wrapped as a LangChain LLM.
# Generation is capped at 10 new tokens per call.
hf = HuggingFacePipeline.from_model_id(
    model_id="gpt2",
    task="text-generation",
    pipeline_kwargs={"max_new_tokens": 10},
)

# Prompt with a single `question` placeholder.
template = """Question: {question} Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)

# Compose prompt -> model: the rendered prompt is fed straight into the LLM.
chain = prompt | hf

# Run the chain on one question and show the generated continuation.
question = "What is electroencephalography?"
answer = chain.invoke({"question": question})
print(answer)

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.




This is what you call an objective measurement


In [None]:
from transformers import pipeline, set_seed

# Sampling is stochastic; seed the RNGs so Restart & Run All reproduces the text.
set_seed(42)

# Initialize a text-generation pipeline with GPT-2
text_generator = pipeline('text-generation', model='gpt2')

# Generate a sampled continuation of the prompt.
# The pad token is set to EOS explicitly, which is what the library otherwise
# does implicitly while logging "Setting `pad_token_id` to `eos_token_id`".
output = text_generator(
    "Once upon a time",
    max_length=100,
    do_sample=True,
    pad_token_id=text_generator.tokenizer.eos_token_id,
)

# Print the generated text (the pipeline returns a list with one dict per sequence)
print(output[0]['generated_text'])


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Once upon a time, there were some who sought to kill me with these great evil powers, by giving them strength, by taking over and controlling their servants, and by turning them into murderers.[12] However, the Lords of the Seven Stars knew that the true masters of the world would be those of the Seven Stars,[13] and they were the most dangerous to the weaklings. To prevent this, the Lords put a measure into the hands of Nefari that would prevent them from ever


In [None]:
from transformers import pipeline

# Initialize a sentiment-analysis pipeline.
# The checkpoint and revision are pinned explicitly (these are the values the
# library previously picked as its implicit default) so results do not change
# if the task default is ever updated.
sentiment_analysis = pipeline(
    'sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
    revision='af0f99b',
)

# Analyze one sentence; the pipeline returns a list, so take the first entry.
result = sentiment_analysis("I love using Transformers library!")[0]
print(f"label: {result['label']}, with score: {result['score']}")


No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

label: POSITIVE, with score: 0.9989345669746399


In [None]:
from transformers import pipeline

# Initialize a named entity recognition pipeline.
# - `aggregation_strategy="simple"` replaces the deprecated `grouped_entities=True`
#   and merges word pieces into whole entities (e.g. "John Doe").
# - The checkpoint and revision are pinned to the values the library previously
#   selected implicitly, so the cell stays reproducible.
ner = pipeline(
    'ner',
    model='dbmdz/bert-large-cased-finetuned-conll03-english',
    revision='f2482bf',
    aggregation_strategy='simple',
)

# Recognize entities in a sentence and report each group with its character span.
result = ner("My name is John Doe and I live in San Francisco.")
for entity in result:
    print(f"entity: {entity['entity_group']}, word: {entity['word']}, start: {entity['start']}, end: {entity['end']}")


No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision f2482bf (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/998 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]



entity: PER, word: John Doe, start: 11, end: 19
entity: LOC, word: San Francisco, start: 34, end: 47


In [None]:
from transformers import pipeline

# Initialize a question-answering pipeline.
# The checkpoint and revision are pinned to the values the library previously
# selected implicitly, so the cell does not depend on the task default.
question_answerer = pipeline(
    'question-answering',
    model='distilbert-base-cased-distilled-squad',
    revision='626af31',
)

# Ask a question against a context passage, using the keyword-argument call form.
# `start` / `end` in the result are character offsets of the answer in the context.
result = question_answerer(
    question='What is the capital of France?',
    context='Paris is the capital city of France.',
)
print(f"answer: {result['answer']}, start: {result['start']}, end: {result['end']}")


No model was supplied, defaulted to distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


answer: Paris, start: 0, end: 5


In [None]:
from transformers import pipeline

# Initialize a text-summarization pipeline.
# The checkpoint and revision are pinned to the values the library previously
# selected implicitly, so the cell does not depend on the task default.
summarizer = pipeline(
    'summarization',
    model='sshleifer/distilbart-cnn-12-6',
    revision='a4f8f3e',
)

# Summarize a short text.
# This checkpoint's default min_length is 56, which exceeds max_length=50 and
# triggers "Your min_length=56 must be inferior than your max_length=50".
# An explicit, smaller min_length keeps the two bounds consistent.
result = summarizer("""
    Transformers is a state-of-the-art general-purpose architecture for natural language processing.
    It is based on self-attention mechanism and provides large improvements over previous models.
    """, max_length=50, min_length=10)
print(f"summary: {result[0]['summary_text']}")


No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Your min_length=56 must be inferior than your max_length=50.


summary:  Transformers is a state-of-the-art general-purpose architecture for natural language processing . It is based on self-attention mechanism and provides large improvements over previous models . Transformers is an open-source architecture for language processing.


In [None]:
from transformers import pipeline

# Initialize an English-to-French translation pipeline.
# The checkpoint and revision are pinned to the values the library previously
# selected implicitly, so the cell does not depend on the task default.
translator = pipeline(
    'translation_en_to_fr',
    model='t5-base',
    revision='686f1db',
)

# Translate one sentence; the pipeline returns a list with one dict per input.
result = translator("Hello, how are you?")
print(f"translation: {result[0]['translation_text']}")


No model was supplied, defaulted to t5-base and revision 686f1db (https://huggingface.co/t5-base).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


translation: Bonjour, comment êtes-vous?
