In [3]:
#pip install transformers

In [4]:
#pip install sentence-transformers

In [7]:
from transformers import BartForConditionalGeneration, BartTokenizer
from sentence_transformers import SentenceTransformer, util
import numpy as np

# Toy corpus that the retriever searches over
documents = [
    "Python is an interpreted, high-level, general-purpose programming language.",
    "Artificial intelligence (AI) is intelligence demonstrated by machines, unlike the natural intelligence displayed by humans and animals.",
    "The Industrial Revolution was the transition to new manufacturing processes in the period from about 1760 to sometime between 1820 and 1840.",
]

# Generator: BART checkpoint and its matching tokenizer
model_name = "facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

# Retriever: sentence-transformers encoder used for queries and documents
embedder = SentenceTransformer('paraphrase-distilroberta-base-v1')

# Encode the corpus once up front; entry i corresponds to documents[i]
document_embeddings = embedder.encode(documents, convert_to_tensor=True)

def retrieve(query, documents, document_embeddings, top_k=1):
    """Return the ``top_k`` documents most similar to ``query``.

    Args:
        query: Text to search for; encoded with the module-level ``embedder``.
        documents: Sequence of candidate documents, indexable by position.
        document_embeddings: Embeddings of ``documents``; entry ``i`` is
            assumed to belong to ``documents[i]``.
        top_k: Maximum number of documents to return.

    Returns:
        A list of at most ``top_k`` documents, best match first. Empty when
        ``top_k`` is not positive or ``documents`` is empty.
    """
    # Slicing with `[-0:]` would select *every* score, so a request for zero
    # results has to be answered explicitly rather than through the slice.
    if top_k <= 0 or len(documents) == 0:
        return []

    # Encode the query
    query_embedding = embedder.encode(query, convert_to_tensor=True)

    # Compute cosine similarity between query and documents
    similarities = util.cos_sim(query_embedding, document_embeddings)

    # Copy to host memory first: `.numpy()` raises for tensors stored on an
    # accelerator. `reshape(-1)` (instead of `squeeze()`) keeps the scores
    # one-dimensional even when there is only a single document.
    scores = similarities.detach().cpu().numpy().reshape(-1)

    # Indices of the highest scores, best match first
    topk_indices = np.argsort(scores)[::-1][:top_k]

    # Return the top k documents
    return [documents[i] for i in topk_indices]

def generate_response(query):
    """Generate text for ``query`` using the best-matching document as context.

    Looks up the single most similar entry of the module-level ``documents``
    (via ``retrieve`` and ``document_embeddings``), joins it to the query and
    feeds the result to the module-level ``model``.

    Args:
        query: The question or prompt text.

    Returns:
        The decoded generated text with special tokens removed.
    """
    # Retrieve relevant documents
    relevant_documents = retrieve(query, documents, document_embeddings, top_k=1)

    # Concatenate the query and retrieved document
    input_text = query + " </s></s> " + relevant_documents[0]

    # Tokenize and encode the input text. `truncation=True` caps the sequence
    # at the tokenizer's configured maximum length so that an over-long
    # query/document pair is shortened instead of failing inside the model.
    input_ids = tokenizer.encode(input_text, return_tensors='pt', truncation=True)

    # Generate the response
    output = model.generate(input_ids, max_length=300, num_beams=4, early_stopping=True)

    # Decode the generated response
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Example usage: run one sample question through generate_response and
# print the text it returns.
query = "What is Python?"
response = generate_response(query)
print("Generated Response:", response)


merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Generated Response: Python is an interpreted, high-level, general-purpose programming language. It is used to write software for computers, phones, and other devices. Python is a free, open-source programming language with many built-in features. For more information on Python, visit python.org.


In [6]:
import requests

url = "https://huggingface.co/models"
try:
    # Bound the wait so an unreachable host cannot hang the cell; increase
    # the timeout value as needed for slow connections.
    response = requests.get(url, timeout=30)
    # Turn HTTP error statuses (4xx/5xx) into exceptions instead of treating
    # them as a successful fetch.
    response.raise_for_status()
    # Process the response...
except requests.Timeout:
    print("The request timed out. Please try again later.")
except requests.RequestException as exc:
    # Timeout is a RequestException subclass, so it is handled above first;
    # this branch covers connection errors, HTTP error statuses and the like.
    print(f"The request failed: {exc}")


In [8]:
from transformers import BartForConditionalGeneration, BartTokenizer
from sentence_transformers import SentenceTransformer, util
import numpy as np

# Load the BART model and tokenizer
model_name = "facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

# Load a sentence-transformers model for computing embeddings
embedder = SentenceTransformer('paraphrase-distilroberta-base-v1')

# Example documents
documents = [
    "Python is an interpreted, high-level, general-purpose programming language.",
    "Artificial intelligence (AI) is intelligence demonstrated by machines, unlike the natural intelligence displayed by humans and animals.",
    "The Industrial Revolution was the transition to new manufacturing processes in the period from about 1760 to sometime between 1820 and 1840.",
]

# Compute embeddings for the documents defined in this cell, so the cell does
# not depend on a `document_embeddings` left over from an earlier cell and the
# embeddings always match the list above (entry i <-> documents[i]).
document_embeddings = embedder.encode(documents, convert_to_tensor=True)

def retrieve(query, documents, document_embeddings, top_k=1):
    """Return the ``top_k`` documents most similar to ``query``.

    Args:
        query: Text to search for; encoded with the module-level ``embedder``.
        documents: Sequence of candidate documents, indexable by position.
        document_embeddings: Embeddings of ``documents``; entry ``i`` is
            assumed to belong to ``documents[i]``.
        top_k: Maximum number of documents to return.

    Returns:
        A list of at most ``top_k`` documents, best match first. Empty when
        ``top_k`` is not positive or ``documents`` is empty.
    """
    # Slicing with `[-0:]` would select *every* score, so a request for zero
    # results has to be answered explicitly rather than through the slice.
    if top_k <= 0 or len(documents) == 0:
        return []

    # Encode the query
    query_embedding = embedder.encode(query, convert_to_tensor=True)

    # Compute cosine similarity between query and documents
    similarities = util.cos_sim(query_embedding, document_embeddings)

    # Copy to host memory first: `.numpy()` raises for tensors stored on an
    # accelerator. `reshape(-1)` (instead of `squeeze()`) keeps the scores
    # one-dimensional even when there is only a single document.
    scores = similarities.detach().cpu().numpy().reshape(-1)

    # Indices of the highest scores, best match first
    topk_indices = np.argsort(scores)[::-1][:top_k]

    # Return the top k documents
    return [documents[i] for i in topk_indices]

def generate_response(query, documents, document_embeddings=None):
    """Generate text for ``query`` using the best-matching document as context.

    Args:
        query: The question or prompt text.
        documents: Candidate documents to retrieve the context from.
        document_embeddings: Optional precomputed embeddings of ``documents``
            (entry ``i`` for ``documents[i]``). When omitted, the documents
            are encoded here with the module-level ``embedder`` so that the
            embeddings always belong to the list actually passed in; pass
            precomputed embeddings to avoid re-encoding on every call.

    Returns:
        The decoded generated text with special tokens removed.
    """
    # Encode the given documents unless the caller supplied their embeddings.
    # Falling back to some unrelated module-level embeddings could pair the
    # similarity scores with the wrong documents.
    if document_embeddings is None:
        document_embeddings = embedder.encode(documents, convert_to_tensor=True)

    # Retrieve relevant documents
    relevant_documents = retrieve(query, documents, document_embeddings, top_k=1)

    # Concatenate the query and retrieved document
    input_text = query + " </s></s> " + relevant_documents[0]

    # Tokenize and encode the input text. `truncation=True` caps the sequence
    # at the tokenizer's configured maximum length so that an over-long
    # query/document pair is shortened instead of failing inside the model.
    input_ids = tokenizer.encode(input_text, return_tensors='pt', truncation=True)

    # Generate the response
    output = model.generate(input_ids, max_length=300, num_beams=4, early_stopping=True)

    # Decode the generated response
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Example usage: run one sample question against the `documents` list and
# print the text that generate_response returns.
query = "What is Python?"
response = generate_response(query, documents)
print("Generated Response:", response)


Generated Response: Python is an interpreted, high-level, general-purpose programming language. It is used to write software for computers, phones, and other devices. Python is a free, open-source programming language with many built-in features. For more information on Python, visit python.org.
