In [1]:
pip install openai beautifulsoup4 requests


Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import openai
import requests
from bs4 import BeautifulSoup

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer
model_name = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Ensure the tokenizer has a unique pad_token_id
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # Set pad_token to eos_token if not already set

def chat_with_huggingface(user_input, history=""):
    """
    Chat with Hugging Face model using proper attention masks.
    :param user_input: User's input
    :param history: Conversation history
    :return: Chatbot's response
    """
    # Encode input and history
    input_ids = tokenizer.encode(history + user_input + tokenizer.eos_token, return_tensors="pt", padding=True)

    # Generate an attention mask
    attention_mask = input_ids.ne(tokenizer.pad_token_id).int()

    # Generate the chatbot's response
    bot_output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=1000,
        pad_token_id=tokenizer.pad_token_id
    )
    response = tokenizer.decode(bot_output[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
    return response

# Example usage
user_message = "Hello! What can you do?"
response = chat_with_huggingface(user_message)
print("Chatbot:", response)


Chatbot: I'm a student at the University of Texas at Austin.


In [4]:
pip install transformers


Note: you may need to restart the kernel to use updated packages.


In [5]:
import os

# Disable the warning about symlinks
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"


In [6]:
pip install requests


Note: you may need to restart the kernel to use updated packages.


# SCRAPPING THE WEBSITE

In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B")

tokenizer.pad_token = tokenizer.eos_token

def ask_chatgpt(prompt, max_new_tokens=100):
    """
    Queries GPT-Neo with the given prompt.
    :param prompt: The user prompt
    :param max_new_tokens: Number of tokens to generate
    :return: The model's response
    """
    try:
        # Tokenize the input
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
        input_ids = inputs["input_ids"]
        attention_mask = inputs["attention_mask"]  # Attention mask is included

        # Get the input length
        input_length = input_ids.size(1)

        # Truncate the input to ensure it fits within the model's token limit
        max_input_length = 2048 - max_new_tokens
        if input_length > max_input_length:
            print(f"Input is too long ({input_length} tokens). Truncating to {max_input_length} tokens.")
            input_ids = input_ids[:, -max_input_length:]  # Truncate from the beginning
            attention_mask = attention_mask[:, -max_input_length:]  # Truncate the attention mask as well

        # Generate a response
        response = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,  # Use the attention mask here
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer.eos_token_id  # Handle padding
        )

        # Decode and return the response
        return tokenizer.decode(response[0], skip_special_tokens=True)

    except Exception as e:
        return f"Error generating response: {e}"
        
import requests

def fetch_website_content(url):
    """
    Fetches and extracts relevant content from a given website URL.

    :param url: The website URL to scrape
    :return: Extracted content as a string
    """
    try:
        # Send a GET request to the URL
        response = requests.get(url)
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx and 5xx)
        
        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.text, "html.parser")
        
        # Extract the title
        title = soup.title.string if soup.title else "No title available"
        
        # Extract headings (h1, h2, h3)
        headings = []
        for tag in ['h1', 'h2', 'h3']:
            headings.extend([heading.get_text(strip=True) for heading in soup.find_all(tag)])
        
        # Extract paragraphs
        paragraphs = [p.get_text(strip=True) for p in soup.find_all('p')]
        
        # Combine extracted information
        content = f"Title: {title}\n\n"
        content += "Headings:\n" + "\n".join(headings) + "\n\n"
        content += "Content:\n" + " ".join(paragraphs)
        
        return content if content.strip() else "No relevant content found on the webpage."
    
    except requests.exceptions.RequestException as e:
        return f"Error fetching the website content: {e}"
    except Exception as e:
        return f"An unexpected error occurred: {e}"

 # Main script
if __name__ == "__main__":
    # Input the website URL
    url = input("Enter the website URL: ")
    website_content = fetch_website_content(url)
    
    print("\nExtracted Content:\n")
    print(website_content)

Enter the website URL:  https://botpenguin.com/



Extracted Content:

Title: Free Chatbot maker | Chatbot for Website, WhatsApp | BotPenguin

Headings:
Engage, Converse and Convertyour visitors using AI Chatbot Agent
Performance Highlights
An AI chatbot that can beanywhere, anytime
Customize your AI Chatbot with GenerativeAI and Custom LLMs
One platform with all the features you wantfrom your Chatbot Maker
For every Business Problem, BotPenguinhas the Best Solution
Achieve 70% higher conversionswith AI Chatbot
What makes BotPenguin the idealAI Chatbot Solution Provider?
BotPenguin’s AI Chatbot hassomething special for every industry
Integrate with your favorite applications andconfigure for a seamless experience
Your Privacy, Our Priority
100%DataSecurity
What our Customers say
Solve 80% of customer querieswith AI Chatbot
Omnichannel Customer Support
Marketing Automations
Generate More Sales
Better E-Commerce
Live Chat
Unified Inbox
Endless Integrations
Onboarding Support and Guidance
No-code Chatflow Builder
Zero Setup Fee
Multiling

In [8]:
# Input user query
user_query = input("\nEnter your question: ")
    
    # Prepare the prompt for GPT-Neo
prompt = f"Based on the following website content:\n{website_content}\n\nUser: {user_query}"
    
    # Get response from GPT-Neo model
response = ask_chatgpt(prompt)
print("\nGPT-Neo Response:\n")
print(response)


Enter your question:  services offered


Input is too long (2048 tokens). Truncating to 1948 tokens.

GPT-Neo Response:

inhas the Best Solution
Achieve 70% higher conversionswith AI Chatbot
What makes BotPenguin the idealAI Chatbot Solution Provider?
BotPenguin’s AI Chatbot hassomething special for every industry
Integrate with your favorite applications andconfigure for a seamless experience
Your Privacy, Our Priority
100%DataSecurity
What our Customers say
Solve 80% of customer querieswith AI Chatbot
Omnichannel Customer Support
Marketing Automations
Generate More Sales
Better E-Commerce
Live Chat
Unified Inbox
Endless Integrations
Onboarding Support and Guidance
No-code Chatflow Builder
Zero Setup Fee
Multilingual Chatbot
Thorough AI Capabilities
Advanced Analytics

Content:
Why BotPenguin Product Solutions Partners Resources Integrations Experience 80+ world-class integrations. Key Features Take your business to the next level with our awesome key features. Live Chat Stay in the loop with your clientele 24*7! Unified Inb

# STRUCTURING THE CONTENT

In [18]:
import re
from bs4 import BeautifulSoup

# Sample scraped content (simplified for demonstration)
scraped_content = """
Title: Free Chatbot maker | Chatbot for Website, WhatsApp | BotPenguin
Headings:
- Engage, Converse and Convert your visitors using AI Chatbot Agent
- Performance Highlights
- What makes BotPenguin the ideal AI Chatbot Solution Provider?
- Marketing Automations
- Better E-Commerce
- Omnichannel Customer Support
- Lead Generation

Content:
- One platform with all the features you want from your Chatbot Maker.
- Achieve 70% higher conversions with AI Chatbot.
- Integrate with your favorite applications for a seamless experience.
- Multilingual Chatbot support available.
"""

def extract_and_structure_content(content):
    # Step 1: Extract Title
    title_match = re.search(r"Title:\s*(.*)", content)
    title = title_match.group(1) if title_match else "No title found"

    # Step 2: Extract Headings
    headings = re.findall(r"-\s*([A-Za-z0-9\s,&]+)", content)

    # Step 3: Extract Key Content (bullet points or descriptions)
    key_content = []
    content_match = re.search(r"Content:\s*(.*)", content, re.DOTALL)
    if content_match:
        # Capture all text under "Content" heading and split by bullet points
        key_content = content_match.group(1).strip().split("\n- ")
    
    # Step 4: Prepare structured data for input to the chatbot model
    structured_data = {
        "title": title,
        "headings": headings,
        "key_content": key_content
    }

    return structured_data

# Extract structured content from the raw scraped content
structured_content = extract_and_structure_content(scraped_content)

# Display the structured content
print("Structured Content for Chatbot Input:")
print(structured_content)

Structured Content for Chatbot Input:
{'title': 'Free Chatbot maker | Chatbot for Website, WhatsApp | BotPenguin', 'headings': ['Engage, Converse and Convert your visitors using AI Chatbot Agent\n', 'Performance Highlights\n', 'What makes BotPenguin the ideal AI Chatbot Solution Provider', 'Marketing Automations\n', 'Better E', 'Commerce\n', 'Omnichannel Customer Support\n', 'Lead Generation\n\nContent', 'One platform with all the features you want from your Chatbot Maker', 'Achieve 70', 'Integrate with your favorite applications for a seamless experience', 'Multilingual Chatbot support available'], 'key_content': ['- One platform with all the features you want from your Chatbot Maker.', 'Achieve 70% higher conversions with AI Chatbot.', 'Integrate with your favorite applications for a seamless experience.', 'Multilingual Chatbot support available.']}


In [20]:
pip install torch torchvision torchaudio




In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Structured Content for Chatbot Input
website_content = {
    'title': 'Free Chatbot maker | Chatbot for Website, WhatsApp | BotPenguin',
    'headings': [
        'Engage, Converse and Convert your visitors using AI Chatbot Agent',
        'Performance Highlights',
        'What makes BotPenguin the ideal AI Chatbot Solution Provider',
        'Marketing Automations',
        'Better E',
        'Commerce',
        'Omnichannel Customer Support',
        'Lead Generation',
        'One platform with all the features you want from your Chatbot Maker',
        'Achieve 70',
        'Integrate with your favorite applications for a seamless experience',
        'Multilingual Chatbot support available'
    ],
    'key_content': [
        'One platform with all the features you want from your Chatbot Maker.',
        'Achieve 70% higher conversions with AI Chatbot.',
        'Integrate with your favorite applications for a seamless experience.',
        'Multilingual Chatbot support available.'
    ]
}

# Load GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Ensure the model is in evaluation mode
model.eval()

def generate_response(user_input, context):
    """
    Generate a response to the user based on their input and website context.
    """
    # Normalize user input to lowercase for easier keyword matching
    user_input = user_input.lower().strip()

    # Check for specific keywords in user input and provide relevant predefined responses
    if "features" in user_input or "platform" in user_input:
        relevant_info = "BotPenguin is a feature-rich platform offering a wide range of tools for AI Chatbots, including lead generation, marketing automation, omnichannel support, and more."
    elif "conversions" in user_input or "conversion" in user_input:
        relevant_info = "Achieve 70% higher conversions with BotPenguin’s AI-powered chatbots. We help businesses boost their conversion rates through automation."
    elif "integration" in user_input:
        relevant_info = "BotPenguin can seamlessly integrate with over 80 platforms, providing a unified experience for businesses."
    elif "multilingual" in user_input:
        relevant_info = "BotPenguin’s chatbot supports multiple languages, ensuring seamless communication across regions."
    elif "omnichannel" in user_input:
        relevant_info = "BotPenguin provides omnichannel support, ensuring you can engage with users on websites, WhatsApp, and more."
    elif "lead generation" in user_input:
        relevant_info = "BotPenguin’s AI chatbot is designed to assist in lead generation by automating customer interactions and capturing relevant data."
    elif "marketing automation" in user_input:
        relevant_info = "BotPenguin includes marketing automation tools to optimize customer engagement and increase conversion rates."
    elif "help" in user_input or "services" in user_input:
        relevant_info = "BotPenguin provides a comprehensive suite of services for businesses, including AI-powered chatbots, customer support, lead generation, and more."
    else:
        # If no match found, generate a more dynamic response
        relevant_info = generate_dynamic_response(user_input)
    
    # Construct the prompt to give clear context to the model
    prompt = f"User Input: {user_input}\nRelevant Information from Website: {relevant_info}\nResponse:"

    # Encode the input prompt
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Attention mask to handle padding tokens
    attention_mask = torch.ones(input_ids.shape, dtype=torch.long)

    # Generate the response using GPT-2 with sampling enabled for more natural output
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=100,  # Increase response length slightly for more detail
            num_return_sequences=1,
            attention_mask=attention_mask,
            do_sample=True,  # Allow randomness to avoid repetitive responses
            temperature=0.7,  # Control randomness in the output
            eos_token_id=tokenizer.eos_token_id,  # Stop at end-of-sequence token
            pad_token_id=tokenizer.eos_token_id,  # Set pad_token_id to eos_token_id to avoid warning
            no_repeat_ngram_size=2,  # Avoid repeating n-grams in the output (e.g., avoid repeated phrases)
            top_p=0.95,  # Use nucleus sampling for more natural and diverse outputs
            top_k=50  # Use top-k sampling to limit the options and avoid overfitting to common words
        )

    # Decode and clean the output
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    
    # Remove the prompt portion to get only the generated response
    response = response.replace(prompt, '').strip()

    # Return the cleaned response
    return response

def generate_dynamic_response(user_input):
    """
    Function to dynamically generate a response for questions that don't directly match the structured content.
    """
    # If the question doesn't match any known content, we generate a fallback response
    return "I am still learning, but I can help with information about features, services, lead generation, and more. Feel free to ask about anything!"

# Main chatbot loop for testing
def chatbot():
    print("Hello! I am the BotPenguin chatbot. Ask me anything related to our services.")
    print("Type 'exit' to end the conversation.")
    
    conversation_history = ""  # Maintain conversation history to make responses more coherent
    
    while True:
        # Get user input
        user_input = input("Enter your question: ")
        
        if user_input.lower() == 'exit':
            print("Goodbye!")
            break
        
        # Update the conversation history to maintain context
        conversation_history += f"User: {user_input}\n"

        # Generate the response
        response = generate_response(user_input, website_content)
        
        # Update the conversation history with the bot's response
        conversation_history += f"Bot: {response}\n"
        
        # Print the bot's response
        print(f"Bot: {response}")

if __name__ == "__main__":
    chatbot()


Hello! I am the BotPenguin chatbot. Ask me anything related to our services.
Type 'exit' to end the conversation.


Enter your question:  services


Bot: Our bots have been programmed to respond to emails, phone calls, text messages, chat messages and other communications, even when users are offline. For instance, we can respond quickly to a contact that's trying to go shopping. We even set up a bot to read emails on our behalf


Enter your question:  history about the website 


Bot: This is a very useful guide. If you have any questions or requests, please feel free.


Enter your question:  pricing ?


Bot: I am currently working on a new project, a demo, with a lot of help from other developers.
Thanks for your interest in this project. I hope to get your feedback on it as soon as possible.


Enter your question:  features


Bot: I believe I've had a lot of fun with BotBotPengo. The team is very active and is doing a great job, but I have had some frustrations with their lack of development and development cycles. A lot is at stake and


Enter your question:  multilingual


Bot: We can provide more information about your experience with the Bot. We want to provide you with a list of supported languages. You can find more details about each language in our "Language Settings".
What is the latest version of Bot?
BotPeng.com is a free and open source online learning tool. It is designed
