In [None]:
class Test:
    """Toy class holding four empty collections and four demo methods.

    Each of ``foo``, ``bar``, ``baz`` and ``qux`` prints its own name and
    returns that same string.
    """

    def __init__(self):
        # The set attribute is called qux_set so it does not shadow the qux method.
        self.foo_list = list()
        self.bar_dict = dict()
        self.baz_tuple = tuple()
        self.qux_set = set()

    def _summary(self):
        """Build the text shared by ``__str__`` and ``__repr__``."""
        return 'Test(foo={}, bar={}, baz={}, qux={})'.format(
            self.foo_list, self.bar_dict, self.baz_tuple, self.qux_set
        )

    def __str__(self):
        """Return the ``Test(foo=..., bar=..., baz=..., qux=...)`` summary."""
        return self._summary()

    def __repr__(self):
        """Return the same summary as ``__str__``."""
        return self._summary()

    def foo(self):
        """Print ``foo`` and return it."""
        label = 'foo'
        print(label)
        return label

    def bar(self):
        """Print ``bar`` and return it."""
        label = 'bar'
        print(label)
        return label

    def baz(self):
        """Print ``baz`` and return it."""
        label = 'baz'
        print(label)
        return label

    def qux(self):
        """Print ``qux`` and return it."""
        label = 'qux'
        print(label)
        return label
if __name__ == '__main__':
    # Smoke test: show the instance, then the return value of each demo method
    # (each method also prints its own name before returning it).
    t = Test()
    print(t)
    for demo_method in (t.foo, t.bar, t.baz, t.qux):
        print(demo_method())


# Hugging Face Tutorial:
Setup
Configuration, installation, and running

In [None]:
from huggingface_hub import login
from google.colab import userdata

# Log in to the Hugging Face Hub with a token fetched through google.colab's
# userdata helper, so the token value itself never appears in the notebook.
# NOTE(review): 'YOUR_HUGGING_FACE_TOKEN' looks like a placeholder key name —
# confirm it matches the stored secret (the chat-engine cell reads "HF_TOKEN").
login(token = userdata.get('YOUR_HUGGING_FACE_TOKEN'))


In [None]:
from transformers import AutoModelForMaskedLM, AutoTokenizer, pipeline

# Load the pre-trained model and tokenizer from Hugging Face.
# NOTE: model_name, tokenizer and model are notebook-level names that later
# cells rebind to other checkpoints, so this cell's objects only stay valid
# until those cells run.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMaskedLM.from_pretrained(model_name)

# Create a pipeline for masked language modeling
nlp_pipeline = pipeline("fill-mask", model=model, tokenizer=tokenizer)

# Test the pipeline with a simple input; the [MASK] placeholder marks the
# position the pipeline is asked to fill.
test_sentence = "The quick brown fox jumps over the [MASK] dog."
result = nlp_pipeline(test_sentence)

# Show the raw pipeline output.
print(result)


In [3]:
# Install the libraries used by the Hugging Face cells (transformers is taken
# straight from its GitHub repository rather than from a release).
# NOTE(review): this cell sits after cells that already import `transformers`
# and `huggingface_hub`; on a fresh kernel it presumably has to run first.
!pip install accelerate protobuf sentencepiece torch git+https://github.com/huggingface/transformers huggingface_hub notebook


In [None]:
# Start a Jupyter Notebook server as a shell command.
# NOTE(review): presumably this keeps the cell busy until the server is
# stopped, and it is redundant when already running inside a notebook —
# confirm this cell is still needed.
!jupyter notebook


# Loading The Pre-Trained Language Model Llama 2

In [None]:
import os
from getpass import getpass

from transformers import (AutoModelForCausalLM,
AutoTokenizer, pipeline)
from huggingface_hub import login
import torch

# Hugging Face access token.
# Never paste the token into the notebook: read it from the HF_TOKEN
# environment variable, or ask for it interactively (getpass does not echo it).
# NOTE: any token that was ever saved in this cell should be treated as leaked
# and revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=hf_token)

# Load the chat model in half precision and let device_map="auto" decide where
# the weights are placed.
model_id = "NousResearch/Llama-2-7b-chat-hf"
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.use_default_system_prompt = False


In [None]:
# Keep a handle on the configuration object of the loaded model.
# NOTE(review): presumably this holds architecture details such as the number
# of layers and the hidden size — uncomment the print below to inspect it.
config = model.config

# print(config)

# Print the model object itself; its text representation lists the layers the
# model is built from.
print(model)


# Generating Text Using Llama 2 from Hugging Face

In [None]:
# This is the text input you provide to the model.
# It is like asking the model a question or giving it a starting sentence.
sample_prompt = "Hello, how are you?"

# The tokenizer converts your text into tokens (numbers that represent words
# or sub-words), because the model works with numbers, not raw text.
# return_tensors="pt" tells the tokenizer to return PyTorch tensors.
# Calling the tokenizer directly (instead of .encode) also returns the
# attention mask, which generate() uses to tell real tokens from padding.
#
# The tensors are moved to the device the model actually lives on, so inputs
# and weights always end up on the same device, whichever one that is.
encoded_prompt = tokenizer(sample_prompt, return_tensors="pt").to(model.device)
input_ids = encoded_prompt["input_ids"]

# The model generates a response based on your input tokens:
#   max_length=50            upper bound on the total sequence length in
#                            tokens (prompt included); raise it for longer
#                            responses.
#   num_beams=5              beam search with 5 beams.
#   no_repeat_ngram_size=2   no sequence of 2 tokens may appear twice.
output = model.generate(
    input_ids,
    attention_mask=encoded_prompt["attention_mask"],
    max_length=50,
    num_beams=5,
    no_repeat_ngram_size=2,
)

# Decode the generated tokens back into human-readable text.
response = tokenizer.decode(output[0], skip_special_tokens=True)

# Finally, output the response
print(f"Generated Response: {response}")


# Named Entity Recognition

In [None]:
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

# Load a pre-trained NER model.
# NOTE: this rebinds the notebook-level names model_name, tokenizer and model
# that earlier cells also assign.
model_name = "dbmdz/bert-large-cased-finetuned-conll03-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# Create an NER pipeline.
# NOTE(review): no aggregation_strategy is passed, so the results are
# presumably one entry per token (word piece) rather than per whole entity —
# confirm against the printed output.
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)

# Test the NER pipeline with a sample sentence
test_sentence = "Apple is planning to build a new campus in Austin."
result = ner_pipeline(test_sentence)
print(result)


In [None]:
def display_masked_sentence(sentence, ner_results):
    """Return ``sentence`` with every recognised entity replaced by ``[LABEL]``.

    Entities that carry character offsets are replaced by position, so only
    the recognised span is masked (not every other occurrence of the same
    text). Entities without offsets fall back to a plain text replacement of
    their 'word'.

    Parameters:
    - sentence (str): The text that was given to the NER pipeline.
    - ner_results (list[dict]): One dict per entity. Each needs a label under
      'entity' (or 'entity_group') and either 'start'/'end' character offsets
      or the matched text under 'word'.

    Returns:
    - str: The sentence with entity spans swapped for their bracketed labels.
    """
    offset_spans = []
    text_only = []
    for entity in ner_results:
        label = entity['entity'] if 'entity' in entity else entity['entity_group']
        start, end = entity.get('start'), entity.get('end')
        if start is None or end is None:
            text_only.append((entity['word'], label))
        else:
            offset_spans.append((start, end, label))

    masked_sentence = sentence

    # Replace from right to left so the offsets of spans further left stay valid.
    boundary = len(sentence)
    for start, end, label in sorted(offset_spans, key=lambda span: span[0], reverse=True):
        if end > boundary:
            # Overlaps a span that was already replaced (or lies outside the text).
            continue
        masked_sentence = masked_sentence[:start] + f"[{label}]" + masked_sentence[end:]
        boundary = start

    # Entities without offsets: replace by text, as before.
    for word, label in text_only:
        masked_sentence = masked_sentence.replace(word, f"[{label}]")
    return masked_sentence

# Test the function with the NER results: test_sentence and result are the
# notebook-level values left behind by the NER pipeline cell.
masked_sentence = display_masked_sentence(test_sentence, result)
print(masked_sentence)


# Summarization of Text


In [None]:
# Load the summarization pipeline.
# NOTE: `pipeline` is not imported in this cell; it relies on the
# `from transformers import ... pipeline` of an earlier cell having run.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Test the summarizer with a longer input text
long_text = (
    "The quick brown fox jumps over the lazy dog. The lazy dog, however, was not really lazy. "
    "It was simply tired from chasing after the quick brown fox all day. The two animals had a "
    "long history of playful rivalry, with the fox always outwitting the dog. Despite their differences, "
    "they shared a bond of mutual respect and friendship."
)

# Request a summary bounded by min_length/max_length, with sampling turned off.
summary = summarizer(long_text, max_length=50, min_length=25, do_sample=False)

# The pipeline returns a list; the text is under 'summary_text' of its first item.
print(summary[0]['summary_text'])


# Performing Question Answering

In [None]:
# Load the QA pipeline.
# NOTE: `pipeline` is not imported in this cell; it relies on the
# `from transformers import ... pipeline` of an earlier cell having run.
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")

# Define the context and the question
context = (
    "Apple Inc. is an American multinational technology company headquartered in Cupertino, California, "
    "that designs, develops, and sells consumer electronics, computer software, and online services. "
    "It is considered one of the Big Five companies in the U.S. information technology industry, along with "
    "Amazon, Google, Microsoft, and Facebook."
)
question = "Where is Apple Inc. headquartered?"

# Get the answer; the result is indexed by 'answer' below.
answer = qa_pipeline(question=question, context=context)

print(f"Question: {question}")
print(f"Answer: {answer['answer']}")


# Quest 3 - Create a Llama 2 Chat Agent

# Learning Outcomes

---

By the end of this quest, you will be able to:

* Set up and configure a chat agent that intelligently integrates a QA dataset with the Llama 2 model.
* Implement functionality that updates the QA dataset with new entries when an answer is generated by a Llama 2 model.
* Develop an interactive user interface for your chat agent using Gradio, allowing users to interact with it through a web-based platform.
* Understand how to balance between pre-existing knowledge (QA dataset) and AI-generated content in a conversational agent.
* Deploy your chat agent as a web application that becomes more intelligent over time as it learns from new questions and answers.

# Quest Details

---
**Introduction**
In this quest, you will take your skills to the next level by building a dynamic chat agent using the Llama 2 model from Hugging Face Transformers. Unlike a basic chatbot, this chat agent will first check if the question has a predefined answer in a QA dataset, and if not, it will generate a response using the Llama 2 model.

The agent will also automatically update the dataset with new Q&A pairs, ensuring that it becomes more knowledgeable over time. By integrating Gradio, you’ll create an interactive user interface for your chat agent, making it accessible and user-friendly.
This quest will equip you with practical experience in handling both structured (QA dataset) and unstructured (LLM-based responses) data sources, as well as deploying an AI-powered chat service.

For technical help on the StackUp platform & quest-related questions, join our Discord, head to the quest-helpdesk channel and look for the correct thread to ask your question.


**Deliverables**

1. This quest has 1 deliverable.
2. A screenshot


**Set up and Install Requirements**

In [None]:
# Install the third-party packages used by the chat agent.
# asyncio is deliberately NOT listed: it ships with Python, and the PyPI
# package of the same name is an obsolete backport that can shadow the
# standard-library module.
!pip install transformers llama_index gradio pandas aiohttp



## A Sample Training Dataset


# Load The QA DATA

In [None]:
import pandas as pd

# Sample QA data for Computer Science Theory, kept as (question, answer) pairs
# so each answer sits next to the question it belongs to.
qa_pairs = [
    (
        "What is an algorithm?",
        "An algorithm is a step-by-step procedure or formula for solving a problem. It is a sequence of instructions that is followed to achieve a desired result.",
    ),
    (
        "What is the difference between a stack and a queue?",
        "A stack is a data structure that follows the Last In First Out (LIFO) principle, while a queue follows the First In First Out (FIFO) principle.",
    ),
    (
        "What is Big O notation?",
        "Big O notation is used to describe the performance or complexity of an algorithm in terms of time or space. It characterizes algorithms by their worst-case or upper bound performance.",
    ),
    (
        "Explain the concept of dynamic programming.",
        "Dynamic programming is a method for solving complex problems by breaking them down into simpler subproblems. It involves storing the results of subproblems to avoid redundant computations.",
    ),
    (
        "What is the purpose of a hash table?",
        "A hash table is a data structure that maps keys to values for efficient data retrieval. It uses a hash function to compute an index into an array of buckets or slots, from which the desired value can be found.",
    ),
    (
        "What is a binary tree?",
        "A binary tree is a data structure in which each node has at most two children, referred to as the left child and the right child. It is used for efficient searching and sorting.",
    ),
    (
        "What is a graph in computer science?",
        "A graph is a collection of nodes (vertices) and edges (connections) that link pairs of nodes. Graphs are used to model relationships between objects.",
    ),
    (
        "Define computational complexity.",
        "Computational complexity is a measure of the amount of resources, such as time and space, that an algorithm requires relative to the size of the input data.",
    ),
    (
        "What is a sorting algorithm?",
        "A sorting algorithm is a method for arranging elements in a list or array in a specific order, typically ascending or descending. Examples include bubble sort, merge sort, and quicksort.",
    ),
    (
        "Explain the concept of recursion.",
        "Recursion is a programming technique where a function calls itself in order to solve a problem. The function typically has a base case to terminate the recursion and a recursive case to break the problem into smaller subproblems.",
    ),
]

# Column-oriented view of the same pairs: one list of questions, one of answers.
qa_data = {
    'question': [pair[0] for pair in qa_pairs],
    'answer': [pair[1] for pair in qa_pairs],
}

# Create a DataFrame
df = pd.DataFrame(qa_data)

# Save to CSV (without the index column)
df.to_csv('qa_dataset.csv', index=False)


In [None]:
import pandas as pd

# Load or create your QA dataset.
# NOTE: this rebinds qa_data — a plain dict in the dataset-creation cell — to
# the DataFrame read back from the CSV file that cell wrote.
qa_data = pd.read_csv('qa_dataset.csv')
# Question -> answer lookup; if a question appears more than once, the last
# row wins.
qa_dict = dict(zip(qa_data['question'], qa_data['answer']))


# Update the QA Bot to Include NER...

**Install Gradio Dependencies**

In [None]:
# Install Gradio plus the Hugging Face / PyTorch stack.
# NOTE(review): python-dotenv, torchvision, torchaudio, torchtext, torchdata
# and trl are not imported by any visible cell, while spacy, nltk and requests
# (imported by the chat engine) are not listed — confirm the package list.
!pip install gradio python-dotenv huggingface_hub transformers accelerate protobuf sentencepiece torch torchvision torchaudio torchtext torchdata trl


Freeze Package Requirements
```
!pip freeze> requirements.txt
```

In [None]:
# Write the list of currently installed packages into requirements.txt
# (overwrites the file if it already exists).
!pip freeze> requirements.txt


Save Token as a Git credential

In [None]:
# Use git's "store" credential helper for all repositories, then create a git
# repository in the current working directory.
# NOTE(review): the "store" helper presumably keeps credentials unencrypted on
# disk — confirm that is acceptable on this machine.
!git config --global credential.helper store
!git init


# Import Packages

In [None]:
# Install optimum together with its onnxruntime extra.
# NOTE(review): no visible cell imports optimum — confirm this is still needed.
!pip install optimum[onnxruntime]


Chat System Engine

In [None]:
import pandas as pd
import asyncio
import threading
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
import spacy
from nltk.corpus import wordnet
import nltk
import gradio as gr
from huggingface_hub import login
from google.colab import userdata
import torch
import requests

# Download NLTK resources (WordNet is used for synonym expansion)
nltk.download('wordnet')

# Authenticate with Hugging Face Hub; the token comes from Colab's userdata
# store and is also saved as a git credential.
login(token=userdata.get("HF_TOKEN"), add_to_git_credential=True)

# Load QA dataset
qa_data = pd.read_csv('qa_dataset.csv')

# Ensure all entries are strings and handle missing values
qa_data['question'] = qa_data['question'].fillna('').astype(str)
qa_data['answer'] = qa_data['answer'].fillna('').astype(str)

# Question -> answer lookup used by the retrieval step
qa_dict = dict(zip(qa_data['question'], qa_data['answer']))

# Pick the device first: pipeline device 0 is the first GPU, -1 is the CPU.
device = 0 if torch.cuda.is_available() else -1

# Half precision only pays off on a GPU; on CPU many operations are not
# implemented for float16, so fall back to float32 there.
model_dtype = torch.float16 if device == 0 else torch.float32

# Initialize model and tokenizer
model_name = "google/flan-t5-base"  # Efficient model
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=model_dtype)
tokenizer = AutoTokenizer.from_pretrained(model_name)
generator = pipeline('text2text-generation', model=model, tokenizer=tokenizer, device=device)

# Load spaCy model for NER.
# NOTE(review): no visible cell installs spaCy or downloads en_core_web_sm —
# confirm both are available before this line runs.
nlp = spacy.load('en_core_web_sm')

# Initialize emotion detection model; top_k=None asks for a score for every
# label instead of only the best one.
emotion_analyzer = pipeline('text-classification',
                            model="j-hartmann/emotion-english-distilroberta-base",
                            top_k=None,
                            device=device)

# Memoization dictionary: question text -> final response
response_cache = {}

# Extract keywords from text with parts of speech
def extract_keywords(text):
    """
    Extract lower-cased keywords from text and expand them with WordNet synonyms.

    Only nouns, verbs and adjectives that are neither stop words nor
    punctuation are kept. Each keyword is followed by the lemma names of its
    WordNet synsets. Multi-word lemma names use underscores in WordNet; these
    are turned into spaces so they can match ordinary text.

    Parameters:
    - text (str): The text to analyse.

    Returns:
    - list[str]: Unique keywords and synonyms, in order of first appearance.
    """
    doc = nlp(text)
    keywords = []
    seen = set()  # constant-time membership test; the list keeps the order

    def _add(term):
        # Skip empty terms: an empty string would match every question.
        if term and term not in seen:
            seen.add(term)
            keywords.append(term)

    for token in doc:
        if token.pos_ not in ('NOUN', 'VERB', 'ADJ') or token.is_stop or token.is_punct:
            continue
        _add(token.text.lower())
        # Add synonyms
        for syn in wordnet.synsets(token.text):
            for lemma in syn.lemmas():
                _add(lemma.name().lower().replace('_', ' '))
    return keywords

# Simple retrieval function with keyword prioritization
def retrieve_relevant_info(question, max_chars=500):
    """
    Retrieve relevant information from the QA dataset based on keywords extracted from the question.

    A stored answer is selected when any keyword occurs (as a substring,
    case-insensitively) in its stored question. Each answer is included at
    most once, even when several keywords match the same stored question.

    Parameters:
    - question (str): The question to find relevant information for.
    - max_chars (int): Stop collecting once the gathered text is longer than
      this many characters (the answer that crosses the limit is kept).

    Returns:
    - str: The matching answers joined by spaces, or an empty string if not found.
    """
    keywords = extract_keywords(question)
    if not keywords:
        return ""

    # Lower-case every stored question once instead of once per keyword.
    candidates = [(stored_q.lower(), answer) for stored_q, answer in qa_dict.items()]

    collected = []
    used = set()   # indexes of answers that were already added
    total = 0      # length the text would have with one trailing space per answer
    for keyword in keywords:
        if total > max_chars:
            break
        for idx, (stored_q, answer) in enumerate(candidates):
            if idx in used or keyword not in stored_q:
                continue
            used.add(idx)
            collected.append(answer)
            total += len(answer) + 1
            if total > max_chars:  # Limit the length of retrieved info
                break
    return " ".join(collected).strip()

# Asynchronous response generation with RAG approach
async def generate_response(question):
    """
    Generate an answer for the question, using retrieved dataset text as context.

    The blocking pipeline call runs in a worker thread so the event loop is
    not blocked while the model generates.

    Parameters:
    - question (str): The user's question.

    Returns:
    - str: The generated text with surrounding whitespace removed, or an
      error message string if anything in the generation step raised.
    """
    # Retrieve relevant information from the dataset
    relevant_info = retrieve_relevant_info(question)
    input_text = f"Context: {relevant_info}\nQuestion: {question}" if relevant_info else question

    try:
        # Generate response using the model (sampling-based decoding)
        outputs = await asyncio.to_thread(
            generator,
            input_text,
            max_length=150,
            temperature=0.7,
            top_k=50,
            top_p=0.9,
            repetition_penalty=1.0,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True
        )

        # Clean up the response
        return outputs[0]['generated_text'].strip()

    except Exception as e:
        # Best effort: report the problem as the reply instead of raising.
        return f"An error occurred while generating a response: {str(e)}"


# Extract entities from text using spaCy
def extract_entities(text):
    """
    Map each entity label found in the text to the text of an entity with that label.

    Parameters:
    - text (str): The text to run through the spaCy pipeline.

    Returns:
    - dict: label -> entity text. There is one entry per label, so when
      several entities share a label only the last one is kept.
    """
    found = {}
    for span in nlp(text).ents:
        found[span.label_] = span.text
    return found

# Analyze emotions using the emotion detection model
def analyze_emotion(text):
    """
    Return the highest-scoring emotion for the text.

    Parameters:
    - text (str): The text to classify.

    Returns:
    - tuple: (label, score) of the entry with the highest 'score' among the
      candidates in the first element of the analyzer's output.
    """
    candidates = emotion_analyzer(text)[0]
    ranked = sorted(candidates, key=lambda item: item['score'], reverse=True)
    best = ranked[0]
    return best['label'], best['score']

# Define a function to fetch a joke from a joke API
def fetch_joke(timeout=10):
    """
    Fetch a single-part joke from the JokeAPI service.

    Parameters:
    - timeout (float): Seconds to wait for the service before giving up, so a
      stalled connection cannot hang the chat forever.

    Returns:
    - str: The joke followed by an emoji, or an apology message if the
      request failed or the reply did not contain a joke.
    """
    try:
        response = requests.get("https://v2.jokeapi.dev/joke/Any?type=single", timeout=timeout)
        response.raise_for_status()
        joke_data = response.json()
        if 'joke' in joke_data:
            return f"{joke_data['joke']} 😂"
        else:
            return "Sorry, Something went Wrong"
    # ValueError covers a reply body that is not valid JSON.
    except (requests.RequestException, KeyError, TypeError, ValueError) as e:
        return f"Sorry, Something went Wrong!: {str(e)}"

# Chat function with RAG approach, memoization, and emotion-aware responses
def chat_function_with_emotions(question):
    """
    Answer a question, prefixing the reply with a short emotion-aware sentence.

    Order of handling: empty input, cached reply, joke request, then a
    generated reply (which is stored in the cache). Jokes are not cached.

    Parameters:
    - question (str): The user's message.

    Returns:
    - str: The reply text; an empty string for empty or whitespace-only input.
    """
    # Nothing to answer: skip the models entirely.
    if not question or not question.strip():
        return ""

    if question in response_cache:
        return response_cache[question]

    lowered = question.lower()
    if "joke" in lowered or "funny" in lowered:
        return fetch_joke()

    emotion_label, _ = analyze_emotion(question)
    response = asyncio.run(generate_response(question))

    # Emotion label -> sentence placed in front of the reply; labels that are
    # not listed leave the reply unchanged.
    emotion_prefixes = {
        'joy': "Glad to hear you're happy! 😊",
        'sadness': "I'm sorry you're feeling sad. 💔",
        'anger': "It seems like you're upset. Let's work through this together. 💪",
        'fear': "It's okay to be afraid. We can face this together. 🙏",
    }
    prefix = emotion_prefixes.get(emotion_label)
    if prefix is not None:
        response = f"{prefix} {response}"

    response_cache[question] = response
    return response

# Update QA dataset with new questions and answers.
# The lock makes concurrent calls to update_qa_dataset run one at a time.
update_lock = threading.Lock()

def update_qa_dataset(question, answer):
    """
    Append a question/answer pair to the dataset and refresh the lookups.

    Inside the lock this appends one row to the module-level qa_data frame,
    rewrites 'qa_dataset.csv' from it, and stores the answer in both qa_dict
    and response_cache under the question.

    Parameters:
    - question (str): The question to add.
    - answer (str): The answer to store for it.
    """
    global qa_data, qa_dict
    update_lock.acquire()
    try:
        appended_row = pd.DataFrame([{'question': question, 'answer': answer}])
        qa_data = pd.concat([qa_data, appended_row], ignore_index=True)
        qa_data.to_csv('qa_dataset.csv', index=False)
        for lookup in (qa_dict, response_cache):
            lookup[question] = answer
    finally:
        update_lock.release()

# Gradio chat interface
def gradio_chat(question):
    """Gradio callback: pass the text to chat_function_with_emotions and return its reply."""
    reply = chat_function_with_emotions(question)
    return reply

# Gradio UI setup: one text box in, one text box out, handled by gradio_chat.
interface = gr.Interface(
    fn=gradio_chat,
    inputs="text",
    outputs="text",
    title="The Dove Chat Agent",
    description=(
        "Welcome to The Dove Chat Agent! 🌟\n\n"
        "Our chat agent is designed to provide thoughtful, emotion-aware responses to your questions. "
        "Powered by state-of-the-art language models and emotion analysis, it understands your feelings and "
        "responds accordingly to offer you the best assistance.\n\n"
        "Dive into the world of technology, computer science, and IT with our intelligent chat agent. "
        "Equipped with advanced language models and emotion-aware responses, this agent is designed to assist with a wide range of topics. "
        "From coding queries and tech trends to IT troubleshooting and cyber security insights, get precise answers and helpful advice at your fingertips.\n\n"
        "Ask questions related to computer science concepts, technology trends, or IT solutions, and let our chat agent provide you with expert guidance and up-to-date information!"
    ),
    # Live mode reruns fn whenever the input changes. Here that would start a
    # model generation for every partial question and cache each partial
    # reply, so answer only when the user submits.
    live=False
)

# Launch the Gradio interface.
# NOTE(review): share=True presumably exposes the app through a public link
# and debug=True presumably keeps this cell running to show errors — confirm
# both are wanted before sharing the notebook.
interface.launch(share=True, debug=True)


# Setup Github Directory

In [None]:
# Every command below is a shell command, so each one carries the "!" prefix;
# without it the notebook would try to run the line as Python and fail.

# Install Git (-y answers the confirmation prompt, which a notebook cell cannot do)
!sudo apt-get install -y git

# Configure Git
!git config --global user.name "Dovineowuor"
!git config --global user.email "owuordovine@gmail.com"

# Navigate to your project directory before running the rest of this cell.
# Use the %cd magic for that: a "!cd" only affects its own one-off shell.

# Initialize a Git repository
!git init

# Add all files.
# Review what gets staged first: notebooks and config files must not contain
# access tokens or other secrets.
!git add .

# Commit changes
!git commit -m "Initial commit"

# Name the local branch "main" so the push below has a matching branch
!git branch -M main

# Add remote repository
!git remote add origin https://github.com/dovineowuor/ai-chatbot.git

# Push changes
!git push -u origin main

