<a href="https://colab.research.google.com/github/GirishKGit/CrewAI/blob/main/AI_Conversational_Chatbot_10_09_2024.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [73]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [74]:
!pip install datasets




In [75]:
!pip install convokit



In [76]:
import os
import pandas as pd
from convokit import Corpus, download

def load_reddit_corpus(data_dir='/content', corpus_name='reddit-corpus-small'):
    """
    Downloads and loads the Reddit corpus. Re-usable function to download and load corpus.
    Args:
        data_dir (str): Directory where data will be saved.
        corpus_name (str): Name of the corpus to download.
    Returns:
        Corpus: The loaded Convokit corpus.
    """
    corpus_path = os.path.join(data_dir, corpus_name)

    # Check if corpus exists, download if not
    if not os.path.exists(corpus_path):
        print(f"Downloading {corpus_name} to {data_dir}...")
        corpus_path = download(corpus_name, data_dir=data_dir)
    else:
        print(f"Corpus already exists in {corpus_path}.")

    # Load the corpus
    corpus = Corpus(filename=corpus_path)
    return corpus

def extract_utterances(corpus):
    """
    Extracts relevant utterance data from a given corpus and returns it as a pandas DataFrame.
    Args:
        corpus (Corpus): A Convokit corpus.
    Returns:
        pd.DataFrame: DataFrame with utterance_id, speaker_id, conversation_id, text, timestamp, reply_to.
    """
    utterances_data = []

    # Extract utterances and relevant information
    for utterance in corpus.iter_utterances():
        utterances_data.append({
            'utterance_id': utterance.id,
            'speaker_id': utterance.speaker.id,
            'conversation_id': utterance.conversation_id,
            'text': utterance.text,
            'timestamp': utterance.timestamp,
            'reply_to': utterance.reply_to if utterance.reply_to else 'No reply'
        })

    # Convert to a DataFrame
    utterances_df = pd.DataFrame(utterances_data)
    return utterances_df

# Example usage:
corpus = load_reddit_corpus()  # Load the Reddit corpus
utterances_df = extract_utterances(corpus)  # Extract utterance data
print(utterances_df.head())  # Display the first few rows of the DataFrame


Corpus already exists in /content/reddit-corpus-small.
  utterance_id     speaker_id conversation_id  \
0       9c716m  AutoModerator          9c716m   
1       9c8amk         rt2236          9c8amk   
2       9ca0yk    Im_Tsuikyit          9ca0yk   
3       9cfz10  AutoModerator          9cfz10   
4       9crqp2  mamypokopants          9crqp2   

                                                text   timestamp  reply_to  
0  Talk about your day. Anything goes, but subred...  1535839576  No reply  
1  I went to visit a few days ago and Ioved it. I...  1535850650  No reply  
2  One time, my family and I had just returned fr...  1535868569  No reply  
3  Talk about your day. Anything goes, but subred...  1535925965  No reply  
4                                                     1536029129  No reply  


In [77]:
# Find rows with empty or missing text
empty_texts = utterances_df[utterances_df['text'].isnull() | (utterances_df['text'] == '')]
print(empty_texts)


       utterance_id     speaker_id conversation_id text   timestamp  reply_to
4            9crqp2  mamypokopants          9crqp2       1536029129  No reply
10           9drpte    RzrRainMnky          9drpte       1536302928  No reply
11           9dsufk   Garlicvideos          9dsufk       1536314439  No reply
13           9e35l8        dudez77          9e35l8       1536399829  No reply
18           9ehwa7   drewiepoodle          9ehwa7       1536538437  No reply
...             ...            ...             ...  ...         ...       ...
294120       9jwmgy       TheKleen          9jwmgy       1538227291  No reply
294121       9k0bpn          xFaro          9k0bpn       1538253478  No reply
294122       9k0usd     farway2000          9k0usd       1538257670  No reply
294123       9k1c5a       mattywoo          9k1c5a       1538261785  No reply
294124       9k4nb6    Dudehitscar          9k4nb6       1538294820  No reply

[4524 rows x 6 columns]


In [78]:
utterances_df = utterances_df[utterances_df['text'].notnull() & (utterances_df['text'] != '')]


In [79]:
utterances_df['text'].fillna("No text provided", inplace=True)
utterances_df['text'].replace('', "No text provided", inplace=True)


In [80]:
# Check if there are any rows with missing or empty text fields after cleaning
empty_texts_after_cleaning = utterances_df[utterances_df['text'].isnull() | (utterances_df['text'] == '')]
print(empty_texts_after_cleaning)


Empty DataFrame
Columns: [utterance_id, speaker_id, conversation_id, text, timestamp, reply_to]
Index: []


In [81]:
# Check for missing values in other important columns
print(utterances_df.isnull().sum())


utterance_id       0
speaker_id         0
conversation_id    0
text               0
timestamp          0
reply_to           0
dtype: int64


In [82]:
utterances_df['reply_to'].fillna("No reply", inplace=True)


In [83]:
utterances_df.head()

Unnamed: 0,utterance_id,speaker_id,conversation_id,text,timestamp,reply_to
0,9c716m,AutoModerator,9c716m,"Talk about your day. Anything goes, but subred...",1535839576,No reply
1,9c8amk,rt2236,9c8amk,I went to visit a few days ago and Ioved it. I...,1535850650,No reply
2,9ca0yk,Im_Tsuikyit,9ca0yk,"One time, my family and I had just returned fr...",1535868569,No reply
3,9cfz10,AutoModerator,9cfz10,"Talk about your day. Anything goes, but subred...",1535925965,No reply
5,9d07z8,AutoModerator,9d07z8,"Talk about your day. Anything goes, but subred...",1536098767,No reply


In [84]:
# Count the number of utterances in each conversation
conversation_summary = utterances_df.groupby('conversation_id').size().reset_index(name='utterance_count')
print(conversation_summary.head())


  conversation_id  utterance_count
0          9bypg3               20
1          9byqyq               11
2          9byrus               15
3          9bytc6               45
4          9byu1v               10


In [85]:
# Find conversation starters
conversation_starters = utterances_df[utterances_df['reply_to'] == "No reply"]
print(conversation_starters.head())


  utterance_id     speaker_id conversation_id  \
0       9c716m  AutoModerator          9c716m   
1       9c8amk         rt2236          9c8amk   
2       9ca0yk    Im_Tsuikyit          9ca0yk   
3       9cfz10  AutoModerator          9cfz10   
5       9d07z8  AutoModerator          9d07z8   

                                                text   timestamp  reply_to  
0  Talk about your day. Anything goes, but subred...  1535839576  No reply  
1  I went to visit a few days ago and Ioved it. I...  1535850650  No reply  
2  One time, my family and I had just returned fr...  1535868569  No reply  
3  Talk about your day. Anything goes, but subred...  1535925965  No reply  
5  Talk about your day. Anything goes, but subred...  1536098767  No reply  


In [86]:
# Count replies to each utterance
reply_count = utterances_df['reply_to'].value_counts().reset_index(name='reply_count')
reply_count.columns = ['utterance_id', 'reply_count']
print(reply_count.head())


  utterance_id  reply_count
0     No reply         3763
1      e5k5np8          472
2      e5ibybt          239
3      e5bp7cl          177
4      e5871fa          165


In [87]:
# Filter conversations where there is at least one "No reply" utterance
no_reply_conversations = utterances_df[utterances_df['reply_to'] == "No reply"]

# Group by conversation ID and count utterances per conversation
conversation_summary_no_reply = no_reply_conversations.groupby('conversation_id').size().reset_index(name='utterance_count')
print(conversation_summary_no_reply.head())

# View an example conversation with "No reply"
example_conversation_id = conversation_summary_no_reply.iloc[0]['conversation_id']
example_conversation = utterances_df[utterances_df['conversation_id'] == example_conversation_id]
print(example_conversation)


  conversation_id  utterance_count
0          9bypg3                1
1          9byrus                1
2          9bz3xn                1
3          9bz6r0                1
4          9bz763                1
      utterance_id            speaker_id conversation_id  \
55590       9bypg3          metalic_acid          9bypg3   
55665      e56qunv            spaceghoti          9bypg3   
55666      e56vt3t             Take_Beer          9bypg3   
55667      e56vwto            spaceghoti          9bypg3   
55668      e56x4f2             Take_Beer          9bypg3   
55669      e56x966            spaceghoti          9bypg3   
55670      e56xizr             Take_Beer          9bypg3   
55671      e56xorc            spaceghoti          9bypg3   
55672      e56xugz  midwesternphotograph          9bypg3   
55673      e56y86b             Take_Beer          9bypg3   
55674      e56yf18            spaceghoti          9bypg3   
55675      e56z3o2             Take_Beer          9bypg3   
55676     

In [88]:
!pip install detoxify




In [89]:
from detoxify import Detoxify

# Function to detect toxicity in user input
def detect_bias_or_toxicity(user_input):
    # Use Detoxify model to predict toxicity
    toxicity = Detoxify('original').predict(user_input)['toxicity']
    return toxicity > 0.7  # Threshold for toxicity detection

# Example chatbot function with bias detection
def chatbot(user_input):
    if detect_bias_or_toxicity(user_input):
        return "I can't comment on that."
    else:
        # Normal response generation (Cohere API or other)
        return generate_response(user_input)


In [90]:
!pip install cohere



In [91]:
import os
import cohere

# Set your Cohere API key as an environment variable
os.environ['COHERE_API_KEY'] = '74CedFbLCqLL4EK6R4onapiBrTUXJoknx59VrsKQ'

# Initialize the Cohere API with the key
co = cohere.Client(os.getenv("COHERE_API_KEY"))


In [92]:
!pip install sentence-transformers faiss-cpu




In [93]:
!pip install sentence-transformers faiss-cpu



In [94]:
# Embed Reddit Conversations Using Hugging Face Sentence Transformers
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

# Load the Hugging Face sentence transformer model
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Sample data (Reddit conversation 'text' field from your dataset)
texts = [
    "Talk about your day. Anything goes, but subreddit rules still apply.",
    "I went to visit a few days ago and loved it.",
    "One time, my family and I had just returned from the beach."
]

# Generate embeddings
embeddings = model.encode(texts)

# Create a FAISS index to store embeddings
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)

# Add embeddings to the index
index.add(embeddings)

# Save embeddings and FAISS index for retrieval later
np.save('reddit_embeddings.npy', embeddings)

print("Embeddings generated and FAISS index created.")




Embeddings generated and FAISS index created.


In [95]:
embeddings

array([[-0.04613591, -0.0034425 ,  0.03919309, ...,  0.02243084,
        -0.00262925, -0.00871499],
       [ 0.04835767, -0.00784416,  0.06136188, ...,  0.01608643,
        -0.05029471,  0.01860338],
       [ 0.01496075,  0.03046594,  0.09300791, ...,  0.00111784,
        -0.11325203,  0.02372815]], dtype=float32)

In [96]:
from transformers import AutoTokenizer, AutoModel
import torch
import faiss
import numpy as np
import cohere

# Load Hugging Face model and tokenizer
model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Example conversation prompts (ensure these are properly defined)
prompts = [
    "You can't handle the truth!",
    "May the Force be with you.",
    "I'll be back.",
    "Here's looking at you, kid.",
    "Houston, we have a problem."
]

# Step 1: Generate embeddings for the prompts (this is necessary to create the FAISS index)
def embed_texts(texts):
    embeddings = []
    for text in texts:
        inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True)
        inputs = {k: v.to(device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)
            cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()
            embeddings.append(cls_embedding)

    return embeddings

# Embed the prompts
embeddings = embed_texts(prompts)

# Step 2: Create FAISS index and add embeddings
embedding_dim = embeddings[0].shape[1]
index = faiss.IndexFlatL2(embedding_dim)

# Flatten embeddings for FAISS
embeddings = np.vstack(embeddings)
index.add(embeddings)

# Initialize Cohere client (ensure API key is set)
cohere_client = cohere.Client(os.getenv('COHERE_API_KEY'))

# Step 3: Function to embed user query
def embed_user_query(query, tokenizer, model, device):
    inputs = tokenizer(query, return_tensors='pt', padding=True, truncation=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        query_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()

    return query_embedding

# Step 4: Function to retrieve the closest conversation using FAISS
def retrieve_conversation(query, tokenizer, model, index, prompts, device):
    query_embedding = embed_user_query(query, tokenizer, model, device)

    # Perform FAISS search to get the closest match
    D, I = index.search(query_embedding, k=1)
    closest_conversation = prompts[I[0][0]]

    return closest_conversation

# Step 5: Function to generate a response using Cohere API
def generate_response(closest_conversation, user_query, cohere_client):
    prompt = f"Previous conversation: {closest_conversation}\nUser: {user_query}\nBot:"

    response = cohere_client.generate(
        model='command-r-plus-04-2024',
        prompt=prompt,
        max_tokens=100
    )

    return response.generations[0].text.strip()

# Example Usage:
user_input = "Tell me something interesting about space."
closest_conversation = retrieve_conversation(user_input, tokenizer, model, index, prompts, device)
bot_response = generate_response(closest_conversation, user_input, cohere_client)

print(f"User: {user_input}")
print(f"Bot: {bot_response}")


User: Tell me something interesting about space.
Bot: Did you know that the Milky Way and Andromeda galaxies are on a collision course? In about 4-5 billion years, the two galaxies will merge to form a giant elliptical galaxy. This event will likely have a profound impact on the structure of the universe and the evolution of life within it.


In [97]:
import pickle
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModel
import torch

# Load or define your model, tokenizer, and embedding
model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Your text prompts and embeddings (make sure embeddings are already created)
prompts = ["example prompt 1", "example prompt 2", "example prompt 3"]
embeddings = np.random.rand(len(prompts), 384)  # Replace with your actual embeddings

# FAISS index creation (assuming embedding dimension is 384)
index = faiss.IndexFlatL2(384)
index.add(embeddings)

# Save the tokenizer, model, embeddings, FAISS index, and prompts to a pickle file in Colab
save_path = '/content/chatbot_state_v1.pkl'  # The file will be saved in Colab's local storage
with open(save_path, 'wb') as f:
    pickle.dump({
        'tokenizer': tokenizer,
        'model': model,
        'embeddings': embeddings,
        'faiss_index': index,
        'prompts': prompts
    }, f)

print(f"Chatbot state saved successfully at {save_path}.")

# Download the pickle file to your local system from Colab
from google.colab import files
files.download(save_path)


Chatbot state saved successfully at /content/chatbot_state_v1.pkl.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [98]:
!pip install gradio



In [99]:
!pip install requests




In [100]:
import requests

# Your API key from NewsAPI
api_key = "6aaa6fe75c11492384c603cddab771ca"

# Define the endpoint and parameters for AI-related news
url = "https://newsapi.org/v2/everything"
parameters = {
    'q': 'artificial intelligence OR NLP OR computer vision',  # Query to search for AI, NLP, or Computer Vision news
    'sortBy': 'relevancy',  # Sort news by relevancy
    'apiKey': api_key  # Your API key
}

# Make the request
response = requests.get(url, params=parameters)

# Check if the request was successful
if response.status_code == 200:
    data = response.json()
    articles = data['articles']

    # Display a summary of the articles
    for article in articles[:5]:  # Get the first 5 articles
        print(f"Title: {article['title']}")
        print(f"Source: {article['source']['name']}")
        print(f"Description: {article['description']}\n")
else:
    print("Error:", response.status_code)


Title: How AI Is Deciphering Lost Scrolls From the Roman Empire
Source: Gizmodo.com
Description: Scrolls found in the shadow of Vesuvius and libraries of ancient texts besides are being illuminated by machine learning and computer vision.

Title: Marvel’s Vision Series Is Bringing Back James Spader as Ultron
Source: Gizmodo.com
Description: The as-yet-untitled Vision series—a follow-up to WandaVision—stars Paul Bettany and is coming soon to Disney+.

Title: Marvel’s Vision Series Is Bringing Back James Spader as Ultron
Source: Gizmodo.com
Description: The as-yet-untitled Vision series—a follow-up to WandaVision—stars Paul Bettany and is coming to Disney+.

Title: The iPhone is getting a ‘glow’ up. What to expect from Apple’s Monday event
Source: CNN
Description: Apple excited fans with its vision for its “Apple Intelligence” AI artificial intelligence system“Apple Intelligence” artificial intelligence system earlier ...

Title: Crop loss is a huge problem for sustainability and food se

In [101]:
os.environ['NEWS_API_KEY'] = '6aaa6fe75c11492384c603cddab771ca'


In [102]:
import requests
import gradio as gr
import cohere
import pickle
import os

# Fetch the API keys from environment variables
cohere_api_key = os.getenv('COHERE_API_KEY')
news_api_key = os.getenv('NEWS_API_KEY')

# Initialize Cohere client
cohere_client = cohere.Client(cohere_api_key)

# Initialize a list to store the conversation history (up to 20 conversations)
conversation_history = []

# Function to fetch AI news using NewsAPI
def fetch_ai_news():
    url = "https://newsapi.org/v2/everything"
    parameters = {
        'q': 'artificial intelligence OR NLP OR computer vision',
        'sortBy': 'relevancy',
        'apiKey': news_api_key  # Securely use the API key from environment variables
    }

    response = requests.get(url, params=parameters)
    if response.status_code == 200:
        data = response.json()
        articles = data['articles']

        # Get the top 5 articles and format their titles
        news_summary = "\n".join([f"{i+1}. {article['title']} - {article['source']['name']}" for i, article in enumerate(articles[:5])])
        return f"Here are the top 5 AI news articles:\n{news_summary}"
    else:
        return "Sorry, I couldn't fetch the latest AI news at the moment."

# Function to generate a response using Cohere
def generate_response(conversation_history, user_input, cohere_client):
    # Limit the history to the last 20 exchanges (10 user-bot pairs)
    if len(conversation_history) > 40:  # Each user-bot pair counts as 2 entries
        conversation_history = conversation_history[-40:]

    # Build the conversation history prompt
    conversation_prompt = "\n".join(conversation_history)

    prompt = f"Previous conversation:\n{conversation_prompt}\n\nUser's question: {user_input}\n\nYour response:"

    response = cohere_client.generate(
        model='command-r-plus-04-2024',
        prompt=prompt,
        max_tokens=300,  # Increase the max_tokens to give a more complete response
        temperature=0.7
    )

    return response.generations[0].text.strip()

# Function to handle the chatbot conversation and include news
def chatbot(user_input):
    global conversation_history

    # Check if the user is asking for news
    if 'news' in user_input.lower() or 'trends' in user_input.lower():
        return fetch_ai_news()  # Fetch and return AI news

    # Retrieve the closest conversation based on the user's input
    closest_conversation = retrieve_closest_dialog(user_input, tokenizer, model, index, prompts, device)

    # Generate a response using Cohere
    bot_response = generate_response(conversation_history, user_input, cohere_client)

    # Append the current user input and bot response to the conversation history
    conversation_history.append(f"User: {user_input}")
    conversation_history.append(f"Bot: {bot_response}")

    return bot_response  # Return only the bot response (no need for a second output)

# Gradio Interface
def gradio_interface(user_input):
    return chatbot(user_input)

# Set up Gradio interface
interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=2, placeholder="Ask me anything..."),
    outputs="text",  # Single output for the bot's response
    title="AI Conversational Chatbot"
)

# Launch the Gradio app
interface.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://e28f213fbeeffbd3b8.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [103]:
import pickle
import faiss
import numpy as np

# Load the FAISS index and necessary components from the saved .pkl file
with open('chatbot_state_v1.pkl', 'rb') as f:
    data = pickle.load(f)

tokenizer = data['tokenizer']
model = data['model']
embeddings = data['embeddings']
faiss_index = data['faiss_index']
prompts = data['prompts']

# Verify the FAISS index is loaded
print(f"FAISS index size: {faiss_index.ntotal}")  # Should return the number of entries


  return torch.load(io.BytesIO(b))


FAISS index size: 3


In [104]:
import pickle
import faiss
import numpy as np
import torch

# Load the FAISS index and other components from the saved .pkl file
with open('chatbot_state_v1.pkl', 'rb') as f:
    data = pickle.load(f)

tokenizer = data['tokenizer']
model = data['model']
embeddings = data['embeddings']
faiss_index = data['faiss_index']
prompts = data['prompts']

# Verify FAISS index size
print(f"FAISS index size: {faiss_index.ntotal}")  # Should return the number of entries


FAISS index size: 3


In [105]:
import requests
import gradio as gr
import cohere
import pickle
import os

# Fetch the API keys from environment variables
cohere_api_key = os.getenv('COHERE_API_KEY')
news_api_key = os.getenv('NEWS_API_KEY')

# Initialize Cohere client
cohere_client = cohere.Client(cohere_api_key)

# Initialize a list to store the conversation history (up to 20 conversations)
conversation_history = []

# Function to fetch AI news using NewsAPI
def fetch_ai_news(user_query):
    url = "https://newsapi.org/v2/everything"
    parameters = {
        'q': 'artificial intelligence OR NLP OR computer vision ' + user_query,
        'sortBy': 'relevancy',
        'apiKey': news_api_key
    }

    response = requests.get(url, params=parameters)
    if response.status_code == 200:
        data = response.json()
        articles = data['articles']

        # Get the top 5 articles and format their titles
        news_summary = "\n".join([f"{i+1}. {article['title']} - {article['source']['name']}" for i, article in enumerate(articles[:5])])
        return news_summary, articles[0]['description']  # Returning the top article's description for Cohere
    else:
        return "Sorry, I couldn't fetch the latest AI news at the moment.", ""

# Function to generate a response using Cohere
def generate_response(conversation_history, user_input, relevant_news, cohere_client):
    # Limit the history to the last 20 exchanges (10 user-bot pairs)
    if len(conversation_history) > 40:  # Each user-bot pair counts as 2 entries
        conversation_history = conversation_history[-40:]

    # Build the conversation history prompt
    conversation_prompt = "\n".join(conversation_history)

    # Include the relevant news in the prompt
    prompt = f"Previous conversation:\n{conversation_prompt}\n\nRelevant News: {relevant_news}\n\nUser's question: {user_input}\n\nYour response:"

    response = cohere_client.generate(
        model='command-r-plus-04-2024',
        prompt=prompt,
        max_tokens=300,  # Increase the max_tokens to give a more complete response
        temperature=0.7
    )

    return response.generations[0].text.strip()

# Function to handle the chatbot conversation and include news
def chatbot(user_input):
    global conversation_history

    # Fetch news based on the user query
    news_summary, relevant_news = fetch_ai_news(user_input)

    if relevant_news:
        # Generate a response using Cohere by including the news information
        bot_response = generate_response(conversation_history, user_input, relevant_news, cohere_client)
    else:
        bot_response = "I'm sorry, I couldn't fetch the relevant news."

    # Append the current user input and bot response to the conversation history
    conversation_history.append(f"User: {user_input}")
    conversation_history.append(f"Bot: {bot_response}")

    return bot_response

# Gradio Interface
def gradio_interface(user_input):
    return chatbot(user_input)

# Set up Gradio interface
interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=2, placeholder="Ask me anything..."),
    outputs="text",  # Single output for the bot's response
    title="AI Conversational Chatbot with NewsAPI"
)

# Launch the Gradio app
interface.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://b46abbf3ae4a947809.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [106]:
import requests
import gradio as gr
import cohere
import os
import csv
import datetime

# Log the message to a CSV file
def log_to_csv(log_message):
    with open('log.csv', mode='a', newline='') as file:
        writer = csv.writer(file)
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        writer.writerow([timestamp, log_message])

# Fetch the API keys from environment variables
cohere_api_key = os.getenv('COHERE_API_KEY')
news_api_key = os.getenv('NEWS_API_KEY')

# Log the keys fetched
log_to_csv(f"Cohere API Key: {cohere_api_key}, NewsAPI Key: {news_api_key}")

# Initialize Cohere client
cohere_client = cohere.Client(cohere_api_key)

# Initialize a list to store the conversation history (up to 20 conversations)
conversation_history = []

# Function to fetch AI news using NewsAPI
def fetch_ai_news():
    url = "https://newsapi.org/v2/everything"
    parameters = {
        'q': 'artificial intelligence OR NLP OR computer vision',  # You can adjust the query here
        'sortBy': 'relevancy',
        'apiKey': news_api_key  # Securely use the API key from environment variables
    }

    log_to_csv(f"Fetching news from: {url} with parameters: {parameters}")

    response = requests.get(url, params=parameters)
    log_to_csv(f"NewsAPI response code: {response.status_code}")

    if response.status_code == 200:
        data = response.json()
        articles = data['articles']

        log_to_csv(f"Fetched {len(articles)} articles from NewsAPI.")

        # Get the top 5 articles and format their titles
        news_summary = "\n".join([f"{i+1}. {article['title']} - {article['source']['name']}" for i, article in enumerate(articles[:5])])
        log_to_csv(f"Formatted news summary: {news_summary}")
        return f"Here are the top 5 AI news articles:\n{news_summary}"
    else:
        log_to_csv(f"Error fetching AI news: {response.status_code}")
        return "Sorry, I couldn't fetch the latest AI news at the moment."

# Function to generate a response using Cohere
def generate_response(conversation_history, user_input, cohere_client):
    log_to_csv("Generating a response using Cohere...")

    # Limit the history to the last 20 exchanges (10 user-bot pairs)
    if len(conversation_history) > 40:  # Each user-bot pair counts as 2 entries
        log_to_csv("Trimming conversation history to the last 40 entries.")
        conversation_history = conversation_history[-40:]

    # Build the conversation history prompt
    conversation_prompt = "\n".join(conversation_history)
    log_to_csv(f"Conversation history prompt: {conversation_prompt}")

    prompt = f"Previous conversation:\n{conversation_prompt}\n\nUser's question: {user_input}\n\nYour response:"
    log_to_csv(f"Complete prompt for Cohere: {prompt}")

    response = cohere_client.generate(
        model='command-r-plus-04-2024',
        prompt=prompt,
        max_tokens=300,  # Increase the max_tokens to give a more complete response
        temperature=0.7
    )

    log_to_csv(f"Cohere response: {response.generations[0].text.strip()}")
    return response.generations[0].text.strip()

# Function to classify queries for news or general response
def is_news_query(user_input):
    log_to_csv(f"Checking if the query is related to news: {user_input}")

    # Check for keywords related to news or trends
    news_keywords = ['news', 'trends', 'recent developments', 'latest', 'updates']
    is_news = any(keyword in user_input.lower() for keyword in news_keywords)

    log_to_csv(f"Is news query: {is_news}")
    return is_news

# Main chatbot function
def chatbot(user_input):
    global conversation_history
    log_to_csv(f"User input: {user_input}")

    # Check if the user is asking for news or trends
    if is_news_query(user_input):
        log_to_csv("Fetching news based on user input.")
        return fetch_ai_news()  # Fetch and return AI news

    # If not a news query, continue with normal conversation using Cohere
    bot_response = generate_response(conversation_history, user_input, cohere_client)

    # Append the current user input and bot response to the conversation history
    conversation_history.append(f"User: {user_input}")
    conversation_history.append(f"Bot: {bot_response}")

    log_to_csv(f"Updated conversation history: {conversation_history}")
    return bot_response  # Return only the bot response (no need for a second output)

# Gradio Interface
def gradio_interface(user_input):
    return chatbot(user_input)

# Set up Gradio interface
interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=2, placeholder="Ask me anything..."),
    outputs="text",  # Single output for the bot's response
    title="AI Conversational Chatbot with NewsAPI"
)

# Launch the Gradio app
interface.launch()



Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://742b625a8919349db8.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [108]:
print(articles)  # Debug print for articles




In [109]:
print("Calling Cohere API...")
print("Response from Cohere:", bot_response)
print("Conversation history:", conversation_history)


Calling Cohere API...
Response from Cohere: Did you know that the Milky Way and Andromeda galaxies are on a collision course? In about 4-5 billion years, the two galaxies will merge to form a giant elliptical galaxy. This event will likely have a profound impact on the structure of the universe and the evolution of life within it.
Conversation history: []


In [110]:
def chatbot(user_input):
    global conversation_history
    try:
        # Check if the user is asking for news
        if 'news' in user_input.lower() or 'trends' in user_input.lower():
            news = fetch_ai_news()
            print("News Fetched:", news)  # Debug Print
            return news

        # Retrieve the closest conversation based on the user's input
        print("Generating response for input:", user_input)  # Debug Print
        bot_response = generate_response(conversation_history, user_input, cohere_client)

        # Append the current user input and bot response to the conversation history
        conversation_history.append(f"User: {user_input}")
        conversation_history.append(f"Bot: {bot_response}")
        print("Bot Response:", bot_response)  # Debug Print

        return bot_response

    except Exception as e:
        print("Error occurred:", str(e))
        return f"An error occurred: {str(e)}"


In [111]:
import requests
import os

# Fetch the API key from environment variable or use a hardcoded one (for testing)
news_api_key = os.getenv('NEWS_API_KEY', 'your_api_key_here')

# Function to fetch AI news using NewsAPI
def fetch_ai_news():
    url = "https://newsapi.org/v2/everything"
    parameters = {
        'q': 'artificial intelligence OR NLP OR computer vision',
        'sortBy': 'relevancy',
        'apiKey': news_api_key
    }

    response = requests.get(url, params=parameters)

    # Print out the response for debugging purposes
    print(f"NewsAPI Response: {response.text}")  # Check what the response contains

    if response.status_code == 200:
        data = response.json()
        articles = data['articles']

        # Get the top 5 articles and format their titles
        news_summary = "\n".join([f"{i+1}. {article['title']} - {article['source']['name']}" for i, article in enumerate(articles[:5])])
        return f"Here are the top 5 AI news articles:\n{news_summary}"
    else:
        return "Sorry, I couldn't fetch the latest AI news at the moment."

# Example test call
fetch_ai_news()




"Here are the top 5 AI news articles:\n1. How AI Is Deciphering Lost Scrolls From the Roman Empire - Gizmodo.com\n2. Marvel’s Vision Series Is Bringing Back James Spader as Ultron - Gizmodo.com\n3. Marvel’s Vision Series Is Bringing Back James Spader as Ultron - Gizmodo.com\n4. The iPhone is getting a ‘glow’ up. What to expect from Apple’s Monday event - CNN\n5. Crop loss is a huge problem for sustainability and food security. I'm using AI and computer vision to help fix it. - Business Insider"

In [112]:
import requests
import gradio as gr
import cohere
import os

# Fetch the API keys from environment variables
cohere_api_key = os.getenv('COHERE_API_KEY')
news_api_key = os.getenv('NEWS_API_KEY')

# Initialize Cohere client
print("Initializing Cohere client...")
cohere_client = cohere.Client(cohere_api_key)

# Initialize a list to store the conversation history (up to 20 conversations)
conversation_history = []

# Function to fetch AI news using NewsAPI
def fetch_ai_news():
    print("Fetching AI news from NewsAPI...")
    url = "https://newsapi.org/v2/everything"
    parameters = {
        'q': 'artificial intelligence OR NLP OR computer vision',  # You can adjust the query here
        'sortBy': 'relevancy',
        'apiKey': news_api_key  # Securely use the API key from environment variables
    }

    response = requests.get(url, params=parameters)
    print(f"NewsAPI Response Status Code: {response.status_code}")
    if response.status_code == 200:
        data = response.json()
        articles = data['articles']

        # Get the top 5 articles and format their titles
        print("Formatting top 5 articles...")
        news_summary = "\n".join([f"{i+1}. {article['title']} - {article['source']['name']}" for i, article in enumerate(articles[:5])])
        print(f"News Summary: {news_summary}")
        return f"Here are the top 5 AI news articles:\n{news_summary}"
    else:
        print(f"Error fetching AI news: {response.status_code}")
        return "Sorry, I couldn't fetch the latest AI news at the moment."

# Function to generate a response using Cohere
def generate_response(conversation_history, user_input, cohere_client):
    # Limit the history to the last 20 exchanges (10 user-bot pairs)
    print("Generating response using Cohere...")
    if len(conversation_history) > 40:  # Each user-bot pair counts as 2 entries
        conversation_history = conversation_history[-40:]

    # Build the conversation history prompt
    conversation_prompt = "\n".join(conversation_history)
    print(f"Conversation History:\n{conversation_prompt}")

    prompt = f"Previous conversation:\n{conversation_prompt}\n\nUser's question: {user_input}\n\nYour response:"
    print(f"Prompt for Cohere:\n{prompt}")

    response = cohere_client.generate(
        model='command-r-plus-04-2024',
        prompt=prompt,
        max_tokens=300,  # Increase the max_tokens to give a more complete response
        temperature=0.7
    )

    bot_response = response.generations[0].text.strip()
    print(f"Generated Response: {bot_response}")
    return bot_response

# Function to classify queries for news or general response
def is_news_query(user_input):
    # Check for keywords related to news or trends
    print(f"Checking if the user query is about news: {user_input}")
    news_keywords = ['news', 'trends', 'recent developments', 'latest', 'updates']
    is_news = any(keyword in user_input.lower() for keyword in news_keywords)
    print(f"Is News Query: {is_news}")
    return is_news

# Main chatbot function
def chatbot(user_input):
    global conversation_history

    print(f"User Input: {user_input}")

    # Check if the user is asking for news or trends
    if is_news_query(user_input):
        print("Fetching news articles...")
        return fetch_ai_news()  # Fetch and return AI news

    # If not a news query, continue with normal conversation using Cohere
    print("Generating bot response through Cohere...")
    bot_response = generate_response(conversation_history, user_input, cohere_client)

    # Append the current user input and bot response to the conversation history
    conversation_history.append(f"User: {user_input}")
    conversation_history.append(f"Bot: {bot_response}")

    print(f"Updated Conversation History: {conversation_history}")
    return bot_response  # Return only the bot response

# For testing the logic without launching Gradio
def test_chatbot(input_text):
    response = chatbot(input_text)
    print(f"Final Response: {response}")
    return response

# Call test_chatbot with different queries to debug
test_chatbot("What are the new AI developments, in July, 2024?")
test_chatbot("Tell me the latest news in computer vision, in July, 2024?")



Initializing Cohere client...
User Input: What are the new AI developments, in July, 2024?
Checking if the user query is about news: What are the new AI developments, in July, 2024?
Is News Query: False
Generating bot response through Cohere...
Generating response using Cohere...
Conversation History:

Prompt for Cohere:
Previous conversation:


User's question: What are the new AI developments, in July, 2024?

Your response:
Generated Response: Here are some of the new AI developments that have been announced or reported in July 2024:

1. OpenAI's ChatGPT 4.0: OpenAI, the company behind the popular language model ChatGPT, announced that it is working on the next generation of its AI technology, ChatGPT 4.0. The new version is expected to have improved capabilities, including better context understanding, more accurate responses, and enhanced multilingual support.

2. Cohere's Command Model: Cohere, a Canadian AI company, released its Command Model, a large language model trained to fo

"Here are the top 5 AI news articles:\n1. How AI Is Deciphering Lost Scrolls From the Roman Empire - Gizmodo.com\n2. Marvel’s Vision Series Is Bringing Back James Spader as Ultron - Gizmodo.com\n3. Marvel’s Vision Series Is Bringing Back James Spader as Ultron - Gizmodo.com\n4. The iPhone is getting a ‘glow’ up. What to expect from Apple’s Monday event - CNN\n5. Crop loss is a huge problem for sustainability and food security. I'm using AI and computer vision to help fix it. - Business Insider"

In [113]:
import requests
import gradio as gr
import cohere
import os
import re  # For regex to identify dates

# Fetch the API keys from environment variables
cohere_api_key = os.getenv('COHERE_API_KEY')
news_api_key = os.getenv('NEWS_API_KEY')

# Initialize Cohere client
print("Initializing Cohere client...")
cohere_client = cohere.Client(cohere_api_key)

# Initialize a list to store the conversation history (up to 20 conversations)
conversation_history = []

# Function to fetch AI news using NewsAPI
def fetch_ai_news():
    print("Fetching AI news from NewsAPI...")
    url = "https://newsapi.org/v2/everything"
    parameters = {
        'q': 'artificial intelligence OR NLP OR computer vision',  # You can adjust the query here
        'sortBy': 'relevancy',
        'apiKey': news_api_key  # Securely use the API key from environment variables
    }

    response = requests.get(url, params=parameters)
    print(f"NewsAPI Response Status Code: {response.status_code}")

    if response.status_code == 200:
        data = response.json()
        articles = data['articles']

        # Get the top 5 articles and format their titles
        news_summary = "\n".join([f"{i+1}. {article['title']} - {article['source']['name']}" for i, article in enumerate(articles[:5])])
        return f"Here are the top 5 AI news articles:\n{news_summary}"
    else:
        return "Sorry, I couldn't fetch the latest AI news at the moment."

# Function to generate a response using Cohere
def generate_response(conversation_history, user_input, cohere_client):
    print("Generating response using Cohere...")
    # Limit the history to the last 20 exchanges (10 user-bot pairs)
    if len(conversation_history) > 40:  # Each user-bot pair counts as 2 entries
        conversation_history = conversation_history[-40:]

    # Build the conversation history prompt
    conversation_prompt = "\n".join(conversation_history)

    prompt = f"Previous conversation:\n{conversation_prompt}\n\nUser's question: {user_input}\n\nYour response:"
    print(f"Prompt for Cohere:\n{prompt}")

    response = cohere_client.generate(
        model='command-r-plus-04-2024',
        prompt=prompt,
        max_tokens=300,  # Increase the max_tokens to give a more complete response
        temperature=0.7
    )

    return response.generations[0].text.strip()

# Extended function to classify queries for news or general response
def is_news_query(user_input):
    print(f"Checking if the user query is about news: {user_input}")

    # Define keywords related to news or trends
    news_keywords = ['news', 'trends', 'recent developments', 'latest', 'updates']

    # Check if any of the news keywords are in the user query
    if any(keyword in user_input.lower() for keyword in news_keywords):
        return True

    # Check if there is a date in the query using regex (e.g., "July, 2024", "2024", etc.)
    date_pattern = re.compile(r'\b(?:january|february|march|april|may|june|july|august|september|october|november|december)\b.*\b\d{4}\b', re.IGNORECASE)
    if date_pattern.search(user_input):
        return True

    return False

# Main chatbot function
def chatbot(user_input):
    global conversation_history

    # Check if the user is asking for news or trends
    if is_news_query(user_input):
        print("Is News Query: True")
        return fetch_ai_news()  # Fetch and return AI news

    # If not a news query, continue with normal conversation using Cohere
    print("Is News Query: False")
    bot_response = generate_response(conversation_history, user_input, cohere_client)

    # Append the current user input and bot response to the conversation history
    conversation_history.append(f"User: {user_input}")
    conversation_history.append(f"Bot: {bot_response}")

    return bot_response  # Return only the bot response (no need for a second output)

# Function to test chatbot without launching the interface
def test_chatbot(user_input):
    print(f"User Input: {user_input}")
    response = chatbot(user_input)
    print(f"Final Response: {response}")

# Call test_chatbot with different queries to debug
test_chatbot("What are the new AI developments, in July, 2024?")
test_chatbot("Tell me the latest news in computer vision, in July, 2024?")
test_chatbot("What is AI?")
test_chatbot("Explain the latest trends in NLP.")


Initializing Cohere client...
User Input: What are the new AI developments, in July, 2024?
Checking if the user query is about news: What are the new AI developments, in July, 2024?
Is News Query: True
Fetching AI news from NewsAPI...
NewsAPI Response Status Code: 200
Final Response: Here are the top 5 AI news articles:
1. How AI Is Deciphering Lost Scrolls From the Roman Empire - Gizmodo.com
2. Marvel’s Vision Series Is Bringing Back James Spader as Ultron - Gizmodo.com
3. Marvel’s Vision Series Is Bringing Back James Spader as Ultron - Gizmodo.com
4. The iPhone is getting a ‘glow’ up. What to expect from Apple’s Monday event - CNN
5. Crop loss is a huge problem for sustainability and food security. I'm using AI and computer vision to help fix it. - Business Insider
User Input: Tell me the latest news in computer vision, in July, 2024?
Checking if the user query is about news: Tell me the latest news in computer vision, in July, 2024?
Is News Query: True
Fetching AI news from NewsAPI.

In [114]:
import requests
import os
import cohere

# Fetch the API keys from environment variables
cohere_api_key = os.getenv('COHERE_API_KEY')
news_api_key = os.getenv('NEWS_API_KEY')

# Initialize Cohere client
cohere_client = cohere.Client(cohere_api_key)

# Initialize a list to store the conversation history (up to 20 conversations)
conversation_history = []

# Function to fetch AI news using NewsAPI with fallback if results are irrelevant
def fetch_ai_news():
    url = "https://newsapi.org/v2/everything"
    parameters = {
        'q': 'artificial intelligence AND trends OR developments AND 2024',  # More specific query
        'sortBy': 'relevancy',
        'apiKey': news_api_key
    }

    response = requests.get(url, params=parameters)
    if response.status_code == 200:
        data = response.json()
        articles = data['articles']

        # Check if the articles contain irrelevant keywords
        irrelevant_keywords = ['Marvel', 'iPhone', 'celebrity', 'TV', 'movie']
        if any(irrelevant_keyword in article['title'] for article in articles for irrelevant_keyword in irrelevant_keywords):
            return "The news articles fetched don't seem relevant. Let me provide an explanation instead."

        # Get the top 5 articles and format their titles
        news_summary = "\n".join([f"{i+1}. {article['title']} - {article['source']['name']}" for i, article in enumerate(articles[:5])])
        return f"Here are the top 5 AI news articles:\n{news_summary}"
    else:
        return "Sorry, I couldn't fetch the latest AI news at the moment."

# Function to generate a response using Cohere
def generate_response(conversation_history, user_input, cohere_client):
    # Limit the history to the last 20 exchanges (10 user-bot pairs)
    if len(conversation_history) > 40:  # Each user-bot pair counts as 2 entries
        conversation_history = conversation_history[-40:]

    # Build the conversation history prompt
    conversation_prompt = "\n".join(conversation_history)

    prompt = f"Previous conversation:\n{conversation_prompt}\n\nUser's question: {user_input}\n\nYour response:"

    response = cohere_client.generate(
        model='command-r-plus-04-2024',
        prompt=prompt,
        max_tokens=300,  # Increase the max_tokens to give a more complete response
        temperature=0.7
    )

    return response.generations[0].text.strip()

# Function to classify queries for news or general response
def is_news_query(user_input):
    # Check for keywords related to news or trends in specific time periods
    news_keywords = ['news', 'trends', 'recent', 'developments', 'latest', 'updates', 'in']
    # Also ensure we're handling specific queries like months or years
    time_indicators = ['2024', '2023', 'July', 'June', 'May', 'in']  # Extend as needed

    # If the query contains both news and time-related indicators, return True
    if any(keyword in user_input.lower() for keyword in news_keywords) and any(time_word in user_input.lower() for time_word in time_indicators):
        return True
    return False

# Main chatbot function
def chatbot(user_input):
    global conversation_history

    print(f"User Input: {user_input}")
    print(f"Checking if the user query is about news: {user_input}")

    # Check if the user is asking for news or trends
    if is_news_query(user_input):
        print("Is News Query: True")
        return fetch_ai_news()  # Fetch and return AI news

    print("Is News Query: False")

    # If not a news query, continue with normal conversation using Cohere
    print("Generating response using Cohere...")
    bot_response = generate_response(conversation_history, user_input, cohere_client)

    # Append the current user input and bot response to the conversation history
    conversation_history.append(f"User: {user_input}")
    conversation_history.append(f"Bot: {bot_response}")

    return bot_response  # Return only the bot response (no need for a second output)

# Test the chatbot with some queries
def test_chatbot(user_input):
    print("Initializing Cohere client...")
    response = chatbot(user_input)
    print(f"Final Response: {response}")

# Call test_chatbot with different queries to debug
test_chatbot("What are the new AI developments, in July, 2024?")
test_chatbot("Tell me the latest news in computer vision, in July, 2024?")
test_chatbot("What is AI?")
test_chatbot("Explain the latest trends in NLP.")


Initializing Cohere client...
User Input: What are the new AI developments, in July, 2024?
Checking if the user query is about news: What are the new AI developments, in July, 2024?
Is News Query: True
Final Response: Here are the top 5 AI news articles:
1. Palantir Technologies Inc. (NYSE:PLTR): Microsoft Deal Is Game Changing - Yahoo Entertainment
2. Superyacht Security: Here Are The Top Ways To Protect Your Voyage - Forbes
3. Morning Bid: Pre-Nvidia hesitation obscures record high - Yahoo Entertainment
4. Why Artificial Intelligence Hype Isn't Living Up To Expectations - Forbes
5. McKinsey’s 2024 Tech Trends And What Gemini And Claude Think About Them - Forbes
Initializing Cohere client...
User Input: Tell me the latest news in computer vision, in July, 2024?
Checking if the user query is about news: Tell me the latest news in computer vision, in July, 2024?
Is News Query: True
Final Response: Here are the top 5 AI news articles:
1. Palantir Technologies Inc. (NYSE:PLTR): Microsoft 

In [115]:
import requests
import os
import cohere

# Fetch the API keys from environment variables
cohere_api_key = os.getenv('COHERE_API_KEY')
news_api_key = os.getenv('NEWS_API_KEY')

# Initialize Cohere client
cohere_client = cohere.Client(cohere_api_key)

# Initialize a list to store the conversation history (up to 20 conversations)
conversation_history = []

# Function to fetch AI news using NewsAPI with fallback if results are irrelevant
def fetch_ai_news():
    url = "https://newsapi.org/v2/everything"
    parameters = {
        'q': 'artificial intelligence AND trends OR developments OR "natural language processing" OR "computer vision"',
        'sortBy': 'relevancy',
        'language': 'en',
        'apiKey': news_api_key
    }

    response = requests.get(url, params=parameters)
    if response.status_code == 200:
        data = response.json()
        articles = data['articles']

        # Filter out irrelevant articles based on keywords in the titles
        irrelevant_keywords = ['yacht', 'entertainment', 'Forbes', 'Nvidia', 'McKinsey', 'superyacht', 'celebrity', 'luxury', 'voyage']
        filtered_articles = [
            article for article in articles if not any(keyword.lower() in article['title'].lower() for keyword in irrelevant_keywords)
        ]

        if not filtered_articles:
            return "The news articles fetched don't seem relevant. Let me provide an explanation instead."

        # Get the top 5 articles and format their titles
        news_summary = "\n".join([f"{i+1}. {article['title']} - {article['source']['name']}" for i, article in enumerate(filtered_articles[:5])])
        return f"Here are the top 5 AI news articles:\n{news_summary}"
    else:
        return "Sorry, I couldn't fetch the latest AI news at the moment."

# Function to generate a response using Cohere
def generate_response(conversation_history, user_input, cohere_client):
    # Limit the history to the last 20 exchanges (10 user-bot pairs)
    if len(conversation_history) > 40:  # Each user-bot pair counts as 2 entries
        conversation_history = conversation_history[-40:]

    # Build the conversation history prompt
    conversation_prompt = "\n".join(conversation_history)

    prompt = f"Previous conversation:\n{conversation_prompt}\n\nUser's question: {user_input}\n\nYour response:"

    response = cohere_client.generate(
        model='command-r-plus-04-2024',
        prompt=prompt,
        max_tokens=300,  # Increase the max_tokens to give a more complete response
        temperature=0.7
    )

    return response.generations[0].text.strip()

# Function to classify queries for news or general response
def is_news_query(user_input):
    # Check for keywords related to news or trends in specific time periods
    news_keywords = ['news', 'trends', 'recent', 'developments', 'latest', 'updates', 'in']
    # Also ensure we're handling specific queries like months or years
    time_indicators = ['2024', '2023', 'July', 'June', 'May', 'in']  # Extend as needed

    # If the query contains both news and time-related indicators, return True
    if any(keyword in user_input.lower() for keyword in news_keywords) and any(time_word in user_input.lower() for time_word in time_indicators):
        return True
    return False

# Main chatbot function
def chatbot(user_input):
    global conversation_history

    print(f"User Input: {user_input}")
    print(f"Checking if the user query is about news: {user_input}")

    # Check if the user is asking for news or trends
    if is_news_query(user_input):
        print("Is News Query: True")
        return fetch_ai_news()  # Fetch and return AI news

    print("Is News Query: False")

    # If not a news query, continue with normal conversation using Cohere
    print("Generating response using Cohere...")
    bot_response = generate_response(conversation_history, user_input, cohere_client)

    # Append the current user input and bot response to the conversation history
    conversation_history.append(f"User: {user_input}")
    conversation_history.append(f"Bot: {bot_response}")

    return bot_response  # Return only the bot response (no need for a second output)

# Test the chatbot with some queries
def test_chatbot(user_input):
    print("Initializing Cohere client...")
    response = chatbot(user_input)
    print(f"Final Response: {response}")

# Call test_chatbot with different queries to debug
test_chatbot("What are the new AI developments, in July, 2024?")
test_chatbot("Tell me the latest news in computer vision, in July, 2024?")
test_chatbot("What is AI?")
test_chatbot("Explain the latest trends in NLP.")


Initializing Cohere client...
User Input: What are the new AI developments, in July, 2024?
Checking if the user query is about news: What are the new AI developments, in July, 2024?
Is News Query: True
Final Response: Here are the top 5 AI news articles:
1. How AI Is Deciphering Lost Scrolls From the Roman Empire - Gizmodo.com
2. Nascar Pit Crews Are Using AI for the Perfect Pit Stop - Wired
3. Ikea Employees Are Getting New AI Drone Coworkers in the U.S. - Gizmodo.com
4. Crop loss is a huge problem for sustainability and food security. I'm using AI and computer vision to help fix it. - Business Insider
5. How to Watch Apple's iPhone 16 Launch Event, and What to Expect - Wired
Initializing Cohere client...
User Input: Tell me the latest news in computer vision, in July, 2024?
Checking if the user query is about news: Tell me the latest news in computer vision, in July, 2024?
Is News Query: True
Final Response: Here are the top 5 AI news articles:
1. How AI Is Deciphering Lost Scrolls F

In [116]:
# Updated filter keywords and improved search queries
def fetch_ai_news():
    url = "https://newsapi.org/v2/everything"
    parameters = {
        'q': 'artificial intelligence AND (NLP OR "computer vision" OR "deep learning" OR "machine learning")',
        'sortBy': 'relevancy',
        'language': 'en',
        'apiKey': news_api_key
    }

    response = requests.get(url, params=parameters)
    if response.status_code == 200:
        data = response.json()
        articles = data['articles']

        # Enhanced irrelevant keyword filtering
        irrelevant_keywords = ['yacht', 'entertainment', 'Forbes', 'Apple', 'iPhone', 'launch', 'event', 'celebrity', 'luxury', 'voyage', 'drone', 'furniture', 'Ikea']
        filtered_articles = [
            article for article in articles if not any(keyword.lower() in article['title'].lower() for keyword in irrelevant_keywords)
        ]

        if not filtered_articles:
            return "The news articles fetched don't seem relevant. Let me provide an explanation instead."

        # Get the top 5 articles and format their titles
        news_summary = "\n".join([f"{i+1}. {article['title']} - {article['source']['name']}" for i, article in enumerate(filtered_articles[:5])])
        return f"Here are the top 5 AI news articles:\n{news_summary}"
    else:
        return "Sorry, I couldn't fetch the latest AI news at the moment."

# Test the chatbot with some queries
test_chatbot("What are the new AI developments, in July, 2024?")
test_chatbot("Tell me the latest news in computer vision, in July, 2024?")
test_chatbot("What is AI?")
test_chatbot("Explain the latest trends in NLP.")


Initializing Cohere client...
User Input: What are the new AI developments, in July, 2024?
Checking if the user query is about news: What are the new AI developments, in July, 2024?
Is News Query: True
Final Response: Here are the top 5 AI news articles:
1. How AI Is Deciphering Lost Scrolls From the Roman Empire - Gizmodo.com
2. Crop loss is a huge problem for sustainability and food security. I'm using AI and computer vision to help fix it. - Business Insider
3. Roboflow (YC S20) is hiring an ML Engineer to democratize computer vision - Hacker News
4. Can Google Make Stoplights Smarter? - Slashdot.org
5. Artificial Intelligence Cheapens the Artistic Imagination - Convergemedia.org
Initializing Cohere client...
User Input: Tell me the latest news in computer vision, in July, 2024?
Checking if the user query is about news: Tell me the latest news in computer vision, in July, 2024?
Is News Query: True
Final Response: Here are the top 5 AI news articles:
1. How AI Is Deciphering Lost Scr

In [117]:
# Function to fetch AI news using NewsAPI
def fetch_ai_news(query):
    # Detect specific topics from the user query
    if "computer vision" in query.lower():
        search_query = 'computer vision'
    elif "NLP" in query.lower() or "natural language processing" in query.lower():
        search_query = 'NLP OR "natural language processing"'
    else:
        search_query = 'artificial intelligence AND (NLP OR "computer vision" OR "deep learning" OR "machine learning")'

    url = "https://newsapi.org/v2/everything"
    parameters = {
        'q': search_query,
        'sortBy': 'relevancy',
        'language': 'en',
        'apiKey': news_api_key
    }

    response = requests.get(url, params=parameters)
    if response.status_code == 200:
        data = response.json()
        articles = data['articles']

        # Enhanced irrelevant keyword filtering
        irrelevant_keywords = ['yacht', 'entertainment', 'Forbes', 'Apple', 'iPhone', 'launch', 'event', 'celebrity', 'luxury', 'voyage', 'drone', 'furniture', 'Ikea']
        filtered_articles = [
            article for article in articles if not any(keyword.lower() in article['title'].lower() for keyword in irrelevant_keywords)
        ]

        if not filtered_articles:
            return "The news articles fetched don't seem relevant. Let me provide an explanation instead."

        # Get the top 5 articles and format their titles
        news_summary = "\n".join([f"{i+1}. {article['title']} - {article['source']['name']}" for i, article in enumerate(filtered_articles[:5])])
        return f"Here are the top 5 {search_query} news articles:\n{news_summary}"
    else:
        return "Sorry, I couldn't fetch the latest AI news at the moment."

# Main chatbot function
def chatbot(user_input):
    global conversation_history

    # Check if the user is asking for news or trends
    if is_news_query(user_input):
        print("Is News Query: True")
        return fetch_ai_news(user_input)  # Fetch and return AI news

    print("Is News Query: False")

    # If not a news query, continue with normal conversation using Cohere
    bot_response = generate_response(conversation_history, user_input, cohere_client)

    # Append the current user input and bot response to the conversation history
    conversation_history.append(f"User: {user_input}")
    conversation_history.append(f"Bot: {bot_response}")

    return bot_response

# Test the chatbot with some queries
test_chatbot("What are the new AI developments, in July, 2024?")
test_chatbot("Tell me the latest news in computer vision, in July, 2024?")
test_chatbot("What is AI?")
test_chatbot("Explain the latest trends in NLP.")


Initializing Cohere client...
Is News Query: True
Final Response: Here are the top 5 artificial intelligence AND (NLP OR "computer vision" OR "deep learning" OR "machine learning") news articles:
1. How AI Is Deciphering Lost Scrolls From the Roman Empire - Gizmodo.com
2. Crop loss is a huge problem for sustainability and food security. I'm using AI and computer vision to help fix it. - Business Insider
3. Roboflow (YC S20) is hiring an ML Engineer to democratize computer vision - Hacker News
4. Can Google Make Stoplights Smarter? - Slashdot.org
5. Artificial Intelligence Cheapens the Artistic Imagination - Convergemedia.org
Initializing Cohere client...
Is News Query: True
Final Response: Here are the top 5 computer vision news articles:
1. How AI Is Deciphering Lost Scrolls From the Roman Empire - Gizmodo.com
2. Crop loss is a huge problem for sustainability and food security. I'm using AI and computer vision to help fix it. - Business Insider
3. The Plaud NotePin Is an AI Notetaker 

In [119]:
!pip install langchain



In [120]:
!pip install --upgrade langchain



In [121]:
!pip install langchain-community



In [122]:
import requests
import cohere
import os
import gradio as gr
from langchain.agents import Tool
from datetime import datetime

# Fetch the API keys from environment variables
cohere_api_key = os.getenv('COHERE_API_KEY')
news_api_key = os.getenv('NEWS_API_KEY')

# Initialize Cohere client
cohere_client = cohere.Client(cohere_api_key)

# Tool 1: Fetch AI news using NewsAPI
def fetch_ai_news(query):
    search_query = 'artificial intelligence AND (NLP OR "computer vision" OR "deep learning" OR "machine learning")'
    url = "https://newsapi.org/v2/everything"
    parameters = {
        'q': search_query,
        'sortBy': 'relevancy',
        'language': 'en',
        'apiKey': news_api_key
    }
    response = requests.get(url, params=parameters)
    if response.status_code == 200:
        data = response.json()
        articles = data['articles']

        # Filter out articles with future dates
        current_date = datetime.now().date()
        valid_articles = [article for article in articles if datetime.strptime(article['publishedAt'][:10], "%Y-%m-%d").date() <= current_date]

        news_summary = "\n".join([f"{i+1}. {article['title']} - {article['source']['name']}" for i, article in enumerate(valid_articles[:5])])
        return f"Here are the top 5 news articles:\n{news_summary}"
    else:
        return "Sorry, I couldn't fetch the latest AI news at the moment."

# Tool 2: Generate a response using Cohere
def generate_response(query):
    prompt = f"User's question: {query}\n\nYour response:"
    response = cohere_client.generate(
        model='command-r-plus-04-2024',
        prompt=prompt,
        max_tokens=500,
        temperature=0.7
    )
    return response.generations[0].text.strip()

# Tool definitions
news_tool = Tool(
    name="Fetch AI News",
    func=fetch_ai_news,
    description="Use this tool to fetch the latest news about AI."
)

llm_tool = Tool(
    name="Generate AI Response",
    func=generate_response,
    description="Use this tool to generate AI-related answers."
)

# Agent Logic: Choose between news or Cohere LLM based on input
def agent_logic(query):
    if "news" in query.lower() or "trends" in query.lower() or any(word in query.lower() for word in ["latest", "updates", "recent"]):
        return news_tool.run(query)
    else:
        return llm_tool.run(query)

# Gradio Interface: Connect the agent logic to a Gradio app
def chatbot(user_input):
    response = agent_logic(user_input)

    # Check if the response looks like code
    if response.count('\n') > 2 and ('def ' in response or 'class ' in response or '```' in response):
        return response, response
    else:
        return response, None

import requests
import cohere
import os
import gradio as gr
from langchain.agents import Tool
from datetime import datetime

# ... (previous code remains unchanged)

custom_css = """
body {
    background-color: #f0f4f8;
    font-family: 'Arial', sans-serif;
}
.container {
    max-width: 800px;
    margin: 0 auto;
    padding: 20px;
    background-color: white;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
h1 {
    color: #2c3e50;
    text-align: center;
    margin-bottom: 20px;
}
.input-area, .output-area {
    margin-bottom: 20px;
}
.submit-btn, .copy-btn {
    background-color: #3498db;
    color: white;
    border: none;
    padding: 10px 20px;
    border-radius: 5px;
    cursor: pointer;
    transition: background-color 0.3s;
}
.submit-btn:hover, .copy-btn:hover {
    background-color: #2980b9;
}
"""

with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown("# AI Conversational Agent")
        gr.Markdown("Ask questions about AI or request the latest AI news.")

        with gr.Column(elem_classes="input-area"):
            input_text = gr.Textbox(lines=3, placeholder="Ask me anything...", label="Your Question")
            submit_button = gr.Button("Submit", elem_classes="submit-btn")

        with gr.Column(elem_classes="output-area"):
            output_text = gr.Textbox(lines=10, label="Response")
            output_code = gr.Code(language="python", label="Code Output", visible=False)
            copy_button = gr.Button("Copy Response", elem_classes="copy-btn")

    def clear_input(input_text):
        return ""

    def copy_response(response, code):
        return gr.Textbox.update(value=code if code else response, visible=True)

    submit_button.click(
        fn=chatbot,
        inputs=input_text,
        outputs=[output_text, output_code]
    ).then(
        fn=clear_input,
        inputs=input_text,
        outputs=input_text
    )

    copy_button.click(
        fn=copy_response,
        inputs=[output_text, output_code],
        outputs=gr.Textbox(visible=False),
    )

# Launch the Gradio app
demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://c48e77a841d0562673.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


