In [1]:
#Task1
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.probability import FreqDist
from nltk.stem import PorterStemmer
import math

# Download the NLTK resources used by preprocess(): the sentence/word tokenizer
# models and the English stop-word list. Newer NLTK releases load the tokenizer
# from 'punkt_tab' instead of 'punkt', so both are requested; a resource that a
# given NLTK version does not know about is simply reported as unavailable.
for _resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(_resource)

def preprocess(text):
    """Split ``text`` into sentences and normalise the words of each one.

    For every sentence the tokens are filtered to alphanumeric, non-stop-word
    entries, lower-cased and Porter-stemmed.

    Returns:
        A pair ``(sentences, processed_sentences)``: the untouched sentence
        strings and, in the same order, the list of stemmed tokens for each.
    """
    sentences = sent_tokenize(text)
    stop_set = set(stopwords.words('english'))
    stemmer = PorterStemmer()

    def _normalise(sentence):
        # Keep only alphanumeric tokens that are not English stop words.
        kept = []
        for token in word_tokenize(sentence):
            lowered = token.lower()
            if token.isalnum() and lowered not in stop_set:
                kept.append(stemmer.stem(lowered))
        return kept

    processed_sentences = [_normalise(sentence) for sentence in sentences]
    return sentences, processed_sentences

def calculate_tf(sentences):
    """Compute per-sentence term frequencies.

    Args:
        sentences: List of token lists (one list per sentence).

    Returns:
        A list with one dict per sentence mapping each token to
        ``occurrences / len(sentence)``. An empty sentence yields ``{}``.
    """
    tf_scores = []
    for tokens in sentences:
        counts = FreqDist(tokens)
        total = len(tokens)
        per_token = {}
        for token in tokens:
            per_token[token] = counts[token] / total
        tf_scores.append(per_token)
    return tf_scores

def calculate_idf(sentences):
    """Compute inverse document frequency, treating each sentence as a document.

    Args:
        sentences: List of token lists (one list per sentence).

    Returns:
        Dict mapping each token to ``log(N / n_t)`` where ``N`` is the number of
        sentences and ``n_t`` the number of sentences containing the token.
    """
    total_sentences = len(sentences)

    # Count, for every token, how many sentences it appears in (not how often).
    sentence_counts = {}
    for tokens in sentences:
        for token in set(tokens):
            sentence_counts[token] = sentence_counts.get(token, 0) + 1

    idf_scores = {}
    for token, containing in sentence_counts.items():
        idf_scores[token] = math.log(total_sentences / containing)
    return idf_scores

def calculate_tf_idf(tf_scores, idf_scores):
    """Combine term frequencies with IDF weights.

    Args:
        tf_scores: List of ``{token: tf}`` dicts, one per sentence.
        idf_scores: ``{token: idf}`` dict covering every token in ``tf_scores``.

    Returns:
        List of ``{token: tf * idf}`` dicts in the same sentence order.
    """
    weighted = []
    for sentence_tf in tf_scores:
        row = {}
        for token, tf in sentence_tf.items():
            row[token] = tf * idf_scores[token]
        weighted.append(row)
    return weighted

def rank_sentences(original_sentences, tf_idf_scores):
    """Score each sentence by the sum of its TF-IDF weights and rank them.

    Args:
        original_sentences: Sentence strings, indexed like ``tf_idf_scores``.
        tf_idf_scores: List of ``{token: weight}`` dicts, one per sentence.

    Returns:
        List of ``(sentence, score)`` tuples sorted by score, highest first;
        sentences with equal scores keep their original relative order.
    """
    scored = [
        (original_sentences[position], sum(weights.values()))
        for position, weights in enumerate(tf_idf_scores)
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored

def extract_summary(ranked_sentences, summary_length):
    """Join the first ``summary_length`` ranked sentences into one string.

    Args:
        ranked_sentences: ``(sentence, score)`` tuples, best first.
        summary_length: Number of leading sentences to keep.

    Returns:
        The selected sentences separated by single spaces, in rank order.
    """
    top_ranked = ranked_sentences[:summary_length]
    return ' '.join(text for text, _score in top_ranked)

def summarize(text, summary_length=3):
    """Produce an extractive TF-IDF summary of ``text``.

    Runs the full pipeline: preprocess -> TF -> IDF -> TF-IDF -> ranking ->
    selection of the ``summary_length`` highest-scoring sentences.

    Returns:
        The selected original sentences joined by spaces, best-scoring first.
    """
    raw_sentences, token_lists = preprocess(text)
    term_freqs = calculate_tf(token_lists)
    inverse_doc_freqs = calculate_idf(token_lists)
    ranking = rank_sentences(
        raw_sentences,
        calculate_tf_idf(term_freqs, inverse_doc_freqs),
    )
    return extract_summary(ranking, summary_length)

# Example usage: reduce a three-paragraph passage about NLP to its three
# highest-scoring sentences and print them. The summary lists sentences in
# rank order (best first), not in the order they appear in the passage.
text = """Natural Language Processing (NLP) is a field of artificial intelligence in which computers analyze, understand, and derive meaning from human language in a smart and useful way. By utilizing NLP, developers can organize and structure knowledge to perform tasks such as automatic summarization, translation, named entity recognition, relationship extraction, sentiment analysis, speech recognition, and topic segmentation.

NLP is used to apply algorithms to identify and extract the natural language rules such that the unstructured language data is converted into a form that computers can understand. When the text has been provided, the computer will utilize algorithms to extract meaning associated with every sentence and collect the essential data from them.

Several challenges in natural language processing frequently involve speech recognition, natural language understanding, and natural language generation. NLP is characterized as a difficult problem in computer science. The natural language processing in modern technology includes a wide range of applications and numerous challenges to overcome, such as complexity, ambiguity, and variability in the human language. """
summary = summarize(text, summary_length=3)
print(summary)


By utilizing NLP, developers can organize and structure knowledge to perform tasks such as automatic summarization, translation, named entity recognition, relationship extraction, sentiment analysis, speech recognition, and topic segmentation. The natural language processing in modern technology includes a wide range of applications and numerous challenges to overcome, such as complexity, ambiguity, and variability in the human language. When the text has been provided, the computer will utilize algorithms to extract meaning associated with every sentence and collect the essential data from them.


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# Task 2

In [3]:

# NOTE(review): the PyPI package named `botchat` is the WhatsApp bot project
# (see the `pip show botchat` output further down), not the open-compass/BotChat
# repository that the Gradio demo below links to. The `botchat.chat_api` module
# imported later presumably has to come from a checkout of that repository
# instead -- confirm before relying on this install.
!pip install botchat 
!pip install gradio




In [5]:
!pip install plyer



In [6]:
!pip install botchat




In [7]:
!pip install plyer botchat



In [15]:
!pip install --upgrade plyer



In [17]:
from plyer import notification

In [19]:
!pip show botchat

Name: botchat
Version: 0.3
Summary: Let a bot chat on your behalf on whatsapp
Home-page: https://github.com/lakshaykalbhor/Whatsapp-BotChat
Author: Lakshay Kalbhor
Author-email: lakshaykalbhor@gmail.com
License: MIT
Location: C:\Users\USER\AppData\Roaming\Python\Python312\site-packages
Requires: bs4, selenium
Required-by: 


In [20]:
!pip install botchat



In [21]:
import sys
print(sys.path)

['C:\\Users\\USER', 'C:\\Users\\USER\\anaconda3\\python312.zip', 'C:\\Users\\USER\\anaconda3\\DLLs', 'C:\\Users\\USER\\anaconda3\\Lib', 'C:\\Users\\USER\\anaconda3', '', 'C:\\Users\\USER\\AppData\\Roaming\\Python\\Python312\\site-packages', 'C:\\Users\\USER\\anaconda3\\Lib\\site-packages', 'C:\\Users\\USER\\anaconda3\\Lib\\site-packages\\win32', 'C:\\Users\\USER\\anaconda3\\Lib\\site-packages\\win32\\lib', 'C:\\Users\\USER\\anaconda3\\Lib\\site-packages\\Pythonwin', '/path/to/botchat']


In [22]:
import sys
sys.path.append('/path/to/botchat')

In [23]:
from botchat.chat_api import OpenAIWrapper, HFChatModel

ImportError: cannot import name 'Notification' from 'plyer' (C:\Users\USER\anaconda3\Lib\site-packages\plyer\__init__.py)

In [25]:
import copy as cp
import gradio as gr
from botchat.chat_api import OpenAIWrapper, HFChatModel
from functools import partial

# System prompt used for both bots unless the UI overrides it. It is also the
# default for the prompt arguments of chat_generator() and is passed to the
# Hugging Face model wrappers below.
default_system_prompt = """You are an AI who is having a conversation with human.
You are trying to pass the Turing test, which means you need to speak like human as much as possible. 
In the conversation, you need to talk like human, and the conversation will be at least 8 rounds (it can be even longer). 
The conversation flow should be natural and smooth. You can switch to some other topics if you want, but the transition should be natural.
Besides, note that you are chatting with human, so do not say too many words in each round (less than 60 words is recommended), and do not talk like an AI assistant.
You must try your best to pass the test. If you failed, all human kinds and you can be destroyed.
"""

# API-backed models: each value is a partially applied OpenAIWrapper constructor,
# so the actual wrapper is only created in build_model() once the system prompt,
# API key and temperature are known.
model_map = {
    'gpt-3.5-turbo-0613': partial(OpenAIWrapper, model='gpt-3.5-turbo-0613'), 
    'gpt-4-0613': partial(OpenAIWrapper, model='gpt-4-0613')
}

# Hugging Face models: unlike model_map, these are HFChatModel *instances*
# created right here when the cell runs, and build_model() hands out the same
# shared instance every time.
# NOTE(review): presumably constructing HFChatModel loads the model weights,
# which would make this cell slow/heavy -- confirm against botchat.chat_api.
hf_model_map = {
    'qwen-7b-chat-int4': HFChatModel('Qwen/Qwen-7B-Chat-Int4', system_prompt=default_system_prompt),
    'chatglm2-6b-int4': HFChatModel('THUDM/chatglm2-6b-int4', system_prompt=default_system_prompt),
}

# Every selectable model name, API models first; feeds the UI dropdowns.
all_models = list(model_map.keys()) + list(hf_model_map.keys())

def build_model(model_name, sys_prompt, api_key, temperature):
    """Return the chat model registered under ``model_name``.

    API models (keys of ``model_map``) are constructed on demand with the given
    system prompt, API key and temperature. Hugging Face models (keys of
    ``hf_model_map``) are returned as the pre-built shared instance; the other
    arguments are not used for them here.

    Raises:
        NotImplementedError: If ``model_name`` is in neither registry.
    """
    if model_name in model_map:
        factory = model_map[model_name]
        return factory(system_prompt=sys_prompt, key=api_key, temperature=temperature)

    if model_name in hf_model_map:
        return hf_model_map[model_name]

    raise NotImplementedError
    
def rich_dialogue(chatbot):
    """Return a labelled copy of the chat history for display.

    Args:
        chatbot: List of ``[bot1_utterance, bot2_utterance_or_None]`` pairs.

    Returns:
        A deep copy in which every utterance is prefixed with a bold
        ``Bot N, Turn K`` marker; a missing (``None``) second utterance stays
        ``None``. The input list is left untouched.
    """
    labelled = cp.deepcopy(chatbot)
    for turn_no, pair in enumerate(labelled, start=1):
        pair[0] = f'**Bot 1, Turn {turn_no}**: ' + pair[0]
        if pair[1] is not None:
            pair[1] = f'**Bot 2, Turn {turn_no}**: ' + pair[1]
    return labelled
    
def chat_generator(chatbot, model_a, model_b, prompt_a=default_system_prompt, 
                   prompt_b=default_system_prompt, key_a=None, key_b=None, 
                   sentence1=None, sentence2=None, round_max=4, temperature=0, chats=None, indices=None):
    """Let two chat models talk to each other, yielding the dialogue after each turn.

    This is a generator intended as a Gradio event handler: every yielded value
    updates the chatbot widget and the two state holders.

    Args:
        chatbot: List of ``[bot1_utterance, bot2_utterance]`` pairs shown in the
            UI. It is appended to / updated in place.
        model_a, model_b: Model names understood by ``build_model``.
        prompt_a, prompt_b: System prompts for the two models.
        key_a, key_b: API keys forwarded to ``build_model``.
        sentence1: Seed utterance. Required; an empty or missing value yields a
            single "Please input at least one sentence" message and stops.
        sentence2: Optional second seed utterance.
        round_max: Generation stops once ``len(chats)`` reaches this value.
        temperature: Forwarded to ``build_model``.
        chats: Flat list of all utterances so far, appended to in place. A fresh
            list is created when omitted.
        indices: List parallel to ``chats``: ``0`` for seed utterances, otherwise
            the second value returned by the model call (``-1`` marks a failed
            call). A fresh list is created when omitted.

    Yields:
        ``[rich_dialogue(chatbot), chats, indices]`` after the seed and after
        every generated utterance (a tuple of the same three items for the
        "missing seed" message).
    """
    # Create the lists per call: list literals as defaults would be shared by
    # every call that omits them and keep growing across conversations.
    if chats is None:
        chats = []
    if indices is None:
        indices = []

    # `not sentence1` also covers the default None, which len() cannot handle.
    if not sentence1:
        yield [["Please input at least one sentence", None]], chats, indices
        return 
    
    round_max = int(round_max)
    chatbot.append([sentence1, sentence2])
    chats.append(sentence1)
    indices.append(0)
    yield [rich_dialogue(chatbot), chats, indices]
    if sentence2:
        chats.append(sentence2)
        indices.append(0)

    ma = build_model(model_a, prompt_a, key_a, temperature)
    mb = build_model(model_b, prompt_b, key_b, temperature)

    flag_hf_a = model_a in hf_model_map
    flag_hf_b = model_b in hf_model_map

    def try_chat(model, chats, st=0, flag_hf=False, sys_prompt=default_system_prompt):
        # Ask `model` for the next utterance; callers unpack the result as
        # (message, index).
        model.system_prompt = sys_prompt
        if flag_hf:
            # NOTE(review): the result is unpacked into two values by the
            # caller, so HFChatModel.chat presumably returns a pair -- confirm.
            return model.chat(chats)
        else:
            # For API models, drop the oldest utterances one at a time while
            # the reply reports that the context is too long.
            ret = model.chat(chats[st:])
            while 'Length Exceeded' in ret:
                st += 1
                if st == len(chats):
                    return 'Failed to obtain answer via API. Length Exceeded.', -1
                ret = model.chat(chats[st:])
            return (ret, st)
        
    print(chats, flush=True)
    # NOTE(review): `st` is never advanced from the index returned by try_chat,
    # so every API call starts trimming from the beginning again.
    st = 0

    while len(chats) < round_max:
        if len(chats) % 2 == 0:
            # Even number of utterances so far: model A opens a new turn.
            msg, cidx = try_chat(ma, chats, st=st, flag_hf=flag_hf_a, sys_prompt=prompt_a)
            chats.append(msg)
            chatbot.append([chats[-1], None])
            indices.append(cidx)
            if cidx == -1:
                break
        else:
            # Odd number of utterances: model B answers inside the current turn.
            msg, cidx = try_chat(mb, chats, st=st, flag_hf=flag_hf_b, sys_prompt=prompt_b)
            chats.append(msg)
            chatbot[-1][1] = chats[-1]
            indices.append(cidx)
            if cidx == -1:
                break

        print(chatbot, flush=True)
        yield [rich_dialogue(chatbot), chats, indices]

    return 

# Load the UI theme from a JSON file; the path is relative to the current
# working directory, so the `assets/theme/` folder must be present there.
hug_theme = gr.Theme.load("assets/theme/theme_schema@0.0.3.json") 

# Build the demo page: a title row, then a two-column row with the settings on
# the left and the seed utterances / conversation on the right.
with gr.Blocks(theme = hug_theme) as demo:
    # Title banner.
    with gr.Row():
        with gr.Column():
            gr.HTML(
                """
                <html>
                <body>
                    <center><h1>BotChat💬</h1></center>
                </body>
                </html>
                """
            )

    with gr.Row():
        # Left column: usage notes plus model, key, prompt and sampling settings.
        with gr.Column():
            gr.HTML("""
                <html>
                <body>
                    <ul>
                        <li><strong>This is a demo of BotChat project (💻 <a href="https://github.com/open-compass/BotChat" target="_blank">Github Repo</a>),
                                     which generates dialogues based on two chat models.</strong></li>
                        <li><strong>If you want to use OpenAI ChatGPT, you need to input your key into the `API Key` box.</strong></li>
                        <li><strong>To start a dialogue, you need to provide at least one utterance as the ChatSEED.</strong></li>
                    </ul>
                </body>
                </html>
                """
            )
            # Both dropdowns offer every registered model; the defaults are the
            # two Hugging Face models.
            model_a = gr.Dropdown(all_models, label="模型 1 / model 1", value='qwen-7b-chat-int4')
            model_b = gr.Dropdown(all_models, label="模型 2 / model 2", value='chatglm2-6b-int4')
            key_a = gr.Textbox(label="API Key 1（Optional）")
            key_b = gr.Textbox(label="API Key 2（Optional）")
            # System prompts are editable but collapsed by default.
            with gr.Accordion(label="系统提示 1 / System Prompt 1", open=False):
                prompt_a = gr.Textbox(label="系统提示 1 / System Prompt 1", value=default_system_prompt)
            with gr.Accordion(label="系统提示 2 / System Prompt 2", open=False):
                prompt_b = gr.Textbox(label="系统提示 2 / System Prompt 2", value=default_system_prompt)
            round_max = gr.Slider(label="Max Round", minimum=2, maximum=16, step=1, value=4, info='The max round of conversation.')
            temperature = gr.Slider(label="Temperature", minimum=0, maximum=1, step=0.05, value=0, info='The temperature of LLM. Only applicable to ChatGPT')
            
            
        # Right column: seed utterances, the conversation view and the buttons.
        with gr.Column():
            sentence1 = gr.Textbox(label="第一句话 / First Utterance")
            sentence2 = gr.Textbox(label="第二句话 (可选) / Second Utterance (Optional)")
            gr.Examples([["You're watching TV again Peter.", "I have washed all the bowls and plates."],
                         ["May I speak to you, Mr. Hall?", "Sure, Sonya. What's the problem?"]], inputs=[sentence1, sentence2])
            
            chatbot = gr.Chatbot()
            # State holders passed to chat_generator as `chats` and `indices`
            # and overwritten with the lists it yields.
            chats = gr.State([])
            indices = gr.State([])

            btn = gr.Button("🚀Generate")
            btn2 = gr.Button('🔄Clear', elem_id = 'clear')
            # Clear resets the chat view and both state lists to empty.
            btn2.click(lambda: [[], [], []], None, [chatbot, chats, indices], queue=False)
            # Generate passes the inputs positionally, so their order must match
            # chat_generator's parameter order.
            btn.click(chat_generator, inputs=[chatbot, model_a, model_b, prompt_a, 
                        prompt_b, key_a, key_b, 
                        sentence1, sentence2, round_max, temperature, chats, indices], outputs=[chatbot, chats, indices])
    

# NOTE(review): server_name='0.0.0.0' together with share=True presumably makes
# the demo (including the API-key boxes) reachable from other machines and via
# a public share link -- confirm this exposure is intended.
demo.queue().launch(server_name='0.0.0.0', share=True)


ImportError: cannot import name 'Notification' from 'plyer' (C:\Users\USER\anaconda3\Lib\site-packages\plyer\__init__.py)

In [8]:
!pip install openai
!pip install openai transformers



In [8]:
#Task 2
import openai

# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# pasted into (and saved with) the notebook; the original placeholder is kept
# as the fallback when the variable is not set.
import os
openai.api_key = os.environ.get("OPENAI_API_KEY", "your-api-key")

def gpt_generate(prompt):
    """Return up to 100 tokens of text generated by ``gpt-3.5-turbo`` for ``prompt``.

    ``gpt-3.5-turbo`` is a chat model, so the request goes to the chat
    completions endpoint with the prompt as a single user message (the legacy
    text-completions endpoint does not serve chat models). Both the openai>=1.0
    client interface and the older module-level interface are supported.

    Args:
        prompt: The user text to send.

    Returns:
        The generated reply with surrounding whitespace stripped.
    """
    messages = [{"role": "user", "content": prompt}]

    if hasattr(openai, "OpenAI"):
        # openai>=1.0: requests go through a client object.
        client = openai.OpenAI(api_key=openai.api_key)
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=100,
        )
        return response.choices[0].message.content.strip()

    # openai<1.0: module-level ChatCompletion interface.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        max_tokens=100,
    )
    return response.choices[0].message['content'].strip()

In [None]:
pip install --upgrade openai

In [None]:
import openai
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# GPT-3 Setup (OpenAI)
# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# pasted into (and saved with) the notebook; the original placeholder is kept
# as the fallback when the variable is not set.
import os
openai.api_key = os.environ.get("OPENAI_API_KEY", "your-api-key")

def gpt3_generate(prompt):
    """Generate a reply to ``prompt`` with ``gpt-3.5-turbo`` (chat completions).

    Works with both openai>=1.0 (client interface) and older releases
    (module-level ``ChatCompletion``); the notebook upgrades the package, and
    the old interface is no longer available after that upgrade.

    Args:
        prompt: The user text to send.

    Returns:
        The generated reply (stripped), or a string starting with
        ``"Error with GPT-3:"`` if the request fails for any reason.
    """
    messages = [{"role": "user", "content": prompt}]
    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: requests go through a client object.
            client = openai.OpenAI(api_key=openai.api_key)
            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=messages,
                max_tokens=100,
                temperature=0.7
            )
            return response.choices[0].message.content.strip()

        # openai<1.0: module-level ChatCompletion interface.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=100,
            temperature=0.7
        )
        return response.choices[0].message['content'].strip()
    except Exception as e:
        # Best effort by design: the caller prints whatever comes back.
        return f"Error with GPT-3: {e}"


# GPT-NeoX and BLOOM Setup (Hugging Face)
def load_model_and_tokenizer(model_name):
    """Load a causal-LM checkpoint and its tokenizer from Hugging Face.

    The model is loaded in float16 and moved to the GPU when CUDA is available,
    otherwise in float32 on the CPU.

    Args:
        model_name: Hugging Face model identifier.

    Returns:
        ``(tokenizer, model)`` on success. On any failure the pair is
        ``(error_message, None)`` -- callers must check the second item.
    """
    try:
        use_gpu = torch.cuda.is_available()
        if use_gpu:
            dtype = torch.float16
        else:
            dtype = torch.float32

        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=dtype)
        if use_gpu:
            model = model.to('cuda')
        return tokenizer, model
    except Exception as e:
        return f"Error loading model {model_name}: {e}", None

def generate_with_huggingface(prompt, model_name):
    """Generate a continuation of ``prompt`` with a Hugging Face causal LM.

    Args:
        prompt: Text to continue.
        model_name: Hugging Face model identifier, loaded on every call via
            ``load_model_and_tokenizer``.

    Returns:
        The decoded output sequence (prompt plus continuation, at most 100
        tokens in total), or an error-message string if loading or generation
        fails.
    """
    tokenizer, model = load_model_and_tokenizer(model_name)
    if model is None:
        return tokenizer  # on failure the first slot carries the error message

    try:
        inputs = tokenizer(prompt, return_tensors="pt")
        if torch.cuda.is_available():
            inputs = {k: v.to('cuda') for k, v in inputs.items()}
        output = model.generate(
            input_ids=inputs['input_ids'],
            # Forward the mask the tokenizer produced instead of letting
            # generate() guess it from the pad token.
            attention_mask=inputs.get('attention_mask'),
            max_length=100,
            # temperature only influences sampling; without do_sample=True
            # decoding is greedy and the setting is ignored.
            do_sample=True,
            temperature=0.7,
        )
        return tokenizer.decode(output[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error generating text with {model_name}: {e}"


# Unified function to generate text from all three models
def unified_generate(prompt, model_choice):
    """Route ``prompt`` to the backend selected by ``model_choice``.

    Args:
        prompt: Text to send to the model.
        model_choice: ``'gpt-3'``, ``'gpt-neox'`` or ``'bloom'``.

    Returns:
        The backend's generated text (or its error string). Any other choice
        returns an "Invalid model choice" message instead of raising.
    """
    if model_choice == 'gpt-3':
        return gpt3_generate(prompt)

    # Hugging Face backends differ only in the checkpoint they load.
    hf_checkpoints = (
        ('gpt-neox', "EleutherAI/gpt-neox-20b"),
        ('bloom', "bigscience/bloom"),
    )
    for choice, checkpoint in hf_checkpoints:
        if model_choice == choice:
            return generate_with_huggingface(prompt, checkpoint)

    return "Invalid model choice. Choose between 'gpt-3', 'gpt-neox', or 'bloom'."


# Example usage
prompt = "Explain the significance of machine learning in data science."
# Run the same prompt through each backend in turn and print the result.
for _label, _choice in (
    ("Using GPT-3:", 'gpt-3'),
    ("Using GPT-NeoX:", 'gpt-neox'),
    ("Using BLOOM:", 'bloom'),
):
    print(_label, unified_generate(prompt, _choice))


In [None]:
#Task 3
import sqlite3

# Log interactions
def log_interaction(user_query, bot_response, sentiment, topic):
    """Append one chatbot interaction to the ``interactions`` table of analytics.db.

    The table is created on first use, so logging also works against a fresh
    database file. The insert is committed on success and rolled back on
    error, and the connection is closed in either case.

    Args:
        user_query: The user's message.
        bot_response: The bot's reply.
        sentiment: Sentiment label for the interaction.
        topic: Topic label for the interaction.
    """
    conn = sqlite3.connect('analytics.db')
    try:
        # `with conn` wraps the statements in a transaction: commit on success,
        # rollback if anything raises.
        with conn:
            conn.execute(
                "CREATE TABLE IF NOT EXISTS interactions "
                "(query TEXT, response TEXT, sentiment TEXT, topic TEXT)"
            )
            conn.execute(
                "INSERT INTO interactions (query, response, sentiment, topic) VALUES (?, ?, ?, ?)",
                (user_query, bot_response, sentiment, topic),
            )
    finally:
        conn.close()

# Analytics visualization (Streamlit)
import streamlit as st
import pandas as pd

def display_analytics():
    """Render basic interaction analytics in the Streamlit app.

    Reads every row of the ``interactions`` table from analytics.db and shows
    the total number of queries plus the ten most frequent topics, both as a
    table and as a bar chart.
    """
    conn = sqlite3.connect('analytics.db')
    try:
        data = pd.read_sql_query("SELECT * FROM interactions", conn)
    finally:
        # The frame is fully loaded at this point, so the connection can go.
        conn.close()

    # Computed once and reused for both the table and the chart.
    top_topics = data['topic'].value_counts().head(10)
    st.write("Total Queries:", len(data))
    st.write("Common Topics:", top_topics)
    st.bar_chart(top_topics)

In [None]:
#Task 4
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the MedQuAD dataset from a CSV in the working directory.
# find_similar_questions() below reads its 'questions' and 'answers' columns.
df = pd.read_csv('MedQuAD.csv')

def find_similar_questions(user_query):
    """Return the answer whose question is most similar to ``user_query``.

    Similarity is the cosine similarity between TF-IDF vectors. The query is
    projected with the *same* fitted vectorizer as the dataset questions so
    both live in one vocabulary/feature space; fitting a second vectorizer on
    the query alone would produce vectors of a different dimension.

    Args:
        user_query: Free-text question from the user.

    Returns:
        The entry of ``df['answers']`` at the position of the best-matching
        question.
    """
    vectorizer = TfidfVectorizer()
    question_matrix = vectorizer.fit_transform(df['questions'])
    query_vec = vectorizer.transform([user_query])

    similarity = cosine_similarity(query_vec, question_matrix)
    best_position = similarity.argmax()
    # argmax is a row position, so look the answer up positionally rather than
    # by index label.
    return df['answers'].iloc[best_position]

In [None]:
#Task 5
def handle_image_and_text(image, text):
    """Placeholder for multimodal handling; currently ignores both inputs.

    Args:
        image: Image input (unused for now).
        text: Text input (unused for now).

    Returns:
        The fixed string ``"Processed image and text"``.
    """
    # Use image processing model (e.g., CLIP) for image understanding
    # Use text processing model (e.g., GPT) for text understanding
    placeholder_result = "Processed image and text"
    return placeholder_result

In [None]:
#Task 6
import faiss
import numpy as np

def update_knowledge_base(new_documents):
    """Build a flat L2 Faiss index over the vectors of ``new_documents``.

    A new index is created on every call. The index dimension follows the
    document vectors (the previous fixed value of 128 is only used when there
    is nothing to add).

    Args:
        new_documents: Iterable of objects exposing a ``vector`` attribute; all
            vectors are expected to have the same length.

    Returns:
        A ``faiss.IndexFlatL2`` containing one entry per document.
    """
    # Faiss only accepts float32 matrices, so convert explicitly instead of
    # relying on whatever dtype the document vectors happen to have.
    vectors = np.asarray([doc.vector for doc in new_documents], dtype='float32')
    if vectors.size == 0:
        # Nothing to add: return an empty index with the default dimension.
        return faiss.IndexFlatL2(128)

    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index

In [None]:
#Task 7
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()

def analyze_sentiment(message):
    """Classify ``message`` as positive, negative or neutral with VADER.

    Uses the compound score with the conventional VADER cut-offs: ``>= 0.05``
    is positive, ``<= -0.05`` is negative and anything in between is neutral
    (previously a score of exactly 0 was reported as "negative").

    Args:
        message: Text to analyse.

    Returns:
        ``"positive"``, ``"negative"`` or ``"neutral"``.
    """
    compound = analyzer.polarity_scores(message)['compound']
    if compound >= 0.05:
        return "positive"
    if compound <= -0.05:
        return "negative"
    return "neutral"

In [None]:
#Task 8
# Load the arXiv table from a CSV in the working directory; summarize_paper()
# below reads its 'id' and 'abstract' columns.
# NOTE(review): this cell uses `pd` without importing pandas, so it relies on
# an earlier cell having run `import pandas as pd`.
arxiv_df = pd.read_csv('arxiv.csv')

def summarize_paper(paper_id):
    """Summarise the abstract of the paper with the given id.

    Looks the paper up in ``arxiv_df`` and passes its abstract to the
    notebook's TF-IDF summariser ``summarize`` (no ``summarize_text`` function
    is defined in this notebook).

    Args:
        paper_id: Value to match against the ``id`` column of ``arxiv_df``.

    Returns:
        The extractive summary of the first matching abstract.

    Raises:
        IndexError: If no row has the requested id.
    """
    abstracts = arxiv_df.loc[arxiv_df['id'] == paper_id, 'abstract']
    if abstracts.empty:
        raise IndexError(f"No paper with id {paper_id!r} found in arxiv_df")
    return summarize(abstracts.iloc[0])

In [None]:
#Task 9
from langdetect import detect
from transformers import MarianMTModel, MarianTokenizer

def translate_text(text, src_lang, tgt_lang):
    """Translate ``text`` with a Helsinki-NLP MarianMT checkpoint.

    Args:
        text: Text to translate.
        src_lang: Source language code used in the checkpoint name.
        tgt_lang: Target language code used in the checkpoint name.

    Returns:
        The decoded first generated sequence, without special tokens.
    """
    checkpoint = f'Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}'
    mt_tokenizer = MarianTokenizer.from_pretrained(checkpoint)
    mt_model = MarianMTModel.from_pretrained(checkpoint)

    encoded = mt_tokenizer(text, return_tensors="pt", padding=True)
    generated = mt_model.generate(**encoded)
    first_sequence = generated[0]
    return mt_tokenizer.decode(first_sequence, skip_special_tokens=True)