#### Named Entity Recognition (NER) ####



In [None]:
!python -m spacy download en_core_web_sm

In [6]:
import spacy
import pandas as pd
from spacy import displacy

# Load the English language model
nlp = spacy.load("en_core_web_sm")

# Sample text (you can replace this with real-world data)
text = """
Apple Inc. is planning to open a new office in New York City by 2024. 
The tech giant's CEO, Tim Cook, announced the expansion during a press conference last Friday. 
The new office, located in Manhattan, will create about 2,000 jobs in various sectors including AI and machine learning.
"""

# Process the text
doc = nlp(text)

# Extract named entities
entities = [(ent.text, ent.label_) for ent in doc.ents]

# Create a DataFrame for better visualization
df = pd.DataFrame(entities, columns=['Entity', 'Type'])
print(df)

# Visualize the entities in the text
displacy.render(doc, style="ent", jupyter=True)

# Function to extract specific entity types
def extract_entity_type(text, entity_type):
    doc = nlp(text)
    return [ent.text for ent in doc.ents if ent.label_ == entity_type]

# Example usage
organizations = extract_entity_type(text, "ORG")
people = extract_entity_type(text, "PERSON")
dates = extract_entity_type(text, "DATE")

print(f"Organizations: {organizations}")
print(f"People: {people}")
print(f"Dates: {dates}")

          Entity      Type
0     Apple Inc.       ORG
1  New York City       GPE
2           2024      DATE
3       Tim Cook    PERSON
4    last Friday      DATE
5      Manhattan       GPE
6    about 2,000  CARDINAL
7             AI       ORG


Organizations: ['Apple Inc.', 'AI']
People: ['Tim Cook']
Dates: ['2024', 'last Friday']


#### Text Summarizer ####

In [2]:
pip install transformers

Collecting transformers
  Downloading transformers-4.43.1-py3-none-any.whl (9.4 MB)
     ---------------------------------------- 9.4/9.4 MB 741.0 kB/s eta 0:00:00
Collecting huggingface-hub<1.0,>=0.23.2
  Downloading huggingface_hub-0.24.1-py3-none-any.whl (417 kB)
     -------------------------------------- 417.2/417.2 kB 1.0 MB/s eta 0:00:00
Collecting tokenizers<0.20,>=0.19
  Downloading tokenizers-0.19.1-cp39-none-win_amd64.whl (2.2 MB)
     ---------------------------------------- 2.2/2.2 MB 2.4 MB/s eta 0:00:00
Collecting safetensors>=0.4.1
  Downloading safetensors-0.4.3-cp39-none-win_amd64.whl (287 kB)
     ------------------------------------- 287.9/287.9 kB 86.3 kB/s eta 0:00:00
Installing collected packages: safetensors, huggingface-hub, tokenizers, transformers
Successfully installed huggingface-hub-0.24.1 safetensors-0.4.3 tokenizers-0.19.1 transformers-4.43.1
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install torch

Note: you may need to restart the kernel to use updated packages.


In [1]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer

# Load pre-trained model and tokenizer
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")

def summarize_text(text, max_length=150, min_length=50):
    # Encode the text
    inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=1024, truncation=True)
    
    # Generate summary
    summary_ids = model.generate(inputs, max_length=max_length, min_length=min_length, length_penalty=2.0, num_beams=4, early_stopping=True)
    
    # Decode the summary
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    
    return summary

# Example usage
article = """
The United Nations Climate Change Conference, more commonly referred to as COP26, was held in Glasgow, Scotland, United Kingdom, from 31 October to 13 November 2021. The conference was the 26th United Nations Climate Change conference and brought together parties to accelerate action towards the goals of the Paris Agreement and the UN Framework Convention on Climate Change.

The conference was originally scheduled to be held in 2020, but was postponed due to the COVID-19 pandemic. More than 190 world leaders participated, along with tens of thousands of negotiators, government representatives, businesses and citizens for twelve days of talks.

The main goals of COP26 were to secure global net-zero emissions by mid-century and keep 1.5 degrees Celsius of warming within reach, adapt to protect communities and natural habitats, mobilize finance, and work together to deliver on climate action promises.

Key outcomes of the conference included new pledges from some countries to cut emissions, a global agreement to reduce coal use, and promises of increased financial support for developing countries. However, many activists and experts argued that the commitments made at COP26 were not ambitious enough to effectively address the climate crisis.
"""

summary = summarize_text(article)
print("Summary:")
print(summary)

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

KeyboardInterrupt: 

In [3]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.probability import FreqDist
from heapq import nlargest

nltk.download('punkt')
nltk.download('stopwords')

def simple_summarize(text, num_sentences=3):
    # Tokenize the text into sentences and words
    sentences = sent_tokenize(text)
    words = word_tokenize(text.lower())
    
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word not in stop_words]
    
    # Calculate word frequencies
    freq = FreqDist(words)
    
    # Score sentences based on word frequencies
    scores = {}
    for i, sentence in enumerate(sentences):
        for word in word_tokenize(sentence.lower()):
            if word in freq:
                if i not in scores:
                    scores[i] = freq[word]
                else:
                    scores[i] += freq[word]
    
    # Get the top n sentences
    top_sentences = nlargest(num_sentences, scores, key=scores.get)
    
    # Sort the sentences by their original order
    top_sentences.sort()
    
    # Join the top sentences
    summary = ' '.join([sentences[i] for i in top_sentences])
    
    return summary

# Example usage
article = """
The United Nations Climate Change Conference, more commonly referred to as COP26, was held in Glasgow, Scotland, United Kingdom, from 31 October to 13 November 2021. The conference was the 26th United Nations Climate Change conference and brought together parties to accelerate action towards the goals of the Paris Agreement and the UN Framework Convention on Climate Change.

The conference was originally scheduled to be held in 2020, but was postponed due to the COVID-19 pandemic. More than 190 world leaders participated, along with tens of thousands of negotiators, government representatives, businesses and citizens for twelve days of talks.

The main goals of COP26 were to secure global net-zero emissions by mid-century and keep 1.5 degrees Celsius of warming within reach, adapt to protect communities and natural habitats, mobilize finance, and work together to deliver on climate action promises.

Key outcomes of the conference included new pledges from some countries to cut emissions, a global agreement to reduce coal use, and promises of increased financial support for developing countries. However, many activists and experts argued that the commitments made at COP26 were not ambitious enough to effectively address the climate crisis.
"""

summary = simple_summarize(article)
print("Summary:")
print(summary)

Summary:

The United Nations Climate Change Conference, more commonly referred to as COP26, was held in Glasgow, Scotland, United Kingdom, from 31 October to 13 November 2021. More than 190 world leaders participated, along with tens of thousands of negotiators, government representatives, businesses and citizens for twelve days of talks. The main goals of COP26 were to secure global net-zero emissions by mid-century and keep 1.5 degrees Celsius of warming within reach, adapt to protect communities and natural habitats, mobilize finance, and work together to deliver on climate action promises.


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\AADITI\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\AADITI\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


#### Chatbot Development ####

In [7]:
import nltk
from nltk.chat.util import Chat, reflections
import tkinter as tk
from tkinter import scrolledtext
import datetime

# Download necessary NLTK data
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

# Define patterns and responses
patterns = [
    (r'hi|hello|hey', ['Hello!', 'Hi there!', 'Hey!']),
    (r'how are you', ["I'm doing well, thank you!", "I'm fine, how about you?"]),
    (r'what is your name', ["My name is ChatBot.", "I'm ChatBot, nice to meet you!"]),
    (r'bye|goodbye', ['Goodbye!', 'See you later!', 'Bye!']),
    (r'what can you do', ['I can answer simple questions and engage in basic conversation.']),
    (r'(.*) weather (.*)', ["I'm sorry, I don't have access to real-time weather information."]),
    (r'(.*) (love|like) (.*)', ["That's great! I'm glad you enjoy that."]),
    (r'(.*) (hate|dislike) (.*)', ["I'm sorry to hear that. Maybe we can talk about something you like instead?"]),
    (r'(.*)', ["I'm not sure I understand. Could you rephrase that?", "Interesting. Tell me more about that."])
]

# Create the chatbot
chatbot = Chat(patterns, reflections)

# Function to handle sending messages
def send_message():
    user_input = user_entry.get()
    chat_history.insert(tk.END, "You: " + user_input + "\n")
    response = chatbot.respond(user_input)
    chat_history.insert(tk.END, "ChatBot: " + response + "\n\n")
    user_entry.delete(0, tk.END)
    
    # Store the conversation
    with open("chat_history.txt", "a") as file:
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        file.write(f"{timestamp}\n")
        file.write(f"User: {user_input}\n")
        file.write(f"ChatBot: {response}\n\n")

# Create the main window
root = tk.Tk()
root.title("ChatBot GUI")

# Create and pack the chat history display
chat_history = scrolledtext.ScrolledText(root, width=50, height=20)
chat_history.pack(padx=10, pady=10)

# Create and pack the user input field
user_entry = tk.Entry(root, width=50)
user_entry.pack(padx=10, pady=5)

# Create and pack the send button
send_button = tk.Button(root, text="Send", command=send_message)
send_button.pack(pady=5)

# Start the GUI event loop
root.mainloop()

#### Language Translation ####

In [9]:
pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.2.0-cp39-cp39-win_amd64.whl (991 kB)
     -------------------------------------- 991.5/991.5 kB 4.5 MB/s eta 0:00:00
Installing collected packages: sentencepiece
Successfully installed sentencepiece-0.2.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install sacremoses

Collecting sacremoses
  Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)
     ------------------------------------ 897.5/897.5 kB 945.9 kB/s eta 0:00:00
Installing collected packages: sacremoses
Successfully installed sacremoses-0.1.1
Note: you may need to restart the kernel to use updated packages.


In [5]:
import tkinter as tk
from tkinter import ttk, messagebox
from transformers import MarianMTModel, MarianTokenizer

# Dictionary to hold models and tokenizers
models = {}
tokenizers = {}

def get_model_and_tokenizer(source_lang, target_lang):
    model_name = f'Helsinki-NLP/opus-mt-{source_lang}-{target_lang}'
    if model_name not in models:
        model = MarianMTModel.from_pretrained(model_name)
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        models[model_name] = model
        tokenizers[model_name] = tokenizer
    else:
        model = models[model_name]
        tokenizer = tokenizers[model_name]
    return model, tokenizer

# Function to perform translation
def translate_text(text, source_lang, target_lang):
    try:
        model, tokenizer = get_model_and_tokenizer(source_lang, target_lang)
        inputs = tokenizer(text, return_tensors="pt", padding=True)
        translated = model.generate(**inputs)
        translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
        return translated_text
    except Exception as e:
        messagebox.showerror("Translation Error", str(e))
        return ""

# Function to handle the translation button click
def on_translate():
    source_text = source_text_entry.get("1.0", tk.END).strip()
    if not source_text:
        messagebox.showwarning("Input Error", "Please enter text to translate")
        return
    source_lang = source_lang_combo.get()
    target_lang = target_lang_combo.get()
    source_lang_code = lang_to_code[source_lang]
    target_lang_code = lang_to_code[target_lang]
    translated_text = translate_text(source_text, source_lang_code, target_lang_code)
    translated_text_entry.delete("1.0", tk.END)
    translated_text_entry.insert(tk.END, translated_text)

# Mapping of languages to their codes
lang_to_code = {
    "English": "en",
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Italian": "it",
    "Portuguese": "pt",
    "Dutch": "nl",
    "Russian": "ru",
    "Chinese": "zh",
    "Japanese": "ja"
}

# Create the main window
root = tk.Tk()
root.title("Translation System")
root.geometry("600x400")

# Source language label and combo box
source_lang_label = ttk.Label(root, text="Source Language:")
source_lang_label.pack(pady=5)
source_lang_combo = ttk.Combobox(root, values=list(lang_to_code.keys()), state="readonly")
source_lang_combo.pack(pady=5)
source_lang_combo.set("English")

# Target language label and combo box
target_lang_label = ttk.Label(root, text="Target Language:")
target_lang_label.pack(pady=5)
target_lang_combo = ttk.Combobox(root, values=list(lang_to_code.keys()), state="readonly")
target_lang_combo.pack(pady=5)
target_lang_combo.set("Spanish")

# Source text entry
source_text_label = ttk.Label(root, text="Enter text to translate:")
source_text_label.pack(pady=5)
source_text_entry = tk.Text(root, height=5, width=70)
source_text_entry.pack(pady=5)

# Translate button
translate_button = ttk.Button(root, text="Translate", command=on_translate)
translate_button.pack(pady=5)

# Translated text entry
translated_text_label = ttk.Label(root, text="Translated text:")
translated_text_label.pack(pady=5)
translated_text_entry = tk.Text(root, height=5, width=70)
translated_text_entry.pack(pady=5)

# Run the main loop
root.mainloop()
