In [4]:
# Import required libraries
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from googletrans import Translator
import spacy
from gtts import gTTS
import os

# Fetch the NLTK data packages used by the cells below.
# - 'punkt' / 'punkt_tab': sentence and word tokenizer models. Recent NLTK
#   releases (3.9 and later) load the tokenizer from 'punkt_tab', so both are
#   requested to keep the notebook runnable on old and new installs.
# - 'stopwords': the English stop-word list used in Step 3.
# - 'averaged_perceptron_tagger': not referenced by the cells shown here;
#   kept in case another part of the notebook relies on it.
# Using a loop (a statement, not an expression) also avoids echoing the last
# call's return value as the cell's output.
for resource in ('punkt', 'punkt_tab', 'stopwords', 'averaged_perceptron_tagger'):
    nltk.download(resource)


[nltk_data] Downloading package punkt to
[nltk_data]     /home/the_architect/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/the_architect/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/the_architect/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [5]:
# Step 1: Write chatbot description to file
# The description is held in `chatbot_text` and persisted to 'chatbot.txt'
# (relative to the current working directory) so the next step can read it back.
chatbot_text = """ChatGPT is a sophisticated AI language model developed by OpenAI. It serves as a versatile conversational agent capable of understanding and generating human-like text responses. The chatbot uses advanced natural language processing techniques and was trained on vast amounts of internet text data. ChatGPT can assist users with various tasks including writing, coding, answering questions, and providing explanations. It has found applications in customer service, education, content creation, and technical support. The model works by processing input text through multiple transformer layers and generating contextually relevant responses."""

# Pin the encoding so the bytes on disk do not depend on the platform's
# locale default (which differs between operating systems).
with open('chatbot.txt', 'w', encoding='utf-8') as f:
    f.write(chatbot_text)

In [6]:
# Step 2: Process text into sentences and words
# Read the description back from disk. The encoding is given explicitly so
# decoding does not depend on the platform's locale default.
with open('chatbot.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Tokenize the same raw text at two granularities. Note that the word-level
# token list also contains punctuation tokens such as '.', which later steps
# filter out.
sentences = sent_tokenize(text)
words = word_tokenize(text)

# Number the sentences starting from 1 for readability.
print("Sentences:")
for i, sent in enumerate(sentences, 1):
    print(f"{i}. {sent}")

print("\nWords:")
print(words)

Sentences:
1. ChatGPT is a sophisticated AI language model developed by OpenAI.
2. It serves as a versatile conversational agent capable of understanding and generating human-like text responses.
3. The chatbot uses advanced natural language processing techniques and was trained on vast amounts of internet text data.
4. ChatGPT can assist users with various tasks including writing, coding, answering questions, and providing explanations.
5. It has found applications in customer service, education, content creation, and technical support.
6. The model works by processing input text through multiple transformer layers and generating contextually relevant responses.

Words:
['ChatGPT', 'is', 'a', 'sophisticated', 'AI', 'language', 'model', 'developed', 'by', 'OpenAI', '.', 'It', 'serves', 'as', 'a', 'versatile', 'conversational', 'agent', 'capable', 'of', 'understanding', 'and', 'generating', 'human-like', 'text', 'responses', '.', 'The', 'chatbot', 'uses', 'advanced', 'natural', 'languag

In [7]:
# Step 3: Remove stop words
# Build the English stop-word set once (set membership is O(1)), then walk the
# token list and keep only tokens that are purely alphanumeric and whose
# lowercase form is not a stop word. Original casing is preserved in the result.
stop_words = set(stopwords.words('english'))

filtered_words = []
for token in words:
    if not token.isalnum():
        # Drops punctuation as well as tokens containing any
        # non-alphanumeric character (e.g. a hyphen).
        continue
    if token.lower() in stop_words:
        continue
    filtered_words.append(token)

In [8]:
# Step 4: Count stop words and non-stop words
# Stop words are counted with the same alphanumeric restriction used when
# building `filtered_words`, so the two totals together cover exactly the
# alphanumeric tokens in `words`.
stop_word_count = sum(
    1
    for token in words
    if token.isalnum() and token.lower() in stop_words
)
non_stop_word_count = len(filtered_words)

print("\nWord Count Summary:")
print(f"Stop words: {stop_word_count}")
print(f"Content words: {non_stop_word_count}")


Word Count Summary:
Stop words: 24
Content words: 62


In [9]:

# Step 5: Translate 10 selected words
translator = Translator()
selected_words = filtered_words[:10]  # Take first 10 filtered words

print("\nTranslations (to Hindi):")
for word in selected_words:
    translation = await translator.translate(word, dest='hi')
    print(f"{word}: {translation.text}")



Translations (to Hindi):
ChatGPT: चटपट
sophisticated: जटिल
AI: यह
language: भाषा
model: नमूना
developed: विकसित
OpenAI: ओपनई
serves: काम करना
versatile: बहुमुखी
conversational: संवादी


In [10]:
# Step 6: Named Entity Recognition
# Load the "en_core_web_sm" spaCy pipeline and run it over the full
# description text read in Step 2.
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)

# Print one line per detected entity: the matched text and its label.
print("\nNamed Entities:")
for entity in doc.ents:
    print(f"{entity.text}: {entity.label_}")


Named Entities:
ChatGPT: ORG
AI: ORG
OpenAI: GPE


In [11]:
# Step 7: Convert to speech
# The output filename is defined once so the file that is written and the
# name reported in the confirmation message cannot drift apart.
audio_path = "chatbot_speech.mp3"

# Synthesize the full description text as English speech and save it as MP3.
tts = gTTS(text=text, lang='en')
tts.save(audio_path)
print(f"\nAudio file saved as '{audio_path}'")


Audio file saved as 'chatbot_speech.mp3'
