[Reference](https://levelup.gitconnected.com/how-i-created-the-most-powerful-nlp-library-in-python-c26d08a55809)

# Installing the Library

In [2]:
# Fetch the library source; later cells import its modules straight from this checkout
!git clone https://github.com/FareedKhan-dev/Most-powerful-NLP-library.git

Cloning into 'Most-powerful-NLP-library'...
remote: Enumerating objects: 24, done.
remote: Counting objects: 100% (24/24), done.
remote: Compressing objects: 100% (18/18), done.
remote: Total 24 (delta 12), reused 13 (delta 6), pack-reused 0
Receiving objects: 100% (24/24), 22.05 KiB | 3.67 MiB/s, done.
Resolving deltas: 100% (12/12), done.


In [4]:
# Install the Google Generative AI library
!pip install -q -U google-generativeai

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/146.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/146.9 kB[0m [31m917.2 kB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━[0m [32m92.2/146.9 kB[0m [31m1.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m146.9/146.9 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [10]:
# Work from inside the checkout so `pre_processing` and `core_nlp` can be imported by name
%cd Most-powerful-NLP-library

/content/Most-powerful-NLP-library


In [11]:
# List the checkout's contents to confirm core_nlp.py and pre_processing.py are present
!ls

code.ipynb  core_nlp.py  for_beginners	pre_processing.py  README.md


# Initializing the Library

In [16]:
# Import the Google Generative AI library
import os
from getpass import getpass

import google.generativeai as genai

# Keep the key out of the notebook: reuse GOOGLE_API_KEY when it is already set
# in the environment, otherwise prompt for it without echoing the input.
if not os.environ.get('GOOGLE_API_KEY'):
    os.environ['GOOGLE_API_KEY'] = getpass('Enter your Google API key: ')

# Configure the library with your API key
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# Initialize the GenerativeModel with 'gemini-pro'.
# Later cells pass this `model` object to every library function.
model = genai.GenerativeModel('gemini-pro')

# Cleaning the Text

In [22]:
# Import the clean_text function from the pre_processing module
# (pre_processing.py lives in the cloned repository, which is the working directory)
from pre_processing import clean_text

# Deliberately noisy input: stray symbols and a stray digit are embedded in the words.
# The triple-quoted literal keeps the newline and the second line's leading spaces.
user_input = '''faree$$@$%d khan will arrive at 9:00 AM.
                He will@%$ 1meet you at the airport.'''

# Clean the text; `model` is the Gemini model object created in the setup cell
cleaned_text = clean_text(user_input, model)

# Print the cleaned text
print(cleaned_text)

Fared Khan will arrive at 9:00 AM.
He will meet you at the airport.


# Perform Lemmatization or Stemming

In [23]:
# Import the lemmatize_text and stem_text functions from the pre_processing module
from pre_processing import lemmatize_text, stem_text

# User input text; the same sentence is fed to both functions so the results can be compared
user_input = '''The cats are running and playing in the gardens,
                while the dogs are barking loudly and chasing their tails'''

# Lemmatize the text using the specified model
lemmatized_sentence = lemmatize_text(user_input, model)

# Stem the text using the specified model
stemmed_sentence = stem_text(user_input, model)

# Print the lemmatized sentence first, then the stemmed one
print(lemmatized_sentence)
print(stemmed_sentence)

The cat be run and play in the garden, while the dog be bark loud and chase their tail
Cat run play garden dog bark loud chase tail


# Simplifying NER Detection and POS Tagging

In [24]:
# Import the detect_ner function from the core_nlp module
from core_nlp import detect_ner

# User input text
user_input = "I will meet you at the airport sharp 12:00 AM."

# Specify NER tags (optional, default includes 'person, location, date, number, ...')
# Tags are given as one comma-separated string.
# NOTE(review): the literal '...' is sent as part of the tag string — presumably a hint
# that the list is open-ended; confirm against core_nlp.detect_ner.
ner_tags = 'person, location, date, number, ... cardinal'

# Detect named entities in the text using the specified model and NER tags
ner_result = detect_ner(input_text=user_input, ner_tags=ner_tags, model=model)

# Print the NER result
print(ner_result)

airport: location
12:00 AM: time


In [25]:
# Import the detect_pos function from the core_nlp module
from core_nlp import detect_pos

# User input text (same sentence as the NER example)
user_input = "I will meet you at the airport sharp 12:00 AM."

# Specify POS tags (optional, default includes 'NOUN, verb, adjective, adverb, ...')
# Tags are given as one comma-separated string.
# NOTE(review): the literal '...' is sent as part of the tag string — presumably a hint
# that the list is open-ended; confirm against core_nlp.detect_pos.
pos_tags = 'noun, verb, adjective, adverb, pronoun, ... entity_phrase'

# Detect part-of-speech in the text using the specified model and POS tags
pos_result = detect_pos(input_text=user_input, pos_tags=pos_tags, model=model)

# Print the POS result
print(pos_result)

I: pronoun
will: verb
meet: verb
you: pronoun
at: preposition
the: determiner
airport: noun
sharp: adjective
12:00: time
AM: time


# Text Pattern Matching


In [26]:
# Import the extract_patterns function from the pre_processing module
from pre_processing import extract_patterns

# User input text containing two phone numbers, one email address and a name
user_input = '''The phone number of fareed khan is 123-456-7890 and 523-456-7892. Please call for assistance and email me at x123@gmail.com'''

# Define patterns for extraction: plain-language names in one comma-separated string,
# not regular expressions
pattern_matching = '''email, phone number, name'''

# Extract patterns from the input text using the specified model and patterns
extracted_patterns = extract_patterns(user_input, pattern_matching, model)

# Print the extracted patterns
print(extracted_patterns)

['123-456-7890', '523-456-7892', 'x123@gmail.com', 'fareed khan']


# Text Classification

In [27]:
# Import the analyze_sentiment function from the core_nlp module
from core_nlp import analyze_sentiment

# User input text: mixes a positive statement with negative ones
user_input = "I love to play football, but today I am feeling very sad. I do not want to play football today."

# Specify sentiment categories (optional, default includes 'positive, negative, neutral')
category = "positive, negative, neutral"

# Analyze sentiment in the text using the specified model and sentiment categories.
# explanation=True is passed here; the recorded output contains a short explanation
# in addition to the category.
sentiment_result = analyze_sentiment(input_text=user_input, category=category, explanation=True, model=model)

# Print the sentiment result
print(sentiment_result)

**Category: Negative**

**Short Explanation:**

The overall sentiment of the text is negative. The author starts by expressing a love for playing football, but then goes on to say that today they are feeling very sad and do not want to play football. This indicates a negative sentiment towards playing football today.


In [28]:
# Import the classify_topic function from the core_nlp module
from core_nlp import classify_topic

# User input text (same sentence as the sentiment example)
user_input = "I love to play football, but today I am feeling very sad. I do not want to play football today."

# Specify topics (optional, default includes 'story, horror, comedy')
# Candidate topics are given as one comma-separated string.
topics = "story, horror, comedy"

# Classify the topic of the text using the specified model and topics.
# explanation=True is passed here; the recorded output contains an explanation
# in addition to the topic.
topic_result = classify_topic(input_text=user_input, topics=topics, explanation=True, model=model)

# Print the topic result
print(topic_result)

Topic: Story

Explanation: The input text is a short story about a person who loves to play football but is feeling sad and does not want to play today. This is a common theme in stories, where the protagonist faces a challenge or obstacle that they must overcome.


In [29]:
# Import the detect_spam function from the core_nlp module
from core_nlp import detect_spam

# User input text
user_input = "you have just won $14000, claim this award here at this link."

# Specify spam categories (optional, default includes 'spam, not_spam, unknown')
# Note: this rebinds `category`, which the sentiment cell also assigns.
category = 'spam, not_spam, unknown'

# Detect spam in the text using the specified model and spam categories.
# explanation=True is passed here; the recorded output contains an explanation
# in addition to the label.
spam_result = detect_spam(input_text=user_input, category=category, explanation=True, model=model)

# Print the spam result
print(spam_result)

spam

Explanation: The text contains an unsolicited offer of a large sum of money, which is a common tactic used in spam emails. The link provided is likely malicious and could lead to the user's personal information being stolen or their computer being infected with malware.


# Semantic Role Labeling (SRL)

In [30]:
# Import the perform_srl function from the core_nlp module
from core_nlp import perform_srl

# User input text
user_input = "tornado is approaching the city, please take shelter"

# Perform Semantic Role Labeling (SRL) on the text using the specified model
# (called positionally: text first, then the model)
srl_result = perform_srl(user_input, model)

# Print the SRL result
print(srl_result)

Predicate: approaching
Roles:
- Agent: tornado
- Theme: city


# Intent Recognition

In [31]:
# Import the recognize_intent function from the core_nlp module
from core_nlp import recognize_intent

# User input text (same sentence as the SRL example)
user_input = "tornado is approaching the city, please take shelter"

# Recognize intent in the text using the specified model
# (called positionally: text first, then the model)
intent_result = recognize_intent(user_input, model)

# Print the intent result
print(intent_result)

Intent: Take shelter


# Handling Large Data

In [32]:
# Path of the large text file to process
text_dataset = "some_big_text_file.txt"

# Read the file's contents first: splitting the path string itself would only
# produce a single bogus "sentence" (the file name).
with open(text_dataset, encoding="utf-8") as text_file:
    full_text = text_file.read()

# Break the text into sentences based on full stops, dropping empty fragments
sentences = [part.strip() for part in full_text.split('. ') if part.strip()]

# some ner_tags you have defined
ner_tags = "person, organization ..."

# Applying NER on it, one sentence per model call
for i, sentence in enumerate(sentences):
    print(f"Sentence {i + 1}:")

    # detect_ner returns its result (other cells print it), so print it here too;
    # otherwise nothing appears under each "Sentence N:" header
    print(detect_ner(input_text=sentence, ner_tags=ner_tags, model=model))

In [33]:
# function for paraphrase detection
def paraphrasing_detection(input_text, explanation, model):
    """Ask the model whether two sentences are paraphrases of each other.

    Args:
        input_text: Indexable pair of sentences (e.g. a two-element list or
            tuple); items 0 and 1 are compared. A bare string is rejected,
            because indexing it would compare its first two characters.
        explanation: When truthy, the prompt asks for a short explanation;
            otherwise the prompt says no explanation is wanted.
        model: Object exposing ``generate_content(prompt)`` whose return
            value has a ``.text`` attribute.

    Returns:
        The model's response text with surrounding whitespace stripped.

    Raises:
        TypeError: If ``input_text`` is a single string instead of a pair.
        IndexError: If a list/tuple ``input_text`` holds fewer than two items.
    """
    # Guard against the easy mistake of passing one string: input_text[0] and
    # input_text[1] would then be single characters, not sentences.
    if isinstance(input_text, (str, bytes)):
        raise TypeError(
            "input_text must be a pair of sentences (e.g. a two-element list), "
            "not a single string"
        )

    # Pull both sentences out before building the prompt so a too-short input
    # fails before any model call is made
    sentence_1, sentence_2 = input_text[0], input_text[1]

    # Check if explanation is required
    explanation_text = 'short explanation: ' if explanation else 'no explanation'

    # Question to be asked for determining paraphrasing
    question = f'''Given the input text, determine if two sentences are paraphrases of each other.
    Sentence 1: {sentence_1}
    Sentence 2: {sentence_2}
    Answer must be 'yes' or 'no'.
    {explanation_text}
    '''

    # Generate response
    response = model.generate_content(question)
    return response.text.strip()

In [34]:
# Import the paraphrasing_detection function from the core_nlp module
# (this rebinds the name, replacing the same-named function defined earlier in the notebook)
from core_nlp import paraphrasing_detection

# The two sentences to compare, passed as a two-element list
user_input = ['''The sun sets in the west every evening.''', '''Every evening, the sun goes down in the west.''']

# Perform paraphrasing detection using the specified model.
# Stored under its own name so the intent-recognition result is not overwritten.
paraphrase_result = paraphrasing_detection(user_input, explanation=True, model=model)

# Print the paraphrasing detection result
print(paraphrase_result)

yes
Explanation:
Sentence 1 and Sentence 2 convey the same meaning using different words. "Sets" and "goes down" are synonyms, and "west" refers to the same direction in both sentences. The time frame of "every evening" is also the same in both sentences. Therefore, the two sentences are paraphrases of each other.
