# 1. Vectorization and Tokenization

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer

# Toy corpus: two short sentences that share the words "deep" and "learning".
texts = [
    "I love deep learning",
    "Deep learning is amazing",
]

# Build the tokenizer and let it learn the vocabulary from the corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# word_index is the learned word -> integer mapping (each word gets a number).
print("Word Index:", tokenizer.word_index)

# Replace every word of every sentence with its integer id.
sequences = tokenizer.texts_to_sequences(texts)
print("Tokenized & Vectorized:", sequences)


Word Index: {'deep': 1, 'learning': 2, 'i': 3, 'love': 4, 'is': 5, 'amazing': 6}
Tokenized & Vectorized: [[3, 4, 1, 2], [1, 2, 5, 6]]


# 2. Sentiment analysis

In [25]:
from textblob import TextBlob

# Sample sentences to score, one TextBlob polarity per sentence.
text = [
    "I like action movies",
    "I like action movies very much",
    "I love this product, it's amazing!",
    " dont known why I feel uncomforatable",
    "I watched a movie last night when I was sleepy mood. ",
    "I feel headache and getting angry soon. ",
    "I feel irritating, leave me alone",
    "Leave me alone else I wil shout on you",
    "Leave me alone else I wil shout on you due to angry.",
]

for sentence in text:
    # Sentiment polarity of the sentence (ranges from -1 to 1).
    polarity = TextBlob(sentence).sentiment.polarity

    # Choose the message from the sign of the score; exactly 0 is neutral.
    if polarity < 0:
        verdict = " ; Negative sentiment : "
    elif polarity > 0:
        verdict = " ; Positive sentiment : "
    else:
        verdict = " ; Neutral sentiment : "

    # Score and verdict share one output line: the first print omits the newline.
    print("Sentiment score : ", polarity, end="")
    print(verdict, sentence)


Sentiment score :  0.1 ; Positive sentiment :  I like action movies
Sentiment score :  0.18 ; Positive sentiment :  I like action movies very much
Sentiment score :  0.625 ; Positive sentiment :  I love this product, it's amazing!
Sentiment score :  0.0 ; Neutral sentiment :   dont known why I feel uncomforatable
Sentiment score :  0.0 ; Neutral sentiment :  I watched a movie last night when I was sleepy mood. 
Sentiment score :  -0.5 ; Negative sentiment :  I feel headache and getting angry soon. 
Sentiment score :  -0.4 ; Negative sentiment :  I feel irritating, leave me alone
Sentiment score :  0.0 ; Neutral sentiment :  Leave me alone else I wil shout on you
Sentiment score :  -0.3125 ; Negative sentiment :  Leave me alone else I wil shout on you due to angry.


In [None]:
# Disabled experiment: the code below is wrapped in a bare string literal, so
# nothing in it executes. The last recorded run of this cell failed with
# "ModuleNotFoundError: No module named 'pattern.en'"; the `pattern` package
# presumably needs to be installed before this can be re-enabled (TODO confirm).
"""from pattern.en import sentiment, lexicon

# See number of words in the lexicon
print(len(lexicon))  # Output: ~2900 words

# Print a few words and their polarity
for word in list(lexicon.keys())[:10]:
    print(word, sentiment(word))
"""

ModuleNotFoundError: No module named 'pattern.en'

# 3. Text classification

In [None]:
"""
After performing sentiment analysis, if you use the labeled data (positive, negative, neutral) to train a
machine learning model that can predict the sentiment of new, unseen text, you are performing ✅ text classification.
"""

In [20]:
from textblob import TextBlob

# `data` maps each sentence to the sentiment label assigned in the loop below.
data = {}
text = [
    "I like action movies",
    "I love this product, it's amazing!",
    "I watched a movie last night when I was sleepy mood. ",
    "I feel headache and getting angry soon. ",
    "Leave me alone else I wil shout on you",
    "Leave me alone else I wil shout on you due to angry.",
]

for sentence in text:
    # Sentiment polarity of the sentence (ranges from -1 to 1).
    polarity = TextBlob(sentence).sentiment.polarity

    # Choose both the printed message and the stored label from the sign of
    # the score; exactly 0 is neutral.
    if polarity < 0:
        verdict, label = " ; Negative sentiment : ", "Negative sentiment"
    elif polarity > 0:
        verdict, label = " ; Positive sentiment : ", "Positive sentiment"
    else:
        verdict, label = " ; Neutral sentiment : ", "Neutral sentiment"

    # Score and verdict share one output line: the first print omits the newline.
    print("Sentiment score : ", polarity, end="")
    print(verdict, sentence)

    # Keyed by the sentence text, so a repeated sentence would overwrite its entry.
    data[sentence] = label

# Trailing expression: the notebook displays the finished mapping.
data

Sentiment score :  0.1 ; Positive sentiment :  I like action movies
Sentiment score :  0.625 ; Positive sentiment :  I love this product, it's amazing!
Sentiment score :  0.0 ; Neutral sentiment :  I watched a movie last night when I was sleepy mood. 
Sentiment score :  -0.5 ; Negative sentiment :  I feel headache and getting angry soon. 
Sentiment score :  0.0 ; Neutral sentiment :  Leave me alone else I wil shout on you
Sentiment score :  -0.3125 ; Negative sentiment :  Leave me alone else I wil shout on you due to angry.


{'I like action movies': 'Positive sentiment',
 "I love this product, it's amazing!": 'Positive sentiment',
 'I watched a movie last night when I was sleepy mood. ': 'Neutral sentiment',
 'I feel headache and getting angry soon. ': 'Negative sentiment',
 'Leave me alone else I wil shout on you': 'Neutral sentiment',
 'Leave me alone else I wil shout on you due to angry.': 'Negative sentiment'}

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# Training set: the sentences and labels collected in `data` by the sentiment
# cell (dict order keeps every sentence aligned with its label).
texts = list(data)
labels = [data[sentence] for sentence in texts]

# Bag-of-words features: fit the vocabulary on the training sentences.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(texts)

# Train a multinomial Naive Bayes classifier on the word counts.
model = MultinomialNB().fit(X, labels)

# Predict labels for unseen sentences, reusing the fitted vocabulary.
test = ["It was amazing", "Terrible service"]
X_test = vectorizer.transform(test)
predictions = model.predict(X_test)
for sentence, predicted in zip(test, predictions):
    print(sentence, " : ", predicted)


It was amazing  :  Positive sentiment
Terrible service  :  Negative sentiment


# 4. Machine Translation

In [None]:
from transformers import MarianMTModel, MarianTokenizer
import warnings

# Suppress library warnings. Note that this filter is global: it stays in
# effect for every cell run afterwards, not only this one.
warnings.filterwarnings("ignore")

# English to Hindi model. from_pretrained fetches the files from the Hugging
# Face Hub on first use, so this cell needs network access.
# NOTE: this rebinds `tokenizer` and `model`, which earlier cells use for the
# Keras Tokenizer and the Naive Bayes classifier; rerun those cells before
# reusing them.
model_name = 'Helsinki-NLP/opus-mt-en-hi'
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# Translate
text = "How are you? Are u fine ?"
# Call the tokenizer directly: prepare_seq2seq_batch is deprecated in
# Transformers and scheduled for removal. padding/truncation are passed
# explicitly to keep the behaviour the old helper applied by default.
tokens = tokenizer([text], return_tensors="pt", padding=True, truncation=True)
translation = model.generate(**tokens)
# generate returns a batch of token-id sequences; decode the only entry.
translated_text = tokenizer.decode(translation[0], skip_special_tokens=True)

print(translated_text)  # the last recorded run printed: क्या यू ठीक है?


Error while downloading from https://cdn-lfs.hf.co/Helsinki-NLP/opus-mt-en-hi/46ae1116913bce01c9d848a78f62da2bd986d728bced4dc1acd5fedf4338ac5e?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model.safetensors%3B+filename%3D%22model.safetensors%22%3B&Expires=1745250400&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0NTI1MDQwMH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9IZWxzaW5raS1OTFAvb3B1cy1tdC1lbi1oaS80NmFlMTExNjkxM2JjZTAxYzlkODQ4YTc4ZjYyZGEyYmQ5ODZkNzI4YmNlZDRkYzFhY2Q1ZmVkZjQzMzhhYzVlP3Jlc3BvbnNlLWNvbnRlbnQtZGlzcG9zaXRpb249KiJ9XX0_&Signature=R9qKhvZ2-Xx%7E1F8qVm%7ErxLv%7E9e%7EMrP31nBbmNIMa7i6LbSgG4Tr7KXvpzPRY4cEQdBv5EdN7xZ45GOvZ4PXJstYJIAyZhSRUZWYdcNusfJf82xgok7GiRzuHDqoPAseOS0YAW8Xx8QVH2-4BMsPJD5sQtU1EM7NSvNeCqm%7EZm4c4dK0M1-9cWs2si3hJbiLuAP1Tr5TVT%7E5bT3koa7zy67ANsAhe-nnvdI-AEHM-n4mxisjjVuQxY5lENjGJXrb9FSN69suD0WmYZ4bycOYtANYC%7EQL6qAZO6bNV7bme7BeUO6-obsC60%7ELdhf-BBMws2E10M06cp2ARtG8KyNbB9Q__&Key-Pair-Id=K3RPWS32NSSJCE

क्या यू ठीक है?


# 6. Named Entity Recognition (NER)

# 7. Text Summarization

# 8. Speech Recognition

# 9. Autocorrect and Spell Check

# 10. Information Retrieval (Search Engines)