In [1]:
# Named entity recognition using spacy322323
\
\
import spacy

nlp = spacy.load("en_core_web_sm")

text = """Tencent has launched the official version of its T1 reasoning model, enhancing its position in China's competitive artificial intelligence sector. 
The T1 model boasts improved response times and extended text processing capabilities, maintaining clear content logic and a low hallucination rate. 
This move follows the introduction of competitive AI models by DeepSeek, which offer comparable performance to Western systems at lower costs. 
The T1 model benefits from Tencent's Turbo S foundational language model, introduced last month, which the company claims is faster than DeepSeek's R1. 
Tencent has increased its AI investments and plans further capital expenditure in 2025 following significant spending in 2024."""

doc = nlp(text)

print("Named Entities, their Labels, and their Positions")
for ent in doc.ents:
    print(f"{ent.text} ({ent.label_})")




Named Entities, their Labels, and their Positions
T1 (CARDINAL)
China (GPE)
T1 (ORG)
AI (ORG)
DeepSeek (PRODUCT)
Western (NORP)
T1 (ORG)
Tencent (ORG)
Turbo S (ORG)
last month (DATE)
DeepSeek (PRODUCT)
AI (ORG)
2025 (DATE)
2024 (DATE)


In [3]:
# Sentiment analysis
from textblob import TextBlob

reviews = [
    "The battery life of this phone is amazing! It lasts two days easily.",
    "The camera quality is terrible. The pictures are always blurry.",
    "Fast performance and a great display, but the speaker quality is below average.",
    "The laptop heats up too quickly, making it hard to use for long hours.",
    "Absolutely love this smartwatch! The features are just what I needed.",
    "The product is below average and fairly matches the picture"
]

for review in reviews:
    sentiment_score = TextBlob(review).sentiment.polarity
    sentiment = "Positive" if sentiment_score > 0 else "Negative" if sentiment_score < 0 else "Neutral"

    print(f"Review: {review}\nSentiment: {sentiment} (Score {sentiment_score:.2f})\n")



Review: The battery life of this phone is amazing! It lasts two days easily.
Sentiment: Positive (Score 0.59)

Review: The camera quality is terrible. The pictures are always blurry.
Sentiment: Negative (Score -1.00)

Review: Fast performance and a great display, but the speaker quality is below average.
Sentiment: Positive (Score 0.28)

Review: The laptop heats up too quickly, making it hard to use for long hours.
Sentiment: Negative (Score -0.00)

Review: Absolutely love this smartwatch! The features are just what I needed.
Sentiment: Positive (Score 0.62)

Review: The product is below average and fairly matches the picture
Sentiment: Positive (Score 0.27)



In [10]:
# Tokenization using NLTK

from nltk.tokenize import word_tokenize
import nltk

nltk.download("punkt")

# txt
text = "The big brown fox jumped over the window!. But  notice that the cow is running towards an ajar door that was made of a rich blend of cinammon and oak"
tokens = word_tokenize(text)
print(f"Tokens: {tokens}")

Tokens: ['The', 'big', 'brown', 'fox', 'jumped', 'over', 'the', 'window', '!', '.', 'But', 'notice', 'that', 'the', 'cow', 'is', 'running', 'towards', 'an', 'ajar', 'door', 'that', 'was', 'made', 'of', 'a', 'rich', 'blend', 'of', 'cinammon', 'and', 'oak']


[nltk_data] Downloading package punkt to /home/joe/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [12]:
# Stopword removal
from nltk.corpus import stopwords
nltk.download("stopwords")

stop_words = set(stopwords.words('english'))

filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
print('Filtered Tokens:', filtered_tokens)


Filtered Tokens: ['big', 'brown', 'fox', 'jumped', 'window', '!', '.', 'notice', 'cow', 'running', 'towards', 'ajar', 'door', 'made', 'rich', 'blend', 'cinammon', 'oak']


[nltk_data] Downloading package stopwords to /home/joe/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [13]:
# Stemming
from nltk.stem import PorterStemmer

stemmer = PorterStemmer()

stemmed_tokens = [stemmer.stem(token) for token in filtered_tokens]
print("stemmed tokens", stemmed_tokens)

stemmed tokens ['big', 'brown', 'fox', 'jump', 'window', '!', '.', 'notic', 'cow', 'run', 'toward', 'ajar', 'door', 'made', 'rich', 'blend', 'cinammon', 'oak']


In [14]:
# Lematization

from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()

lemmatization_token = [lemmatizer.lemmatize(token) for token in filtered_tokens]
print('Lemmatization Token:', lemmatization_token)

Lemmatization Token: ['big', 'brown', 'fox', 'jumped', 'window', '!', '.', 'notice', 'cow', 'running', 'towards', 'ajar', 'door', 'made', 'rich', 'blend', 'cinammon', 'oak']


[nltk_data] Downloading package wordnet to /home/joe/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [15]:
# Case Conversion

lowercase_tokens = [token.lower() for token in lemmatization_token]
print('Lowercase Tokens:', lowercase_tokens)

Lowercase Tokens: ['big', 'brown', 'fox', 'jumped', 'window', '!', '.', 'notice', 'cow', 'running', 'towards', 'ajar', 'door', 'made', 'rich', 'blend', 'cinammon', 'oak']
