In [None]:
import nltk
import spacy
from nltk.stem import PorterStemmer, SnowballStemmer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from collections import Counter

# Fetch the NLTK data packages used by the lemmatizer and tokenizer below
for resource in ('wordnet', 'omw-1.4', 'punkt_tab'):
    nltk.download(resource)

# One shared instance of each normalizer, reused by the later cells
porter = PorterStemmer()
snowball = SnowballStemmer("english")
lemmatizer = WordNetLemmatizer()
nlp = spacy.load("en_core_web_sm")

# Short sample texts, keyed by the domain they are taken from
corpus = {
    "news": "The economic downturn is affecting global markets. Investors are reconsidering their portfolios.",
    "wikipedia": "The mitochondrion is often referred to as the powerhouse of the cell.",
    "science_paper": "In recent studies, convolutional neural networks have demonstrated state-of-the-art performance in image classification tasks."
}

# Same keys as `corpus`, but each text is replaced by its list of NLTK tokens
tokenized_corpus = {
    domain: nltk.word_tokenize(text)
    for domain, text in corpus.items()
}

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


In [None]:
def apply_stemming(text_tokens, stemmer):
    """Stem every token with the supplied stemmer.

    Args:
        text_tokens: Iterable of token strings.
        stemmer: Any object exposing a ``stem(word)`` method, such as the
            ``porter`` and ``snowball`` instances created in this notebook.

    Returns:
        list: ``stemmer.stem(token)`` for each token, in input order.
    """
    stems = []
    for token in text_tokens:
        stems.append(stemmer.stem(token))
    return stems

# Show the Porter and Snowball results side by side for every domain
for domain, tokens in tokenized_corpus.items():
    print(f"\n**{domain.upper()} DOMAIN**")
    for label, stemmer in (("Porter Stemming:", porter), ("Snowball Stemming:", snowball)):
        print(label, apply_stemming(tokens, stemmer))



**NEWS DOMAIN**
Porter Stemming: ['the', 'econom', 'downturn', 'is', 'affect', 'global', 'market', '.', 'investor', 'are', 'reconsid', 'their', 'portfolio', '.']
Snowball Stemming: ['the', 'econom', 'downturn', 'is', 'affect', 'global', 'market', '.', 'investor', 'are', 'reconsid', 'their', 'portfolio', '.']

**WIKIPEDIA DOMAIN**
Porter Stemming: ['the', 'mitochondrion', 'is', 'often', 'refer', 'to', 'as', 'the', 'powerhous', 'of', 'the', 'cell', '.']
Snowball Stemming: ['the', 'mitochondrion', 'is', 'often', 'refer', 'to', 'as', 'the', 'powerhous', 'of', 'the', 'cell', '.']

**SCIENCE_PAPER DOMAIN**
Porter Stemming: ['in', 'recent', 'studi', ',', 'convolut', 'neural', 'network', 'have', 'demonstr', 'state-of-the-art', 'perform', 'in', 'imag', 'classif', 'task', '.']
Snowball Stemming: ['in', 'recent', 'studi', ',', 'convolut', 'neural', 'network', 'have', 'demonstr', 'state-of-the-art', 'perform', 'in', 'imag', 'classif', 'task', '.']


In [None]:
def apply_lemmatization(text_tokens, lemmatizer, pos=wordnet.VERB):
    """Lemmatize every token using one fixed part-of-speech tag.

    Args:
        text_tokens: Iterable of token strings.
        lemmatizer: Object exposing ``lemmatize(word, pos)``.
        pos: Part-of-speech tag passed unchanged for *every* token
            (defaults to ``wordnet.VERB``).

    Returns:
        list: One lemma per token, in input order.

    Note:
        Because a single tag is applied to all tokens, words of another part
        of speech can come back unchanged. In the output below the nouns
        'Investors' and 'portfolios' are left as they are.
    """
    lemmas = []
    for token in text_tokens:
        lemmas.append(lemmatizer.lemmatize(token, pos))
    return lemmas

# Lemmatize the pre-tokenized text of each domain with the default POS tag
for domain, tokens in tokenized_corpus.items():
    print(f"\n**{domain.upper()} DOMAIN**")
    nltk_lemmas = apply_lemmatization(tokens, lemmatizer)
    print("NLTK Lemmatization:", nltk_lemmas)



**NEWS DOMAIN**
NLTK Lemmatization: ['The', 'economic', 'downturn', 'be', 'affect', 'global', 'market', '.', 'Investors', 'be', 'reconsider', 'their', 'portfolios', '.']

**WIKIPEDIA DOMAIN**
NLTK Lemmatization: ['The', 'mitochondrion', 'be', 'often', 'refer', 'to', 'as', 'the', 'powerhouse', 'of', 'the', 'cell', '.']

**SCIENCE_PAPER DOMAIN**
NLTK Lemmatization: ['In', 'recent', 'study', ',', 'convolutional', 'neural', 'network', 'have', 'demonstrate', 'state-of-the-art', 'performance', 'in', 'image', 'classification', 'task', '.']


In [None]:
def spacy_lemmatization(text):
    """Return the spaCy lemma of every token in `text`.

    The raw string is passed through the module-level ``nlp`` pipeline, so
    tokenization is done by spaCy rather than by NLTK.

    Args:
        text: Raw, untokenized input string.

    Returns:
        list: ``token.lemma_`` for each token of the parsed document.
    """
    lemmas = []
    for token in nlp(text):
        lemmas.append(token.lemma_)
    return lemmas

# Run the spaCy pipeline on the raw (untokenized) text of each domain
for domain, text in corpus.items():
    print(f"\n**{domain.upper()} DOMAIN**")
    spacy_lemmas = spacy_lemmatization(text)
    print("SpaCy Lemmatization:", spacy_lemmas)



**NEWS DOMAIN**
SpaCy Lemmatization: ['the', 'economic', 'downturn', 'be', 'affect', 'global', 'market', '.', 'investor', 'be', 'reconsider', 'their', 'portfolio', '.']

**WIKIPEDIA DOMAIN**
SpaCy Lemmatization: ['the', 'mitochondrion', 'be', 'often', 'refer', 'to', 'as', 'the', 'powerhouse', 'of', 'the', 'cell', '.']

**SCIENCE_PAPER DOMAIN**
SpaCy Lemmatization: ['in', 'recent', 'study', ',', 'convolutional', 'neural', 'network', 'have', 'demonstrate', 'state', '-', 'of', '-', 'the', '-', 'art', 'performance', 'in', 'image', 'classification', 'task', '.']


In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Download the tokenizer and lemmatizer data this cell needs.
# 'punkt_tab' is requested alongside the legacy 'punkt' package: the first
# cell of this notebook tokenizes with 'punkt_tab', so without it
# `word_tokenize` here would only work because an earlier cell had already
# fetched the data.
for resource in ('punkt', 'punkt_tab', 'wordnet'):
    nltk.download(resource)

lemmatizer = WordNetLemmatizer()

# Plain-English word -> Gen Z replacement. Keys are lowercase single tokens.
GEN_Z_DICT = {
    "money": "bread", "work": "grind", "friend": "bestie", "tired": "exhausted af",
    "smart": "big-brain", "cool": "vibe check passed", "boring": "mid",
    "excited": "hyped", "love": "obsessed", "understand": "get it, bestie",
    "bad": "sus", "good": "fire", "funny": "lowkey hilarious", "amazing": "goated",
}

def normal_mode(text):
    """Return `text` with every token lemmatized.

    The text is tokenized with ``word_tokenize``, each token is passed to the
    module-level ``lemmatizer`` (using its default part of speech), and the
    tokens are reassembled with NLTK's Treebank detokenizer so punctuation is
    re-attached ("word, but" instead of "word , but").

    Args:
        text: Raw input sentence.

    Returns:
        str: The lemmatized sentence. An empty input yields an empty string.
    """
    # Imported locally so this function does not depend on another cell's imports.
    from nltk.tokenize.treebank import TreebankWordDetokenizer

    lemmas = [lemmatizer.lemmatize(token) for token in word_tokenize(text)]
    return TreebankWordDetokenizer().detokenize(lemmas)

def gen_z_mode(text):
    """Convert `text` into Gen Z slang using ``GEN_Z_DICT``.

    Each token is looked up case-insensitively (the dictionary keys are
    lowercase). Tokens without an entry are kept exactly as typed, so the
    casing of untranslated words such as "I" is preserved. The tokens are then
    reassembled with NLTK's Treebank detokenizer so punctuation is re-attached
    instead of being surrounded by spaces.

    Args:
        text: Raw input sentence.

    Returns:
        str: The sentence with every known word replaced by its slang form.
    """
    # Imported locally so this function does not depend on another cell's imports.
    from nltk.tokenize.treebank import TreebankWordDetokenizer

    translated = [GEN_Z_DICT.get(token.lower(), token) for token in word_tokenize(text)]
    return TreebankWordDetokenizer().detokenize(translated)

def translate(text=None):
    """Print the lemmatized and Gen Z versions of a sentence.

    Args:
        text: Sentence to translate. When omitted (``None``) the user is
            prompted for one, which is the original interactive behaviour.
            Passing a string lets the cell run unattended, e.g. under
            "Restart & Run All".

    Returns:
        None. The results are written to stdout.
    """
    if text is None:
        text = input("\n💬 Enter a sentence: ")
    print(f"\n✅ **Normal:** {normal_mode(text)}")
    print(f"\n🔥 **Gen Z Mode:** {gen_z_mode(text)}\n")

translate()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!



💬 Enter a sentence: I love my word, but it gets boring sometimes

✅ **Normal:** I love my word , but it get boring sometimes

🔥 **Gen Z Mode:** i obsessed my word , but it gets mid sometimes



In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Download the tokenizer and lemmatizer data this cell needs.
# 'punkt_tab' is requested alongside the legacy 'punkt' package: the first
# cell of this notebook tokenizes with 'punkt_tab', so without it
# `word_tokenize` here would only work because an earlier cell had already
# fetched the data.
for resource in ('punkt', 'punkt_tab', 'wordnet'):
    nltk.download(resource)

# NOTE(review): nothing in this cell uses `lemmatizer`; it is kept so code that
# expects the name after running this cell keeps working.
lemmatizer = WordNetLemmatizer()

# Each dictionary maps a lowercase plain-English token to its replacement in
# one "voice". All three share the same set of keys.
GEN_Z_DICT = {
    "money": "bread", "work": "grind", "friend": "bestie", "tired": "exhausted af",
    "smart": "big-brain", "cool": "vibe check passed", "boring": "mid",
    "excited": "hyped", "love": "obsessed", "understand": "get it, bestie",
    "bad": "sus", "good": "fire", "funny": "lowkey hilarious", "amazing": "goated",
}

BOOMER_DICT = {
    "money": "hard-earned cash", "work": "a 9-to-5 job", "friend": "pal", "tired": "worn out",
    "smart": "book smart", "cool": "neat", "boring": "dull",
    "excited": "thrilled", "love": "truly admire", "understand": "comprehend",
    "bad": "not up to the mark", "good": "decent", "funny": "a real hoot", "amazing": "astonishing",
}

CORPORATE_DICT = {
    "money": "capital", "work": "workflow optimization", "friend": "stakeholder", "tired": "resource depletion",
    "smart": "data-driven", "cool": "innovative", "boring": "low engagement",
    "excited": "strategically motivated", "love": "synergize with", "understand": "leverage insights",
    "bad": "off-track", "good": "best-in-class", "funny": "engagement-boosting", "amazing": "disruptive",
}

def translate_mode(text, dictionary):
    """Rewrite `text` using a word-replacement dictionary.

    Each token is looked up case-insensitively, so dictionary keys are expected
    to be lowercase, as in the dictionaries defined in this notebook. Tokens
    without an entry are kept exactly as typed, so the casing of untranslated
    words such as "I" is preserved. The tokens are then reassembled with
    NLTK's Treebank detokenizer so punctuation is re-attached instead of being
    surrounded by spaces.

    Args:
        text: Raw input sentence.
        dictionary: Mapping of lowercase token -> replacement string.

    Returns:
        str: The sentence with every known word replaced.
    """
    # Imported locally so this function does not depend on another cell's imports.
    from nltk.tokenize.treebank import TreebankWordDetokenizer

    translated = [dictionary.get(token.lower(), token) for token in word_tokenize(text)]
    return TreebankWordDetokenizer().detokenize(translated)

def translate(text=None):
    """Print a sentence unchanged and in the Gen Z, Boomer and Corporate voices.

    Args:
        text: Sentence to translate. When omitted (``None``) the user is
            prompted for one, which is the original interactive behaviour.
            Passing a string lets the cell run unattended, e.g. under
            "Restart & Run All".

    Returns:
        None. The results are written to stdout.
    """
    if text is None:
        text = input("\n💬 Enter a sentence: ")
    print(f"\n✅ **Normal:** {text}")

    # (heading, dictionary) pairs in display order.
    modes = (
        ("🔥 **Gen Z Mode:**", GEN_Z_DICT),
        ("👴 **Boomer Mode:**", BOOMER_DICT),
        ("💼 **Corporate Mode:**", CORPORATE_DICT),
    )
    last = len(modes) - 1
    for position, (heading, dictionary) in enumerate(modes):
        # Only the final line carries the extra trailing newline.
        trailer = "\n" if position == last else ""
        print(f"\n{heading} {translate_mode(text, dictionary)}{trailer}")

translate()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!



💬 Enter a sentence: I love my work but it can be boring sometimes

✅ **Normal:** I love my work but it can be boring sometimes

🔥 **Gen Z Mode:** i obsessed my grind but it can be mid sometimes

👴 **Boomer Mode:** i truly admire my a 9-to-5 job but it can be dull sometimes

💼 **Corporate Mode:** i synergize with my workflow optimization but it can be low engagement sometimes

