# Gen-Z Slang Generator

## Setup

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/gemma2/keras/gemma2_2b_en/1/config.json
/kaggle/input/gemma2/keras/gemma2_2b_en/1/tokenizer.json
/kaggle/input/gemma2/keras/gemma2_2b_en/1/metadata.json
/kaggle/input/gemma2/keras/gemma2_2b_en/1/model.weights.h5
/kaggle/input/gemma2/keras/gemma2_2b_en/1/assets/tokenizer/vocabulary.spm
/kaggle/input/slang-data/all_slang_only_words.csv


In [2]:
!pip install -q -U keras-nlp
!pip install -q -U "keras>=3"

In [3]:
os.environ["KERAS_BACKEND"]= 'jax'
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"]="1.00"

In [4]:
import keras
import keras_nlp
import pandas as pd

## Load Dataset

In [5]:
slang_dataset = pd.read_csv('/kaggle/input/slang-data/all_slang_only_words.csv')
slang_dataset

Unnamed: 0,Slang,Description,Example,Context
0,W,Shorthand for win,"Got the job today, big W!",Typically used in conversations to celebrate s...
1,L,Shorthand for loss/losing,"I forgot my wallet at home, that’s an L.",Often used when referring to a failure or mish...
2,L+ratio,Response to a comment or action on the interne...,Your tweet got 5 likes and 100 replies calling...,Popularized on social media platforms to signi...
3,Dank,excellent or of very high quality,That meme is so dank!,Commonly used in internet slang to refer to me...
4,Cheugy,Derogatory term for Millennials. Used when mil...,"That phrase is so cheugy, no one says that any...",Used to refer to things that were once popular...
...,...,...,...,...
1774,ZH,Sleeping Hour,"It’s ZH, goodnight!",Refers to the time when someone usually goes t...
1775,ZOMG,Oh My God,"ZOMG, I can’t believe you did that!","An exaggerated or enthusiastic version of ""OMG..."
1776,ZOT,Zero tolerance,Our school has a ZOT policy for bullying.,Refers to a strict policy where certain behavi...
1777,ZUP,What’s up?,"Hey, ZUP with you today?",A casual way to ask how someone is doing or wh...


In [6]:
slang_dataset.head()

Unnamed: 0,Slang,Description,Example,Context
0,W,Shorthand for win,"Got the job today, big W!",Typically used in conversations to celebrate s...
1,L,Shorthand for loss/losing,"I forgot my wallet at home, that’s an L.",Often used when referring to a failure or mish...
2,L+ratio,Response to a comment or action on the interne...,Your tweet got 5 likes and 100 replies calling...,Popularized on social media platforms to signi...
3,Dank,excellent or of very high quality,That meme is so dank!,Commonly used in internet slang to refer to me...
4,Cheugy,Derogatory term for Millennials. Used when mil...,"That phrase is so cheugy, no one says that any...",Used to refer to things that were once popular...


In [7]:
slang_data = []

for index, row in slang_dataset.iterrows():
    # Instruction prompts the user to input the context
    instruction = (
        "Given the context below, create a new Gen Z slang term. "
        "The slang should be catchy, easy to use, and relevant to modern youth culture. "
        "Make sure it's something that would feel natural in casual conversation:\n\n"
        "Context: " + row['Context'],
        "Make sure that you should provide slang, description, and example as given."
    )

    # Response provides the description and example for the slang
    response = (
        "Slang: {slang}\n\n"
        "Description: {description}\n\n"
        "Example: {example}".format(
            slang=row['Slang'],
            description=row['Description'],
            example=row['Example']
        )
    )

    template = "Instruction:\n{instruction}\n\nResponse:\n{response}"
    slang_data.append(template.format(instruction=instruction, response=response))

## Load Model + LoRA fine-tuning

In [8]:
gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset("gemma2_2b_en")

normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.


In [9]:
gemma_lm.backbone.enable_lora(rank=8)

In [10]:
gemma_lm.summary()

In [11]:
# Limit the input sequence length to 256 (to control memory usage)
gemma_lm.preprocessor.sequence_length = 256

# Use AdamW (optimizer for transformer models)
optimizer = keras.optimizers.AdamW(
    learning_rate = 5e-5,
    weight_decay = 0.01,
)

# Exclude layernorm and bias terms from decay
optimizer.exclude_from_weight_decay(var_names=['bias', 'scale'])

gemma_lm.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=optimizer,
    weighted_metrics=[keras.metrics.SparseCategoricalAccuracy()],
)

gemma_lm.fit(slang_data, epochs=10, batch_size=1)

Epoch 1/10
[1m1779/1779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m820s[0m 444ms/step - loss: 0.4978 - sparse_categorical_accuracy: 0.7820
Epoch 2/10
[1m1779/1779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m791s[0m 434ms/step - loss: 0.2678 - sparse_categorical_accuracy: 0.8667
Epoch 3/10
[1m1779/1779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m772s[0m 434ms/step - loss: 0.2533 - sparse_categorical_accuracy: 0.8713
Epoch 4/10
[1m1779/1779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m771s[0m 434ms/step - loss: 0.2386 - sparse_categorical_accuracy: 0.8764
Epoch 5/10
[1m1779/1779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m771s[0m 434ms/step - loss: 0.2224 - sparse_categorical_accuracy: 0.8832
Epoch 6/10
[1m1779/1779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m771s[0m 433ms/step - loss: 0.2052 - sparse_categorical_accuracy: 0.8912
Epoch 7/10
[1m1779/1779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m771s[0m 433ms/step - loss: 0.1873 - sparse_categorical_ac

<keras.src.callbacks.history.History at 0x794bcc235390>

## Inference 

In [15]:
tag = (
    "Given the context below, create a new slang term. "
    "The slang should be catchy, easy to use, and relevant to modern youth culture. "
    "Make sure it's something that would feel natural in casual conversation:\n\n"
)

context = "You're hanging out with friends at school just chatting in recess."

condition = "You should suggest a new slang and its definition, also give one example using that slang for clarification. Example should be long and also precise."

prompt = template.format(
    instruction = tag + context + condition,
    response="",
)

sampler = keras_nlp.samplers.TopKSampler(k=10, seed=2)
gemma_lm.compile(sampler=sampler)

output = gemma_lm.generate(prompt, max_length=512)

print(output)

Instruction:
Given the context below, create a new slang term. The slang should be catchy, easy to use, and relevant to modern youth culture. Make sure it's something that would feel natural in casual conversation:

You're hanging out with friends at school just chatting in recess.You should suggest a new slang and its definition, also give one example using that slang for clarification. Example should be long and also precise.

Response:
Slang: YYS

Definition: Year since graduation

Example: He’s been out of YYS, time flies!


In [17]:
tag = (
    "Given the context below, create a new slang term. "
    "The slang should be catchy, easy to use, and relevant to modern youth culture. "
    "Make sure it's something that would feel natural in casual conversation:\n\n"
)

context = "You're at your office working on your project with your teammates. "

condition = "You should suggest a new slang and its definition, also give one example using that slang for clarification. Example should be long and also precise."

prompt = template.format(
    instruction = tag + context + condition,
    response="",
)

sampler = keras_nlp.samplers.TopKSampler(k=10, seed=2)
gemma_lm.compile(sampler=sampler)

output = gemma_lm.generate(prompt, max_length=512)

print(output)

Instruction:
Given the context below, create a new slang term. The slang should be catchy, easy to use, and relevant to modern youth culture. Make sure it's something that would feel natural in casual conversation:

You're at your office working on your project with your teammates. You should suggest a new slang and its definition, also give one example using that slang for clarification. Example should be long and also precise.

Response:
Slang: YOTW

Meaning: Your office at work

Example: YOTW, let’s brainstorm some ideas.


In [18]:
tag = (
    "Given the context below, create a new slang term. "
    "The slang should be catchy, easy to use, and relevant to modern youth culture. "
    "Make sure it's something that would feel natural in casual conversation:\n\n"
)

context = "You're hanging out with friends at a restaurant drinking wine."

condition = "You should suggest a new slang and its definition, also give one example using that slang for clarification. Example should be long and also precise."

prompt = template.format(
    instruction = tag + context + condition,
    response="",
)

sampler = keras_nlp.samplers.TopKSampler(k=10, seed=2)
gemma_lm.compile(sampler=sampler)

output = gemma_lm.generate(prompt, max_length=512)

print(output)

Instruction:
Given the context below, create a new slang term. The slang should be catchy, easy to use, and relevant to modern youth culture. Make sure it's something that would feel natural in casual conversation:

You're hanging out with friends at a restaurant drinking wine.You should suggest a new slang and its definition, also give one example using that slang for clarification. Example should be long and also precise.

Response:
Slang: WYIWINE

Definition: Your wife would not want you to be drinking wine

Example: It’s late, WYIWINE, let’s call it a night.


## Upload Model to HuggingFace

In [20]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [21]:
from huggingface_hub import HfApi
import os

# Save the model
save_directory = "/kaggle/working"
model_name = "slang-generator.keras"
model_path = os.path.join(save_directory, model_name)
gemma_lm.save(model_path)

In [22]:
from huggingface_hub import create_repo

repo_name = "genz-slang-generator"
create_repo(repo_name)

RepoUrl('https://huggingface.co/SeoyeonPark1223/genz-slang-generator', endpoint='https://huggingface.co', repo_type='model', repo_id='SeoyeonPark1223/genz-slang-generator')

In [25]:
# Initialize the API
api = HfApi()

# Define the repository ID
repo_id = "SeoyeonPark1223/genz-slang-generator"

# Upload the model
api.upload_file(
    path_or_fileobj=model_path,
    path_in_repo=model_name,
    repo_id=repo_id,
    repo_type="model"
)

print(f"Model uploaded successfully to {repo_id}")

slang-generator.keras:   0%|          | 0.00/10.5G [00:00<?, ?B/s]

Model uploaded successfully to SeoyeonPark1223/genz-slang-generator
