In [1]:
import pandas as pd

# Read the prompt dataset from the CSV that sits next to this notebook
df = pd.read_csv(filepath_or_buffer="lofi_prompts.csv")

# Preview the first five rows as a quick sanity check
df.head(n=5)

Unnamed: 0,prompt
0,Exploring the world of quantum computing
1,Jaw bone lo-fi for energetic expressions
2,Uplifting lo-fi for retirement home celebrations
3,Calm lo-fi for meditation sessions
4,Anime shopping mall lo-fi for retail therapy


In [2]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import re

# Words dropped from every prompt: generic English function words plus genre
# terms ("lofi", "beat", ...). Built once instead of on every call.
_STOPWORDS = frozenset({
    "lofi", "for", "the", "a", "and", "to", "in", "on", "with", "of",
    "beat", "track", "music",
})


def extract_keywords(text):
    """
    Extract keywords from a prompt.

    The text is lowercased, every character that is not an ASCII letter or
    whitespace is deleted (so "lo-fi" becomes "lofi" and "Beldum's" becomes
    "beldums"), and the remaining words are filtered against a fixed
    stopword list. No part-of-speech filtering is performed.

    Parameters
    ----------
    text : str
        The prompt to process. Any non-string value (e.g. ``None`` or the
        float NaN pandas uses for a missing cell) yields an empty list.

    Returns
    -------
    list of str
        The non-stopword words, lowercased, in their original order.
    """
    # Missing cells arrive as float NaN / None and have no .lower(); treat
    # them as "no keywords" rather than crashing the whole .apply().
    if not isinstance(text, str):
        return []

    # Deleting (not replacing) punctuation is what turns "lo-fi" into the
    # stopword "lofi".
    cleaned = re.sub(r"[^a-zA-Z\s]", "", text.lower())

    return [word for word in cleaned.split() if word not in _STOPWORDS]

# Derive a 'keywords' column by running extract_keywords over every prompt
df["keywords"] = df["prompt"].map(extract_keywords)

In [4]:
# Display the frame to inspect the new 'keywords' column next to each prompt
# (pandas abbreviates the middle rows of a long frame with "...").
df

Unnamed: 0,prompt,keywords
0,Exploring the world of quantum computing,"[exploring, world, quantum, computing]"
1,Jaw bone lo-fi for energetic expressions,"[jaw, bone, energetic, expressions]"
2,Uplifting lo-fi for retirement home celebrations,"[uplifting, retirement, home, celebrations]"
3,Calm lo-fi for meditation sessions,"[calm, meditation, sessions]"
4,Anime shopping mall lo-fi for retail therapy,"[anime, shopping, mall, retail, therapy]"
...,...,...
495,Zen sand painting lo-fi with calming designs,"[zen, sand, painting, calming, designs]"
496,Beldum's thunder punch lo-fi for electro-magne...,"[beldums, thunder, punch, electromagnetic, ene..."
497,Zen kokedama lo-fi with moss balls,"[zen, kokedama, moss, balls]"
498,Relaxing guitar lo-fi for therapeutic sessions,"[relaxing, guitar, therapeutic, sessions]"


In [7]:


# Collect the raw prompt strings from the 'prompt' column as a plain Python list
prompt_texts = df["prompt"].to_list()

# Load the sentence-embedding model and encode every dataset prompt with it
model = SentenceTransformer("all-MiniLM-L6-v2")
prompt_embeddings = model.encode(prompt_texts)

# Checkpoint loaded when the caller does not pass its own model.
_DEFAULT_MODEL_NAME = "all-MiniLM-L6-v2"

# Per-kernel cache so repeated calls neither reload the model nor re-embed an
# unchanged dataset. Keys: "model" -> the loaded default model;
# "corpus" -> (tuple of prompts, their embeddings) for the last dataset seen.
_SUGGEST_CACHE = {}


def _default_model():
    """Return the default embedding model, loading it only on first use."""
    if "model" not in _SUGGEST_CACHE:
        _SUGGEST_CACHE["model"] = SentenceTransformer(_DEFAULT_MODEL_NAME)
    return _SUGGEST_CACHE["model"]


def _default_corpus_embeddings(prompt_texts):
    """Embed prompt_texts with the default model, reusing the last result if the prompts are unchanged."""
    corpus_key = tuple(prompt_texts)
    cached = _SUGGEST_CACHE.get("corpus")
    if cached is None or cached[0] != corpus_key:
        cached = (corpus_key, _default_model().encode(prompt_texts))
        _SUGGEST_CACHE["corpus"] = cached
    return cached[1]


def suggest_better_words(user_prompt, df, top_n=3, model=None, prompt_embeddings=None):
    """
    Return the dataset prompts most similar to ``user_prompt``.

    Despite the name, the result is a list of whole prompts from
    ``df["prompt"]`` (not individual words), ranked by cosine similarity
    between sentence embeddings, most similar first.

    Parameters
    ----------
    user_prompt : str
        The prompt to find neighbours for.
    df : pandas.DataFrame
        Dataset with a 'prompt' column.
    top_n : int, default 3
        Maximum number of prompts to return. Values <= 0 return an empty
        list; values larger than the dataset return every prompt.
    model : optional
        Object with an ``encode(list_of_str)`` method. When omitted, the
        "all-MiniLM-L6-v2" SentenceTransformer is loaded once and reused
        across calls.
    prompt_embeddings : optional
        Precomputed embeddings for ``df["prompt"].tolist()``, one row per
        prompt and in the same order. They must come from the same model
        that encodes ``user_prompt``. When omitted they are computed (and,
        for the default model, cached until the prompts change).

    Returns
    -------
    list of str
        Up to ``top_n`` prompts from the dataset.

    Raises
    ------
    ValueError
        If ``df`` has no 'prompt' column, or ``prompt_embeddings`` does not
        have one row per dataset prompt.
    """
    # Validate before doing any expensive model work.
    if "prompt" not in df.columns:
        raise ValueError("Dataset must have a 'prompt' column.")

    prompt_texts = df["prompt"].tolist()

    # Nothing to rank. This also avoids the `[-0:]` slice, which would select
    # the whole array instead of nothing.
    if top_n <= 0 or not prompt_texts:
        return []

    if prompt_embeddings is None:
        if model is None:
            prompt_embeddings = _default_corpus_embeddings(prompt_texts)
        else:
            prompt_embeddings = model.encode(prompt_texts)
    elif len(prompt_embeddings) != len(prompt_texts):
        raise ValueError("prompt_embeddings must contain one row per dataset prompt.")

    if model is None:
        model = _default_model()

    # Embed the query as a batch of one so cosine_similarity gets a 2-D input.
    user_embedding = model.encode([user_prompt])
    similarities = cosine_similarity(user_embedding, prompt_embeddings)[0]

    # argsort is ascending: reverse for best-first, then keep the first top_n.
    top_indices = similarities.argsort()[::-1][:top_n]
    return [prompt_texts[i] for i in top_indices]

# Demo: look up the dataset prompts closest to a sample query and print them
suggestions = suggest_better_words(user_prompt="rainy day lofi beat", df=df)
print("Suggested words:", suggestions)

Suggested words: ['Enigmatic Theremin lo-fi with rain drops', 'Dust Bowl dry spell lo-fi for devastating dust storm aftermath', 'Comforting lo-fi for night time']


In [8]:
# Second demo query; the returned list is shown as the cell output
suggest_better_words(user_prompt="beach day vibe", df=df)

['Windy seaside lo-fi with gentle waves',
 'Seaside contemplation lo-fi with gentle waves',
 'Uplifting violin lo-fi for sunny spring vibes']