# Recommendation using Embeddings and Nearest Neighbor Search

This notebook is **Google Colab ready** and implements a content-based recommender system using OpenAI embeddings.

In [None]:
!pip install --upgrade openai pandas numpy scikit-learn tqdm gradio

## Set OpenAI API Key

In [None]:
import getpass
import os

# Do not paste a real key into the notebook: it would be saved with the file.
# Keep a key that is already exported in the environment; otherwise (unset,
# empty, or still the placeholder text) ask for it without echoing the input.
if os.environ.get("OPENAI_API_KEY", "") in ("", "YOUR_OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

## Imports and Configuration

In [None]:
import pandas as pd
import numpy as np
import pickle
from tqdm import tqdm
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI

# Model name sent with every embedding request, and the pickle file in which
# computed embeddings are stored between runs.
EMBEDDING_MODEL = "text-embedding-3-small"
CACHE_PATH = "embedding_cache.pkl"

# Constructed without arguments, so the client takes its API key from the
# OPENAI_API_KEY environment variable.
client = OpenAI()

## Load Dataset (upload `AG_news_samples.csv` to the working directory first)

In [None]:
# Read the news samples from the working directory into a DataFrame.
df = pd.read_csv("AG_news_samples.csv")
# Last expression of the cell: displays the first rows for a quick look.
df.head()

## Load or Initialize Embedding Cache

In [None]:
# NOTE: pickle.load can execute arbitrary code. Only load a cache file that
# this notebook produced itself, never one obtained from an untrusted source.
try:
    with open(CACHE_PATH, "rb") as f:
        embedding_cache = pickle.load(f)
except FileNotFoundError:
    # First run: nothing has been cached yet.
    embedding_cache = {}
except (EOFError, pickle.UnpicklingError) as exc:
    # An empty or truncated cache file (for example after a run that was
    # interrupted while writing it) cannot be read. The cache only saves API
    # calls, so start from scratch instead of failing the whole notebook.
    print(f"Ignoring unreadable embedding cache {CACHE_PATH!r}: {exc}")
    embedding_cache = {}

## Embedding Function

In [None]:
def get_embedding(text, model=EMBEDDING_MODEL):
    """Return the embedding of ``text``, requesting it only on a cache miss.

    Results are memoised in the module-level ``embedding_cache`` dict under the
    key ``(text, model)``. On a miss the embedding is requested through
    ``client.embeddings.create`` and the whole cache is pickled to
    ``CACHE_PATH`` so later runs can reuse it.

    Args:
        text: The text to embed.
        model: Name of the embedding model; defaults to ``EMBEDDING_MODEL``.

    Returns:
        The ``embedding`` attribute of the first item in the API response
        (presumably a list of floats; not verified here).
    """
    import os  # local import keeps this cell independent of the other cells

    key = (text, model)
    if key not in embedding_cache:
        response = client.embeddings.create(model=model, input=text)
        embedding_cache[key] = response.data[0].embedding

        # Write the pickle to a temporary file and then swap it into place.
        # Writing straight to CACHE_PATH would leave a truncated, unreadable
        # cache behind if the run is interrupted in the middle of the dump.
        tmp_path = f"{CACHE_PATH}.tmp"
        with open(tmp_path, "wb") as f:
            pickle.dump(embedding_cache, f)
        os.replace(tmp_path, CACHE_PATH)
    return embedding_cache[key]

## Generate Embeddings

In [None]:
# Embed every article description in order (tqdm shows progress) and stack the
# resulting vectors into one array whose rows line up with `descriptions`.
descriptions = df["description"].tolist()
embeddings = np.array(
    [get_embedding(article_text) for article_text in tqdm(descriptions)]
)

## Recommendation Function

In [None]:
def recommend(index, k=5):
    """Print the articles most similar to the article at ``index``.

    Similarity is the cosine similarity between the source article's row of
    the module-level ``embeddings`` array and every row of that array. The
    article texts are read from the module-level ``descriptions`` list.

    Args:
        index: Position of the source article. Negative values count from the
            end, as with ordinary sequence indexing.
        k: Maximum number of recommendations to print. A value of 0 or less
            prints the source article only.

    Raises:
        IndexError: If ``index`` is outside the range of ``embeddings``.
    """
    total = len(embeddings)
    if not -total <= index < total:
        raise IndexError(f"index {index} is out of range for {total} articles")
    # Map a negative index to its non-negative position. Without this the
    # self-match test in the loop never fires (ranked positions are always
    # non-negative) and the source article would be recommended to itself.
    index = index % total

    query = embeddings[index].reshape(1, -1)
    scores = cosine_similarity(query, embeddings)[0]
    ranked = scores.argsort()[::-1]  # positions ordered from most to least similar

    print("SOURCE ARTICLE:\n")
    print(descriptions[index])
    print("\n----------------------------\n")

    count = 0
    for i in ranked:
        # Check the limit before printing so that k <= 0 prints nothing.
        if count >= k:
            break
        if i == index:
            continue  # never recommend the source article itself
        count += 1
        print(f"Recommendation #{count}")
        print(descriptions[i])
        print(f"Similarity: {scores[i]:.3f}\n")

## Gradio Interface

In [None]:
import gradio as gr

def get_recommendations(article_index, num_recommendations):
    """Build a Markdown report of the articles most similar to one article.

    Args:
        article_index: Index of the source article in the module-level
            ``descriptions`` list; converted with ``int()``.
        num_recommendations: Number of similar articles to list, from 1 to 10;
            converted with ``int()``.

    Returns:
        A Markdown string containing the source article followed by the
        recommendations with their cosine-similarity scores, or a string
        starting with ``"Error: "`` when an input is out of range or any
        exception is raised while building the result.
    """
    try:
        article_index = int(article_index)
        num_recommendations = int(num_recommendations)

        if article_index < 0 or article_index >= len(descriptions):
            return "Error: Invalid article index. Please enter a value between 0 and " + str(len(descriptions) - 1)

        if num_recommendations < 1 or num_recommendations > 10:
            return "Error: Number of recommendations should be between 1 and 10"

        query = embeddings[article_index].reshape(1, -1)
        scores = cosine_similarity(query, embeddings)[0]
        ranked = scores.argsort()[::-1]  # most similar first

        # Emoji are written as escape sequences so the source stays ASCII and
        # cannot be garbled by a wrong file encoding:
        # U+1F4F0 is the newspaper emoji, U+2728 is the sparkles emoji.
        parts = [
            "\U0001F4F0 **SOURCE ARTICLE:**\n\n",
            descriptions[article_index] + "\n\n",
            "---\n\n",
            "\u2728 **RECOMMENDATIONS:**\n\n",
        ]

        count = 0
        for i in ranked:
            if i == article_index:
                continue  # never recommend the source article itself
            count += 1
            parts.append(f"**#{count}** | Similarity: {scores[i]:.3f}\n\n")
            parts.append(descriptions[i] + "\n\n---\n\n")
            if count >= num_recommendations:
                break

        return "".join(parts)
    except Exception as e:
        # This function feeds a UI output component, so any failure is
        # reported as text instead of being raised.
        return f"Error: {str(e)}"

# Create Gradio interface
# Highest selectable article index; used for both the slider range and its hint.
max_article_index = len(descriptions) - 1

iface = gr.Interface(
    fn=get_recommendations,
    inputs=[
        gr.Slider(
            minimum=0,
            maximum=max_article_index,
            step=1,
            label="Article Index",
            info=f"Select an article (0-{max_article_index})"
        ),
        gr.Slider(
            minimum=1,
            maximum=10,
            step=1,
            value=5,
            label="Number of Recommendations",
            info="How many recommendations do you want?"
        )
    ],
    outputs=gr.Markdown(label="Results"),
    # U+1F4DA is the books emoji; written as an escape sequence so the title
    # cannot be garbled by a wrong file encoding.
    title="\U0001F4DA Content-Based Recommendation System",
    description="Find similar articles using OpenAI embeddings and cosine similarity",
    theme=gr.themes.Soft(),
    allow_flagging="never"
)

# Launch the interface
iface.launch(share=True)