# Recommendation using Embeddings and Nearest Neighbor Search

This notebook is **Google Colab ready** and implements a content-based recommender system using OpenAI embeddings.

In [None]:
!pip install --upgrade openai pandas numpy scikit-learn tqdm

## Set OpenAI API Key

In [None]:
import os
from getpass import getpass

# Do not paste a real key into the notebook source. If a key is already
# present in the environment it is kept as-is; otherwise it is requested
# interactively without echoing the input.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

## Imports and Configuration

In [None]:
import os
import pickle

import numpy as np
import pandas as pd
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

# Settings a reader may want to tune: where the embedding cache is pickled
# and which embedding model name is sent with each request.
CACHE_PATH = "embedding_cache.pkl"
EMBEDDING_MODEL = "text-embedding-3-small"

# Shared API client, constructed with no explicit arguments
# (presumably it picks up OPENAI_API_KEY from the environment - confirm).
client = OpenAI()

## Load Dataset (upload `AG_news_samples.csv` to the working directory first)

In [None]:
# Read the news samples from a CSV file located in the current working directory.
df = pd.read_csv("AG_news_samples.csv")
# Last expression of the cell: preview the first rows of the loaded frame.
df.head()

## Load or Initialize Embedding Cache

In [None]:
# Load previously computed embeddings so they are not requested again.
# NOTE(security): pickle.load can execute arbitrary code; only load a cache
# file that this notebook (or you) created.
try:
    with open(CACHE_PATH, "rb") as f:
        embedding_cache = pickle.load(f)
except FileNotFoundError:
    # First run: no cache file exists yet.
    embedding_cache = {}
except (EOFError, pickle.UnpicklingError):
    # A truncated or corrupted cache file (e.g. from an interrupted write)
    # cannot be read; start over with an empty cache instead of crashing.
    print(f"Could not read {CACHE_PATH}; starting with an empty embedding cache.")
    embedding_cache = {}

## Embedding Function

In [None]:
def get_embedding(text, model=EMBEDDING_MODEL):
    """Return the embedding for ``text``, consulting the cache first.

    The cache key is the ``(text, model)`` pair. On a cache miss the embedding
    is requested through ``client.embeddings.create``, stored in the
    module-level ``embedding_cache`` and the whole cache is pickled to
    ``CACHE_PATH``.

    Args:
        text: Value passed as ``input`` to the embeddings request.
        model: Model name passed to the request; defaults to
            ``EMBEDDING_MODEL``.

    Returns:
        The ``embedding`` of the first item in the response data, or the
        value already cached for ``(text, model)``.
    """
    key = (text, model)
    if key not in embedding_cache:
        response = client.embeddings.create(
            model=model,
            input=text
        )
        embedding_cache[key] = response.data[0].embedding
        # Persist via a temporary file and swap it into place, so that an
        # interruption in the middle of the dump cannot leave a truncated
        # cache file at CACHE_PATH.
        # Note: the entire cache is re-serialized on every miss.
        tmp_path = f"{CACHE_PATH}.tmp"
        with open(tmp_path, "wb") as f:
            pickle.dump(embedding_cache, f)
        os.replace(tmp_path, CACHE_PATH)
    return embedding_cache[key]

## Generate Embeddings

In [None]:
# Texts to embed, taken from the "description" column in row order.
descriptions = df["description"].tolist()

# One embedding per description (served from the cache when available),
# stacked into a single array; tqdm reports progress over the list.
embeddings = np.array([get_embedding(doc) for doc in tqdm(descriptions)])

## Recommendation Function

In [None]:
def recommend(index, k=5):
    """Print the ``k`` articles most similar to the article at ``index``.

    Similarity is the cosine similarity between the source article's row in
    the module-level ``embeddings`` array and every row of that array. The
    source article itself is never listed as a recommendation.

    Args:
        index: Position of the source article in ``embeddings`` /
            ``descriptions``. Negative values count from the end.
        k: Maximum number of recommendations to print. Values ``<= 0`` print
            only the source article.

    Returns:
        None. All results are printed.

    Raises:
        IndexError: If ``index`` is out of range for ``embeddings``.
    """
    # Indexing first lets an out-of-range index fail before anything is printed.
    query = embeddings[index].reshape(1, -1)
    # Normalize a negative index so that the self-match check below works;
    # the ranked indices produced by argsort are always non-negative.
    source = index + len(embeddings) if index < 0 else index

    scores = cosine_similarity(query, embeddings)[0]
    ranked = scores.argsort()[::-1]  # highest similarity first
    print("SOURCE ARTICLE:\n")
    print(descriptions[source])
    print("\n----------------------------\n")
    count = 0
    for i in ranked:
        # Check the limit before printing so that k <= 0 prints nothing.
        if count >= k:
            break
        if i == source:
            continue
        count += 1
        print(f"Recommendation #{count}")
        print(descriptions[i])
        print(f"Similarity: {scores[i]:.3f}\n")

## Test Examples

In [None]:
# Show the top five recommendations for the first two articles.
for article_index in (0, 1):
    recommend(index=article_index, k=5)