# RAG with Visualization: AI/ML Study Notes

In [1]:
import os
import faiss
import openai
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import umap
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import normalize
from dotenv import load_dotenv

In [None]:
# Knowledge base for the RAG demo: each string is one self-contained study note.
# The notes are embedded one string at a time, and a note's position in this
# list is the id used to look its text up again after retrieval.
study_notes = [
    "A Convolutional Neural Network (CNN) is a type of deep neural network designed to process data with grid-like topology such as images. It uses convolutional layers to extract spatial features.",
    "ResNet, or Residual Network, introduces skip connections to allow gradients to flow through deep networks more easily, making it possible to train models with over 100 layers.",
    "AlexNet was one of the first deep CNNs that showed the power of GPUs in training deep learning models. It uses ReLU activation and dropout.",
    "Transfer learning allows reusing a model trained on one dataset for a different but related task. Commonly used with large models like ResNet or BERT.",
    "Backpropagation computes gradients of the loss function with respect to network weights using the chain rule. It enables optimization via gradient descent.",
    "L1 regularization penalizes the absolute value of weights, encouraging sparsity. L2 regularization penalizes the squared magnitude of weights, promoting smaller weights.",
    "Overfitting occurs when a model performs well on training data but poorly on unseen data. Techniques like dropout, regularization, and early stopping can help prevent it.",
    "Dropout randomly disables neurons during training, reducing co-dependency between neurons and helping generalization.",
    "Early stopping halts training when validation loss stops improving, preventing overfitting.",
    "PyTorch is an open-source deep learning framework with dynamic computation graphs, widely used for research and production.",
    "An epoch is a full pass through the training data. A batch is a subset used to compute one update. Iterations = dataset size / batch size.",
    "Learning rate determines how big a step gradient descent takes. A high learning rate can overshoot minima, while a low one slows down learning.",
    "Cross-entropy loss is used for classification, measuring the difference between predicted probabilities and actual labels.",
    "Mean squared error (MSE) is used for regression, minimizing the square of the difference between predicted and actual values.",
    "ReLU outputs zero for negative inputs and the input itself for positives. It’s simple and effective, avoiding vanishing gradients.",
    "Batch normalization standardizes the inputs to each layer, stabilizing training and allowing higher learning rates.",
    "A confusion matrix shows TP, FP, TN, FN — it's used to understand classification errors.",
    "Precision = TP / (TP + FP), recall = TP / (TP + FN), F1-score = harmonic mean of precision and recall.",
    "Adam optimizer combines momentum and RMSProp. It’s adaptive and widely used.",
    "RMSProp adapts learning rate per parameter by maintaining a moving average of squared gradients.",
    "Learning rate schedulers reduce the LR over time or when a metric plateaus. Types include step, cosine, exponential.",
    "Fine-tuning involves training a pre-trained model further on new data. Often, lower layers are frozen and only top layers are trained.",
    "Data augmentation creates artificial training examples by altering original data — e.g., flipping, cropping, or color shifting images.",
    "Autoencoders compress data into a lower dimension (encoding) and try to reconstruct it. They're used for unsupervised learning and denoising.",
    "GANs (Generative Adversarial Networks) consist of a generator and discriminator competing, producing realistic synthetic data.",
    "Supervised learning involves labeled data. Unsupervised learning deals with patterns in unlabeled data. Semi-supervised uses both.",
    "K-means clustering groups data by minimizing distance to cluster centroids. It’s unsupervised and assumes spherical clusters.",
    "Principal Component Analysis (PCA) reduces dimensionality by projecting data to directions of maximum variance.",
    "A transformer uses self-attention to weigh input tokens differently based on context, enabling better sequence understanding.",
    "The attention mechanism computes scores between tokens to find which ones to focus on. Scaled dot-product attention is common.",
    "Positional encoding injects order into transformer inputs, since attention alone is permutation-invariant.",
    "BERT (Bidirectional Encoder Representations from Transformers) is trained using masked language modeling and next sentence prediction.",
    "GPT is a unidirectional transformer-based model trained for next-token prediction. It powers many generative NLP systems.",
    "Tokenization splits text into tokens — often words or subwords — to prepare inputs for NLP models.",
    "Word2Vec and GloVe are pre-transformer word embeddings capturing semantic similarity. Now mostly replaced by contextual embeddings.",
    "MLflow is a tool for experiment tracking, model versioning, and deployment in machine learning workflows.",
    "Docker is often used in ML pipelines to containerize code and ensure reproducibility across environments.",
    "A deployment pipeline for ML includes preprocessing, inference, logging, and monitoring. Tools like FastAPI and Flask are common.",
    "Evaluation metrics depend on task: accuracy for classification, MAE/MSE for regression, BLEU/ROUGE for NLP, IoU for object detection.",
    "Hyperparameter tuning uses tools like Optuna or Grid Search to find the best combinations of model settings.",
    "Overparameterization means the model has more parameters than needed. Surprisingly, this often improves performance when combined with good regularization."
]


In [None]:
# Pull settings from a .env file (via python-dotenv) into the environment, then
# hand the API key to the openai module. The key itself is never hardcoded.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    # Surface the misconfiguration here rather than at the first API request.
    print("[OpenAI WARNING] OPENAI_API_KEY is not set; requests to the OpenAI API will fail.")

In [None]:
# Embed every study note and L2-normalise each row, so that the inner product
# computed by the FAISS index below equals cosine similarity.
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = normalize(
    model.encode(study_notes, convert_to_numpy=True),
    axis=1,
)

# Flat inner-product index sized to the embedding width, holding all notes.
dimension = embeddings.shape[-1]
index = faiss.IndexFlatIP(dimension)
index.add(embeddings)

In [None]:
def retrieve(query, top_k=5):
    """Find the study notes most similar to ``query``.

    The query is embedded with the same model as the notes and L2-normalised,
    so the inner-product scores returned by the index are cosine similarities.

    Args:
        query: Question text to search for.
        top_k: Maximum number of notes to return. Values larger than the
            number of indexed notes are clamped to that number.

    Returns:
        A ``(scores, indices, query_vec)`` tuple: the 1-D similarity scores,
        the matching 1-D note positions, and the normalised query embedding
        with shape ``(1, dimension)``.
    """
    query_vec = model.encode([query], convert_to_numpy=True)
    query_vec = normalize(query_vec, axis=1)
    # Asking FAISS for more neighbours than it stores pads the result with
    # index -1, which would silently select the *last* note when used as a
    # list index. Never request more than the index holds.
    k = min(top_k, index.ntotal)
    scores, indices = index.search(query_vec, k)
    return scores[0], indices[0], query_vec

In [None]:
def show_results(scores, indices, preview_chars=80):
    """Display the retrieved notes as a table and a bar chart of their scores.

    Args:
        scores: Similarity score of each retrieved note.
        indices: Positions of the retrieved notes in ``study_notes``, aligned
            element-for-element with ``scores``.
        preview_chars: Maximum number of characters of each note shown in the
            table; longer notes are cut and suffixed with "...".
    """
    def preview(text):
        # Only add the ellipsis when something was actually cut off; short
        # notes are shown verbatim instead of looking truncated.
        return text if len(text) <= preview_chars else text[:preview_chars] + "..."

    results = pd.DataFrame({
        "Chunk": [preview(study_notes[i]) for i in indices],
        "Similarity Score": scores,
    })
    display(results)

    # One bar per hit, in the given order; tick labels carry the note ids.
    positions = list(range(len(scores)))
    _, ax = plt.subplots(figsize=(10, 5))
    sns.barplot(x=positions, y=scores, ax=ax)
    ax.set_xticks(positions)
    ax.set_xticklabels([f"#{i}" for i in indices])
    ax.set_xlabel("Note index")
    ax.set_ylabel("Cosine Similarity")
    ax.set_title("Top-K Retrieved Chunks")
    plt.show()

In [None]:
def plot_umap(all_embeddings, query_vec, indices):
    """Project the query and all note embeddings to 2-D with UMAP and plot them.

    The query is stacked on top of the notes before fitting, so row 0 of the
    projection is the query and note ``i`` is found at row ``i + 1``.

    Args:
        all_embeddings: Array of note embeddings, one row per note.
        query_vec: Embedding of the query (a single row).
        indices: Integer positions of the retrieved notes within
            ``all_embeddings``; any integer sequence (array, list, tuple).
    """
    reducer = umap.UMAP(random_state=42)  # fixed seed for a repeatable layout
    reduced = reducer.fit_transform(np.vstack([query_vec, all_embeddings]))

    # Coerce to an integer array so plain lists/tuples work too; the explicit
    # dtype keeps an empty selection usable as an index. +1 skips the query row.
    top_rows = np.asarray(indices, dtype=int) + 1

    _, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(reduced[1:, 0], reduced[1:, 1], c="gray", label="Notes")
    # Drawn after the gray layer so the retrieved notes sit on top of it.
    ax.scatter(reduced[top_rows, 0], reduced[top_rows, 1], c="blue", label="Top-k")
    ax.scatter(reduced[0, 0], reduced[0, 1], c="red", label="Query", marker="X", s=100)
    ax.set_xlabel("UMAP 1")
    ax.set_ylabel("UMAP 2")
    ax.legend()
    ax.set_title("UMAP Projection of Notes + Query")
    plt.show()

In [None]:
def generate_answer(question, context, llm_model="gpt-3.5-turbo", temperature=0.3, max_tokens=300):
    """Ask the chat model to answer ``question`` using only ``context``.

    NOTE: this uses ``openai.ChatCompletion`` and ``openai.error``, i.e. the
    interface of the pre-1.0 ``openai`` package.

    Args:
        question: The user's question.
        context: Iterable of text snippets; they are joined with newlines and
            placed in the prompt as the only material the model may rely on.
        llm_model: Name of the chat model to call.
        temperature: Sampling temperature passed to the API.
        max_tokens: Upper bound on the length of the generated answer.

    Returns:
        The model's answer with surrounding whitespace stripped, or a fixed
        apology string if the API call raises an ``OpenAIError`` (the error is
        printed in that case).
    """
    joined_context = "\n".join(context)
    prompt = f"""
You are a helpful AI tutor. Based only on the provided context, answer the question.

Context:
{joined_context}

Question: {question}
Answer:
"""
    try:
        response = openai.ChatCompletion.create(
            model=llm_model,
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
            max_tokens=max_tokens,
        )
        return response['choices'][0]['message']['content'].strip()
    except openai.error.OpenAIError as e:
        # Best effort: report the failure and keep the notebook running.
        print("\n[OpenAI ERROR]", str(e))
        return "⚠️ Sorry, something went wrong while generating the answer."

In [None]:
# End-to-end demo: retrieve the closest notes for one question, visualise the
# retrieval, then ask the LLM to answer from those notes.
query = "What is the purpose of this RAG system?"
scores, indices, query_vec = retrieve(query)

# Table and bar chart of the hits, then their place in the embedding space.
show_results(scores, indices)
plot_umap(embeddings, query_vec, indices)

# The full text of the retrieved notes is the context handed to the model.
context = [study_notes[note_id] for note_id in indices]
answer = generate_answer(query, context)
print("\n--- Final Answer ---\n", answer)