## Step 1: Installing Necessary Libraries
First, install UMAP (published on PyPI as `umap-learn`) and Matplotlib. You can do this directly in your Jupyter notebook by uncommenting and running the following command:

In [None]:
# Uncomment and run the line below to install the packages into this kernel's
# environment. The import cell also needs numpy, torch and transformers.
# %pip install umap-learn matplotlib


## Step 2: Importing Libraries
Once installed, you can import the necessary libraries in your notebook:

In [43]:
import umap
import matplotlib.pyplot as plt
import numpy as np
from custom_umap import CustomUMAP
import torch
from transformers import AutoModel, AutoTokenizer

In [44]:
umap = CustomUMAP()


In [None]:

model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
sample_text = [
    "Advancements in artificial intelligence are transforming the tech industry.",
    "The latest smartphone models feature incredibly advanced camera systems.",
    "Cybersecurity is a growing concern with the rise of digital banking.",
    "Virtual reality offers immersive experiences unlike anything in traditional gaming.",
    "Self-driving cars could revolutionize how we commute in the future.",
    "Blockchain technology is not just for cryptocurrencies but also for securing data.",
    "Tech startups are increasingly focusing on sustainable energy solutions.",
    "The Amazon rainforest is home to a diverse range of wildlife and plant species.",
    "Climate change is impacting global weather patterns and ecosystems.",
    "Hiking in the mountains is a great way to connect with nature.",
    "Conservation efforts are crucial for protecting endangered species.",
    "The beauty of the ocean is unmatched, but its health is in danger.",
    "Urban gardening is becoming popular in cities for sustainability.",
    "Observing the stars provides insights into the vastness of the universe.",
    "Restoring natural habitats helps maintain ecological balance.",
    "Cooking at home can be both fun and therapeutic.",
    "Exploring different cuisines is a journey into various cultures.",
    "Baking requires precision and patience, but the results are rewarding.",
    "Fresh ingredients make a significant difference in the flavor of dishes.",
    "The art of plating food is essential in fine dining experiences.", 
    "Outerspace is pretty scary when you think about all of the stars", 
    "The solar system has 8 planets", 
    "Asteroids are very common in our galaxy", 
    "I wonder if we will ever visit the moon again"
   
]

inputs = [tokenizer(text, return_tensors="pt",
                    truncation=True, max_length=512) for text in sample_text]
outputs = []
for i in inputs:
    with torch.no_grad():
        outputs.append(model(**i))

# Assuming you want the last hidden states (token-level embeddings)
embeddings = [output[0] for output in outputs]


In [None]:

# Convert the list of tensors to a NumPy array
embeddings_np = umap.to_numpy(embeddings)




In [None]:
# Initialize the UMAP object for 2D (or 3D) reduction
reducer = umap.UMAP(n_components=2, random_state=42, n_neighbors=5)

# Fit the model to your embeddings and transform the data
reduced_embeddings = reducer.fit_transform(sentence_embeddings_np)

In [None]:
# 2D visualization: one point per row of the reduced embeddings
ax = plt.gca()
ax.scatter(reduced_embeddings[:, 0], reduced_embeddings[:, 1])

# Label the axes and add a title so the figure can be read on its own
ax.set_xlabel('UMAP Dimension 1')
ax.set_ylabel('UMAP Dimension 2')
ax.set_title('2D UMAP Projection of BERT Sentence Embeddings')

# Render the figure
plt.show()