### Load dataset

In [None]:
import os
from dotenv import load_dotenv, find_dotenv

# Locate a .env file and load its variables into the process environment.
load_dotenv(find_dotenv())

# Show only whether the key is set (and non-empty). Echoing the value itself
# would write the secret into the notebook's saved cell output.
bool(os.environ.get("OPENAI_API_KEY"))

In [None]:
import pandas as pd

# Read the semicolon-separated question file into a DataFrame.
df = pd.read_csv("./questions.csv", delimiter=";")

# Preview the first three rows.
df.head(n=3)

In [None]:
# Display the column labels of the loaded DataFrame.
df.columns

In [None]:
# Collect the distinct values of the "category" column, then order them.
categories = list(df["category"].unique())
categories.sort()
categories

In [None]:
from openai.embeddings_utils import get_embeddings

# Embed every value of the "answer" column with a single get_embeddings call.
# NOTE(review): openai.embeddings_utils appears to be unavailable in
# openai>=1.0 -- confirm the pinned openai version before running.
matrix = get_embeddings(
    df["answer"].tolist(),
    engine="text-embedding-ada-002",
)

In [None]:
from sklearn.decomposition import PCA

# Reduce the embedding matrix to three principal components so that each row
# can be placed in a 3D plot.
pca = PCA(n_components=3)
vis_dims = pca.fit_transform(matrix)
# Store each row's reduced coordinates as a list in a new "embed_vis" column.
df["embed_vis"] = vis_dims.tolist()

In [None]:
# numpy is needed for np.array below; no earlier cell imports it.
import numpy as np
import plotly.graph_objs as go

# Draw the 3-component "embed_vis" coordinates as a 3D scatter plot, with one
# trace (and therefore one legend entry) per category.
fig = go.Figure()

for i, cat in enumerate(categories):
    # Stack this category's per-row coordinate lists into a 2D array so each
    # component can be sliced out as a column.
    sub_matrix = np.array(df[df["category"] == cat]["embed_vis"].to_list())
    x = sub_matrix[:, 0]
    y = sub_matrix[:, 1]
    z = sub_matrix[:, 2]

    fig.add_trace(
        go.Scatter3d(
            x=x,
            y=y,
            z=z,
            mode="markers",
            # NOTE(review): color is a single scalar per trace; confirm that it
            # maps onto the Viridis scale as intended across traces.
            marker=dict(size=5, color=i, colorscale="Viridis", opacity=0.8),
            name=cat,
        )
    )

# Fixed-size figure with labelled axes.
fig.update_layout(
    autosize=False,
    title="3D Scatter Plot of Categories",
    width=800,
    height=500,
    margin=dict(l=50, r=50, b=100, t=100, pad=10),
    scene=dict(
        xaxis=dict(title="x"),
        yaxis=dict(title="y"),
        zaxis=dict(title="z"),
    ),
)

fig.show()