# In [None]:
import json
import re

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio

# In [None]:
# 1. Read the filtered embeddings table from disk into a DataFrame.
#    Later steps read its 'embedding' and 'text' columns.
df = pd.read_csv(filepath_or_buffer='embeddings_output_filtered.csv')

# 2. Parse the embedding column into a NumPy array
def parse_emb(e):
    """Convert one raw ``embedding`` cell into a flat float vector.

    Args:
        e: Either a JSON-encoded list of numbers such as ``"[0.12, 0.34]"``,
            or an already materialized ``list``, ``tuple`` or ``np.ndarray``.

    Returns:
        np.ndarray: A new 1-D array of dtype float.

    Raises:
        ValueError: If ``e`` has an unsupported type (this includes ``None``
            and NaN cells), is a string that is not valid JSON, or does not
            describe a flat, non-empty numeric vector.
    """
    if isinstance(e, str):
        # json.JSONDecodeError subclasses ValueError, so malformed strings
        # surface as the same exception type as every other bad input.
        raw = json.loads(e)
    elif isinstance(e, (list, tuple, np.ndarray)):
        raw = e
    else:
        raise ValueError(f"Unknown embedding format: {type(e).__name__}")

    try:
        # Force float so integer-only lists do not produce an int array and
        # ragged / non-numeric payloads are rejected here.
        vec = np.array(raw, dtype=float)
    except (TypeError, ValueError) as err:
        raise ValueError(f"Embedding is not numeric: {err}") from err

    # Anything other than a flat vector would silently corrupt the row
    # count or row width once the vectors are stacked into a matrix.
    if vec.ndim != 1 or vec.size == 0:
        raise ValueError(
            f"Embedding must be a non-empty 1-D vector, got shape {vec.shape}"
        )
    return vec

# Parse every row's embedding, keep the per-row vectors on the frame, and
# stack them row-wise into one dense matrix.
embedding_vectors = df['embedding'].apply(parse_emb)
df['emb_vec'] = embedding_vectors
emb_matrix = np.vstack(df['emb_vec'].tolist())   # shape (N, D)

# 3. Define your three conceptual axes
# Maps an axis label to the keywords that characterise it. In step 4 the
# keywords of one axis are joined with '|' and matched case-insensitively
# against df['text']; the embeddings of all matching rows are averaged into
# that axis's direction vector.
# The insertion order here is the order of the basis vectors built in step 5,
# and it has to agree with the hard-coded x/y/z axis titles in step 7.
# NOTE(review): matching is by substring, so short keywords such as 'media',
# 'logo' or 'editing' also match inside longer phrases (e.g. 'social media
# marketing'), and one row can contribute to several axes. Confirm that this
# overlap is intended.
axes = {

    # Becomes the x axis of the plot.
    'Ad/PR/Brand': [
        'advertisement', 'branding', 'brand strategy', 'brand identity', 'brand equity',
        'campaign', 'creative concept', 'tagline', 'logo', 'influencer marketing',
        'social media marketing', 'content marketing', 'guerilla marketing', 'experiential marketing',
        'public relations', 'press release', 'media buying', 'ad buying', 'digital advertising',
        'display advertising', 'programmatic', 'out-of-home advertising', 'direct marketing',
        'sponsorship', 'storytelling', 'narrative', 'reputation management', 'corporate communications',
        'brand positioning', 'brand messaging'
    ],

    # Becomes the y axis of the plot.
    'Media/Production': [
        'media', 'production', 'film production', 'video production', 'audio production',
        'post-production', 'editing', 'sound design', 'color grading', 'visual effects',
        'motion graphics', 'cinematography', 'lighting design', 'camera operation',
        'studio recording', 'live streaming', 'broadcast', 'videography', 'podcasting',
        'scriptwriting', 'storyboarding', 'directing', 'producing', 'location scouting',
        'voice-over', 'video encoding', 'transcoding', 'production management'
    ],

    # Becomes the z axis of the plot.
    'Design/UX/Web': [
        'graphic design', 'UI design', 'UX design', 'user experience', 'user interface',
        'web design', 'responsive design', 'mobile-first design', 'interaction design',
        'wireframing', 'prototyping', 'visual design', 'typography', 'layout design',
        'information architecture', 'navigation design', 'accessibility', 'usability testing',
        'user research', 'design systems', 'pattern library', 'iconography', 'illustration',
        'front-end development', 'HTML5', 'CSS3', 'JavaScript', 'SaaS design',
        'product design', 'branding design'
    ]


}

# 4. Compute each axis vector by averaging the embeddings of the rows whose
#    text contains at least one of that axis's keywords.
axis_vecs = {}
for name, kws in axes.items():
    # Escape every keyword so it is matched literally. Without escaping, a
    # keyword containing a regex metacharacter ('.', '+', '(', ...) would
    # either match unintended text or make the pattern fail to compile.
    pattern = '|'.join(re.escape(kw) for kw in kws)
    # Case-insensitive substring search; rows with missing text never match.
    mask = df['text'].str.contains(pattern, case=False, na=False, regex=True)
    if not mask.any():
        raise RuntimeError(f"No services matching axis keywords for '{name}': {kws}")
    # Mean embedding of the matching rows is this axis's direction.
    axis_vecs[name] = emb_matrix[mask.values].mean(axis=0)

# 5. Orthonormalize the 3 axis vectors so they form a right-angled basis.
#    QR orthogonalizes the columns in order: the first basis vector is
#    parallel to the first axis vector, and each later one is the part of its
#    axis vector that is orthogonal to the ones before it.
A = np.vstack(list(axis_vecs.values()))  # shape (3, D)
Q, R = np.linalg.qr(A.T)                 # Q is (D, 3), orthonormal columns
# A QR factorization is unique only up to the sign of each column of Q, so a
# basis vector may come back pointing opposite to its axis vector. That would
# mirror the axis in the plot. Flip columns so that diag(R) is non-negative,
# i.e. every basis vector has a non-negative dot product with its own axis.
signs = np.sign(np.diag(R))
signs[signs == 0] = 1.0                  # leave degenerate columns untouched
Q = Q * signs                            # scales column j by signs[j]
proj_matrix = Q.T                        # shape (3, D)

# 6. Express every service embedding in the 3-D basis: one coordinate per
#    axis, stored on the frame as the columns x, y and z.
proj_points = emb_matrix @ proj_matrix.T  # shape (N, 3)
df['x'], df['y'], df['z'] = proj_points.T

# 7. Build the interactive 3-D scatter with Plotly Express; hovering a point
#    shows the row's text.
# 7.1 The chained update_traces call shrinks the markers and makes them
#     semi-transparent.
fig = px.scatter_3d(
    data_frame=df,
    x='x',
    y='y',
    z='z',
    hover_name='text',
    title='Services on Ad/PR vs Media vs Design axes',
).update_traces(marker={'size': 2.3, 'opacity': 0.7})

# 7.2 Set the axis ranges from the data. `stretch` scales the width of each
#     range: 1 fits the data exactly, 1.5 makes every axis 1.5x as wide.
stretch = 1

axis_ranges = {}
for col in ('x', 'y', 'z'):
    lo, hi = df[col].min(), df[col].max()
    # Pad both ends by the same amount. Multiplying the bounds themselves
    # (min * stretch, max * stretch) only widens the range when
    # min < 0 < max; for all-positive or all-negative coordinates it moves
    # one bound inward and clips points.
    pad = (hi - lo) * (stretch - 1) / 2
    axis_ranges[col] = [lo - pad, hi + pad]

fig.update_layout(
    margin=dict(l=20, r=20, t=50, b=20),
    scene=dict(
        xaxis=dict(
            title='Ad/PR/Brand',
            range=axis_ranges['x']
        ),
        yaxis=dict(
            title='Media/Production',
            range=axis_ranges['y']
        ),
        zaxis=dict(
            title='Design/UX/Web',
            range=axis_ranges['z']
        ),
        # Fixed box proportions and starting camera position.
        aspectmode='manual',
        aspectratio=dict(x=1.5, y=1.5, z=1.0),
        camera=dict(eye=dict(x=1.7, y=1.7, z=0.8))
    )
)

# Open the figure in the system web browser instead of rendering inline.
pio.renderers.default = "browser"
fig.show()