In [1]:
import json
import os

import numpy as np
import seaborn as sns
from feedparser import parse
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

In [2]:
def get_medium_blogs(
    feed_url='https://medium.com/feed/@brechterlaurin',
    output_path='/home/laurinbrechter/Documents/Code/portfolio-website/src/app/[lang]/blog/medium_blogs.json',
):
    """Fetch a Medium RSS feed and cache a trimmed copy of its entries as JSON.

    Args:
        feed_url: URL handed to ``feedparser.parse``.
        output_path: File the resulting list is written to as JSON.

    Returns:
        A list with one dict per feed entry, holding the keys ``title``,
        ``link``, ``date`` (the entry's ``published`` value) and ``tags``
        (the ``term`` of every tag; an empty list when the entry has none).

    Raises:
        RuntimeError: If the parsed feed contains no entries. The output file
            is left untouched in that case so an existing cache is not
            replaced by an empty list.
    """
    feed = parse(feed_url)
    entries = feed['entries']

    if not entries:
        raise RuntimeError(f'No entries found in feed {feed_url!r}')

    feeds_parsed = [
        {
            'title': entry['title'],
            'link': entry['link'],
            'date': entry['published'],
            # An entry without tags may lack the 'tags' key altogether;
            # treat that as "no tags" instead of failing with KeyError.
            'tags': [tag['term'] for tag in entry.get('tags', [])],
        }
        for entry in entries
    ]

    # Debug aid: show which fields the last entry exposes.
    print(entries[-1].keys())

    # The context manager guarantees the file is flushed and closed.
    with open(output_path, 'w') as out_file:
        json.dump(feeds_parsed, out_file)

    return feeds_parsed

# Fetch and cache the Medium posts; the list is merged with the other blogs below.
medium_blogs = get_medium_blogs()

dict_keys(['title', 'title_detail', 'links', 'link', 'id', 'guidislink', 'tags', 'authors', 'author', 'author_detail', 'published', 'published_parsed', 'updated', 'updated_parsed', 'content', 'summary'])


In [3]:
# Load the entries stored in own_blogs.json. The context manager closes the
# file handle as soon as the JSON has been parsed.
with open('/home/laurinbrechter/Documents/Code/portfolio-website/src/app/[lang]/blog/own_blogs.json') as own_blogs_file:
    blog_json = json.load(own_blogs_file)

In [6]:
# Merge both sources into one list and number the entries by position.
# The dicts are shared with blog_json / medium_blogs, so the 'id' key
# becomes visible through those lists as well.
all_blogs = blog_json + medium_blogs
for position, blog in enumerate(all_blogs):
    blog['id'] = position

In [5]:
# Persist the merged blog list. The context manager makes sure the data is
# flushed and the file closed before the cell finishes.
with open('/home/laurinbrechter/Documents/Code/portfolio-website/src/app/[lang]/blog/allBlogs.json', 'w') as all_blogs_file:
    json.dump(all_blogs, all_blogs_file)

In [8]:
from openai import OpenAI

# The key is read from the OPENAI_API_KEY environment variable so the notebook
# runs on a fresh kernel and no secret has to be stored in a cell.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

# One input string per blog: the title followed by its tags, all separated by
# single spaces (so the title no longer runs into the first tag).
embedding_inputs = [' '.join([b['title'], *b['tags']]) for b in all_blogs]

response = client.embeddings.create(
    input=embedding_inputs,
    model="text-embedding-3-small"
)

# Short alias read by the cell that builds the embedding matrix.
r = response

In [9]:
# Collect the embedding of every returned item into one array,
# keeping the order of r.data.
embeddings = np.array([item.embedding for item in r.data])

In [10]:
# Norm of the difference between every pair of embeddings:
# entry [i, j] is the norm of embeddings[j] - embeddings[i].
pairwise_dist = np.linalg.norm(
    embeddings[np.newaxis] - embeddings[:, np.newaxis],
    axis=-1,
)

In [None]:
# Visual check of the blog-to-blog distance matrix.
sns.heatmap(data=pairwise_dist)

In [12]:
import networkx as nx
import matplotlib.pyplot as plt

In [None]:
# Demo graph for trying out the drawing calls; `g` is rebuilt from the blog
# data further down. Both the random graph and the spring layout get a fixed
# seed so the figure is identical on every run.
g = nx.relaxed_caveman_graph(10, 5, 0.2, seed=42)
fig, ax = plt.subplots(1, 1, figsize=(15, 10))
pos = nx.spring_layout(g, seed=42)
nx.draw_networkx_nodes(g, pos=pos, ax=ax, node_color='black')
nx.draw_networkx_edges(g, pos=pos, ax=ax)


In [14]:
# Mask the diagonal so a blog can never be selected as its own neighbour,
# even when another blog sits at distance 0 (identical embedding) and the
# sort order between the two would otherwise be arbitrary.
dist_without_self = pairwise_dist.copy()
np.fill_diagonal(dist_without_self, np.inf)

# A single blog has no neighbour at all; otherwise keep the one closest blog.
n_neighbors = 1 if len(all_blogs) > 1 else 0

# Kept two-dimensional (one row per blog) so later cells can loop over each
# row's neighbours.
k_nearest = np.argsort(dist_without_self, axis=1)[:, :n_neighbors]
titles = [b['title'] for b in all_blogs]
# Distances matching k_nearest, read from the unmasked matrix.
nearest_dist = np.array([pairwise_dist[row, k_nearest[row]] for row in range(len(all_blogs))])
links = [b.get('link', '') for b in all_blogs]

In [15]:
# Undirected graph with one edge from every blog index to each of its
# nearest neighbours.
g = nx.Graph()
g.add_edges_from(
    (blog_idx, neighbor)
    for blog_idx, row_neighbors in enumerate(k_nearest)
    for neighbor in row_neighbors
)

In [None]:
# Fixed seed: spring_layout starts from random positions, so without it the
# picture changes on every run.
layout = nx.spring_layout(g, seed=42)
nx.draw(g, pos=layout, node_color='black')
# Label each node index with the title of the blog at that position.
nx.draw_networkx_labels(g, pos=layout, labels=dict(enumerate(titles)))

In [17]:
# put it in cytoscape format: one {'data': {...}} element per blog,
# with the list position (as a string) serving as the node id

nodes = [
    {'data': {
        'id': str(i),
        'label': title,
        'href': link,
        'tags': blog['tags'],
    }}
    for i, (blog, title, link) in enumerate(zip(all_blogs, titles, links))
]

# The context manager closes the file once the JSON has been written.
with open('/home/laurinbrechter/Documents/Code/portfolio-website/src/app/[lang]/blog/graphNodes.json', 'w') as nodes_file:
    json.dump(nodes, nodes_file)

In [18]:
edges = []

# add distance to edges: each neighbour is paired with its own distance from
# the matching row of nearest_dist
for i, (neighbors, distances) in enumerate(zip(k_nearest, nearest_dist)):
    for neighbor, distance in zip(neighbors, distances):
        edges.append({'data': {
            'source': str(i),
            'target': str(neighbor),
            # distance scaled by 3 and truncated to an integer
            'distance': int(distance * 3),
        }})

# The context manager closes the file once the JSON has been written.
with open('/home/laurinbrechter/Documents/Code/portfolio-website/src/app/[lang]/blog/graphEdges.json', 'w') as edges_file:
    json.dump(edges, edges_file)

In [23]:
# Single element list for the graph: all nodes first, then all edges.
elements = [*nodes, *edges]

In [24]:
# Write the combined node and edge list to graphData.json.
with open(
    '/home/laurinbrechter/Documents/Code/portfolio-website/src/app/[lang]/blog/graphData.json',
    'w',
) as graph_file:
    json.dump(elements, graph_file)