In [None]:
import math
import time
from google.colab import files
from itertools import chain

import gensim
from gensim.models import Word2Vec
import json

In [None]:
class Word2Vec2SVG:
  """Convert the word vectors of a saved Word2Vec model into SVG polygon point strings.

  Every component of a vector is used as the distance from the origin of one
  polygon vertex, with the vertices spaced at equal angles around the origin.
  The resulting coordinates are shifted into positive space and formatted so
  they can be used directly as the ``points`` attribute of an SVG polygon.

  Typical use is ``Word2Vec2SVG(path).make_and_save(filename)``.
  """

  def __init__(self, path_to_model):
    """Load the model and work out the offset added to every vector component.

    Args:
      path_to_model: path handed to ``Word2Vec.load``.

    Raises:
      ValueError: if the model has no vectors (``min`` of an empty sequence).
    """
    self.model = Word2Vec.load(path_to_model)

    # absolute value of the smallest component across all word vectors.
    # every component is increased by this so there are no negative distances
    # and the shape won't have points crossing over.
    # cast to a built-in float so later arithmetic yields plain Python numbers
    # (the components are presumably NumPy scalars - verify against gensim)
    self.min = float(abs(min(chain.from_iterable(self.model.wv[word] for word in self._vocab_words()))))

  def _vocab_words(self):
    """Return the model's vocabulary as a list of keys.

    Gensim 4 replaced ``KeyedVectors.vocab`` with ``key_to_index``; the newer
    attribute is preferred and ``vocab`` is only used when it is missing.
    """
    keyed_vectors = self.model.wv
    words = getattr(keyed_vectors, 'key_to_index', None)
    if words is None:
      words = keyed_vectors.vocab
    return list(words)

  def get_coordinates_for_vector(self, vector):
    """Place one point per vector component, equally spaced around the origin.

    The distance of point ``c`` from the origin is ``vector[c] + self.min`` and
    its angle is ``2 * pi * c / len(vector)``.

    Args:
      vector: sequence of numbers.

    Returns:
      List of ``[x, y]`` pairs of built-in floats, one per component; an empty
      list for an empty vector.
    """
    component_count = len(vector)
    points = []

    for index, value in enumerate(vector):
      angle = 2 * math.pi * index / component_count
      # bump up by the dataset minimum to cancel out negatives in the vector
      radius = float(value) + self.min
      points.append([radius * math.cos(angle), radius * math.sin(angle)])

    return points

  def generate_coords_all_words(self):
    """Run ``get_coordinates_for_vector`` for every word in the model.

    Returns:
      Dict with key = word and value = list of ``[x, y]`` coordinates.
    """
    return {
      word: self.get_coordinates_for_vector(self.model.wv[word])
      for word in self._vocab_words()
    }

  def save_json(self, filename, data, download_delay=200):
    """Write ``data`` to ``<filename>.json`` and trigger a Google Colab download.

    Args:
      filename: target path without the ``.json`` extension.
      data: JSON-serialisable object.
      download_delay: seconds to wait between writing the file and calling
        ``files.download``. Defaults to the previously hard-coded 200.
        NOTE(review): the file is already closed when the wait starts, so the
        reason for the delay is not visible here - presumably a Colab
        workaround; confirm before lowering the default.
    """
    path = filename + '.json'
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file must
    # be opened as UTF-8 explicitly instead of relying on the platform default
    with open(path, 'w', encoding='utf-8') as file_:
      json.dump(data, file_, ensure_ascii=False)
    time.sleep(download_delay)
    files.download(path)

  def make_svg_friendly(self, points, min_=None):
    """Shift points into positive space and format them as SVG polygon points.

    Unshifted, only the part of the shape where both x and y are positive
    would be visible, so every coordinate is padded by
    ``abs(1.5 * lowest coordinate)``.

    Args:
      points: list of ``[x, y]`` pairs.
      min_: value the padding is derived from. When ``None`` (the default) the
        lowest x or y coordinate of ``points`` itself is used. An explicit
        ``0`` is honoured and results in no padding.

    Returns:
      String of the form ``"x1,y1 x2,y2 ..."``; empty string for no points.
    """
    if len(points) == 0:
      return ""

    if min_ is None:
      lowest = min(min(point[0], point[1]) for point in points)
    else:
      lowest = min_
    padding = abs(lowest * 1.5)

    # string that can go straight into the svg polygon points property
    return " ".join(
      ",".join([str(point[0] + padding), str(point[1] + padding)])
      for point in points
    )

  def get_svg_min(self, coordinates_all_words):
    """Return the absolute value of the lowest coordinate across all words.

    Args:
      coordinates_all_words: dict whose values are lists of ``[x, y]`` pairs;
        the keys are ignored.

    Returns:
      ``abs`` of the smallest x or y value found, or ``0`` when there are no
      coordinates at all.
    """
    lowest = min(
      (
        min(point[0], point[1])
        for word_points in coordinates_all_words.values()
        for point in word_points
      ),
      default=0,
    )
    return abs(lowest)

  def make_all_svg_polygon_strings(self, all_coords, offset):
    """Format the coordinates of every word with ``make_svg_friendly``.

    Args:
      all_coords: dict with word key and coordinate-list value, as produced by
        ``generate_coords_all_words``.
      offset: passed to ``make_svg_friendly`` as ``min_`` for every word, so
        all polygons share the same padding.

    Returns:
      Dict with key = word and value = SVG polygon points string.
    """
    return {word: self.make_svg_friendly(coords, offset) for word, coords in all_coords.items()}

  # to get final result, just run this method
  def make_and_save(self, filename):
    """Generate the polygon strings for all words and save them via ``save_json``.

    The intermediate results are kept on the instance as ``all_coordinates``
    and ``polygon_strings``.

    Args:
      filename: output path without the ``.json`` extension.
    """
    self.all_coordinates = self.generate_coords_all_words()
    shared_offset = self.get_svg_min(self.all_coordinates)
    self.polygon_strings = self.make_all_svg_polygon_strings(self.all_coordinates, shared_offset)
    self.save_json(filename, self.polygon_strings)
  

In [None]:
# # usage example:
# # create class instance passing in the trained model to use
# converter = Word2Vec2SVG("wordvec.model")
# # run this method passing in filename for the end json file
# # will automatically download if run in google colab, which will take a few minutes
# converter.make_and_save("polygon_data")