In [None]:
# Import some libraries we need.
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Copy files from github in to the local Colab filespace.
!git clone --quiet https://github.com/KCL-Health-NLP/nlp_youth_awards.git
print("Done copying files")

In [None]:
# Read in the whole spreadsheet.
# Asking for sheet_name=None loads every sheet at once, giving us
# a dictionary that maps each sheet's name to that sheet's table.
workbook_path = './nlp_youth_awards/contexts.xlsx'
contexts = pd.read_excel(workbook_path, sheet_name=None)

In [None]:
# Each sheet that was read in is stored under its own name.
# Listing those names shows us which words we have.
sheet_names = contexts.keys()
print(sheet_names)

In [None]:
# Peek at the first ten rows of the sheet for one word.
# Change example_word to look at a different sheet.
example_word = 'lettuce'
first_rows = contexts[example_word].head(10)
print(first_rows)

In [None]:
def get_dimension_value(word, dimension):
  """Look up the value of one dimension word for a word.

  Args:
    word: The word to look up. Its table is fetched with
      contexts[word], so it must be one of the names in contexts.
    dimension: The dimension word to search for in the
      'context' column of that table.

  Returns:
    The 'relative-count' entry of the first row whose 'context'
    is the dimension word, or 0 if no row has that context.
  """

  # Get the table of dimensions for this word
  table = contexts[word]

  # Mark every row whose context column holds the dimension word
  is_match = table['context'] == dimension

  # If no row is marked, the dimension word is not in the table
  if not is_match.any():
    return 0

  # Otherwise take the relative-count from the first marked row
  return table.loc[is_match, 'relative-count'].values[0]

In [None]:
# The words we want to draw as arrows
words_to_plot = ['lettuce', 'cucumber', 'butter', 'sugar']

# The two dimension words used for the graph:
# the first goes along the x axis, the second up the y axis
x_dimension, y_dimension = 'bowl', 'salad'

# Start with no vectors; they are added later, one per word
vectors = list()

In [None]:
# Start from an empty list every time this cell runs, so that
# running the cell again does not add each word a second time
vectors = []

# Go through the words one at a time
for word in words_to_plot:

  # Look up the values of the two dimensions
  x_value = get_dimension_value(word, x_dimension)
  y_value = get_dimension_value(word, y_dimension)

  # Add the word and its two values to the vectors list
  vectors.append((word, x_value, y_value))

# Take a look at the vectors
print(vectors)



In [None]:
# Make a figure with one set of axes to draw on
fig, ax = plt.subplots()

# How far past the tip of each arrow its label is written
label_offset = 2

# The smallest and largest value shown on both axes
axis_limits = (-1, 101)

# Go through the vectors and draw each one
for word, x, y in vectors:

  # Draw an arrow from (0, 0) to (x, y); the angles, scale_units
  # and scale settings make the arrow use the same units as the axes
  ax.quiver(0, 0, x, y, angles='xy', scale_units='xy', scale=1)

  # Write the word just past the end of its arrow
  ax.text(x + label_offset, y + label_offset, word, fontsize=8)

# Name each axis after its dimension word and fix the visible range
ax.set_xlabel(x_dimension)
ax.set_ylabel(y_dimension)
ax.set_xlim(axis_limits)
ax.set_ylim(axis_limits)

# Show the graph
plt.show()
