In [1]:
#3 Skip Gram Model
import tensorflow as tf
import numpy as np

# Parameters
corpus = ["I like playing football with my friends",
          "I enjoy playing tennis",
          "I hate swimming",
          "I love basketball"]
window_size, embedding_dim, batch_size, epochs, lr = 3, 50, 16, 100, 0.01

# Tokenize
tok = tf.keras.preprocessing.text.Tokenizer()
tok.fit_on_texts(corpus)
seqs = tok.texts_to_sequences(corpus)
vocab_size = len(tok.word_index) + 1

# Generate skip-gram pairs
pairs = [[w, c] for seq in seqs for i, w in enumerate(seq)
         for j in range(max(0, i - window_size), min(len(seq), i + window_size + 1)) if i != j and (c := seq[j])]
pairs = np.array(pairs)
x_train, y_train = pairs[:, 0], pairs[:, 1]

# Build and train model
inp = tf.keras.Input(shape=(1,))
x = tf.keras.layers.Embedding(vocab_size, embedding_dim)(inp)
x = tf.keras.layers.Flatten()(x)
out = tf.keras.layers.Dense(vocab_size, activation='softmax')(x)
model = tf.keras.Model(inp, out)
model.compile(loss='sparse_categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(lr))
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0)

# Get word vector
embeddings = model.layers[1].get_weights()[0]
get_vector = lambda word: embeddings[tok.word_index[word]] if word in tok.word_index else None
word = "football"
print(f"Vector representation of '{word}': {get_vector(word)}")

# Get context words
def get_context_words(word):
    if word not in tok.word_index: return []
    idx = tok.word_index[word]
    context_ids = range(max(1, idx - window_size), min(vocab_size, idx + window_size + 1))
    return [w for w, i in tok.word_index.items() if i in context_ids]

focus_word = "playing"
print(f"Context words for '{focus_word}': {get_context_words(focus_word)}")

Vector representation of 'football': [-0.25216895 -0.28408045 -0.38735878 -0.19149698 -0.22130427  0.24655208
  0.05013163  0.19267978 -0.03761662 -0.5207101  -0.12992077  0.02772531
  0.12215419  0.33139762  0.25879356  0.31256044  0.19376655  0.10897477
  0.19096015 -0.0152914  -0.28872964  0.21509966  0.29502094 -0.29329804
 -0.18732865 -0.5092227  -0.50656354  0.13034034  0.5461856  -0.23984843
 -0.22297353  0.3279536   0.49203598 -0.3975505  -0.15423676  0.1424103
 -0.13092676  0.12414695 -0.4346669  -0.42910096 -0.4170664  -0.16287827
  0.13415605  0.23163262 -0.1866648  -0.14442295  0.06076226  0.01882784
  0.26654524  0.3160233 ]
Context words for 'playing': ['i', 'playing', 'like', 'football', 'with']
