# Skip-Gram demo

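This notebook demonstrates how to build skip-gram (target, context) training pairs from a sentence and how to draw negative samples for them. These pairs are the training data a skip-gram model uses to learn word embeddings; the model training itself is not covered here.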

In [35]:
import numpy as np

def generate_skip_grams(sentence, window_size):
    """Build the skip-gram (target, context) pairs for a sentence.

    The sentence is lower-cased and split on whitespace. Each token in turn
    acts as the target and is paired with every other token located at most
    ``window_size`` positions to its left or right.

    Args:
        sentence: Text to tokenize.
        window_size: Maximum distance, in tokens, between a target and its
            context words.

    Returns:
        A list of ``(target, context)`` tuples, ordered by target position and
        then by context position.
    """
    tokens = sentence.lower().split()
    n_tokens = len(tokens)

    return [
        (target_word, tokens[pos])
        for center, target_word in enumerate(tokens)
        # Clamp the window so it never runs past either end of the sentence.
        for pos in range(
            max(0, center - window_size),
            min(n_tokens, center + window_size + 1),
        )
        if pos != center  # a word is never its own context
    ]

# Example sentence
sentence = "The wide road shimmered in the hot sun"
window_size = 2

# Every (target, context) pair that falls inside the window.
skip_grams = generate_skip_grams(sentence, window_size)

# The three header lines go out in a single call, one per line.
print(
    f"Sentence: {sentence}",
    f"Window size: {window_size}",
    "\nSkip-gram pairs:",
    sep="\n",
)
for target, context in skip_grams:
    print(f"({target}, {context})")

# Demonstrate negative sampling
# Vocabulary = the distinct lower-cased words of the example sentence.
# It is sorted because the iteration order of a set of strings can change
# between interpreter sessions (string hashing is randomized), which would make
# the vocabulary order differ after every kernel restart.
vocab = sorted(set(sentence.lower().split()))
vocab_size = len(vocab)

def negative_sampling(target, context, num_negative, vocabulary=None):
    """Draw negative-sample words for one (target, context) pair.

    Words are drawn uniformly at random, with replacement, from the vocabulary
    entries that are neither ``target`` nor ``context``. Because sampling is
    with replacement, the same word may appear more than once in the result.

    Args:
        target: Target word of the positive pair; never returned.
        context: Context word of the positive pair; never returned.
        num_negative: Number of negative samples to draw. Values of zero or
            less produce an empty list.
        vocabulary: Optional sequence of words to sample from. When omitted,
            the notebook-level ``vocab`` list is used.

    Returns:
        A list of ``num_negative`` plain ``str`` words.

    Raises:
        ValueError: If samples are requested but the vocabulary has no word
            other than ``target`` and ``context``.
    """
    if num_negative <= 0:
        return []

    words = vocab if vocabulary is None else vocabulary
    candidates = [word for word in words if word != target and word != context]
    if not candidates:
        # Rejection sampling would spin forever here, so fail loudly instead.
        raise ValueError(
            "vocabulary has no words other than the target and context to sample from"
        )

    # .tolist() turns the NumPy string scalars into ordinary Python strings.
    return np.random.choice(candidates, size=num_negative).tolist()

print("\nDemonstrating negative sampling:")
num_negative = 2
# Only the first three skip-gram pairs are shown to keep the output short.
for target, context in skip_grams[:3]:
    negatives = negative_sampling(target, context, num_negative)
    # One call per pair; the trailing newline in the string plus print's own
    # newline leaves a blank line between entries.
    print(
        f"Target: {target}\n"
        f"Positive context: {context}\n"
        f"Negative samples: {negatives}\n"
    )

Sentence: The wide road shimmered in the hot sun
Window size: 2

Skip-gram pairs:
(the, wide)
(the, road)
(wide, the)
(wide, road)
(wide, shimmered)
(road, the)
(road, wide)
(road, shimmered)
(road, in)
(shimmered, wide)
(shimmered, road)
(shimmered, in)
(shimmered, the)
(in, road)
(in, shimmered)
(in, the)
(in, hot)
(the, shimmered)
(the, in)
(the, hot)
(the, sun)
(hot, in)
(hot, the)
(hot, sun)
(sun, the)
(sun, hot)

Demonstrating negative sampling:
Target: the
Positive context: wide
Negative samples: ['hot', 'shimmered']

Target: the
Positive context: road
Negative samples: ['wide', 'sun']

Target: wide
Positive context: the
Negative samples: ['road', 'road']

