# Skip-Gram demo

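This notebook demonstrates how skip-gram (target, context) pairs are extracted from a sentence with a sliding window, and how negative samples are drawn for each pair. Together these form the training data used to learn word embeddings.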

In [6]:
import numpy as np

def format_context(sentence, target_index, window_size):
    """Render the window around one word, with that word wrapped in asterisks.

    The sentence is lower-cased and split on whitespace. The window covers up
    to ``window_size`` words on each side of ``target_index`` and is clipped
    at the sentence boundaries.

    Args:
        sentence: Text to tokenise.
        target_index: Position of the target word in the tokenised sentence.
        window_size: Maximum number of words shown on each side of the target.

    Returns:
        The window's words joined by single spaces, e.g. ``"the *wide* road"``.
    """
    tokens = sentence.lower().split()
    left = max(0, target_index - window_size)
    right = min(len(tokens), target_index + window_size + 1)
    window = tokens[left:right]
    # Position of the target inside the (possibly clipped) window.
    offset = target_index - left
    window[offset] = f"*{window[offset]}*"
    return ' '.join(window)

def generate_skip_grams(sentence, window_size, verbose=True):
    """Build every (target, context) skip-gram pair for a sentence.

    The sentence is lower-cased and split on whitespace. Each word in turn is
    the target; every other word at most ``window_size`` positions away from
    it (clipped at the sentence boundaries) yields one pair.

    Args:
        sentence: Text to tokenise.
        window_size: Maximum distance between a target and a context word.
        verbose: When True (the default), print the highlighted window for
            each target followed by each pair as it is produced. Pass False
            to only compute the pairs.

    Returns:
        A list of ``(target_word, context_word)`` tuples, ordered by target
        position and then by context position.
    """
    words = sentence.lower().split()
    skip_grams = []

    for i, target_word in enumerate(words):
        if verbose:
            context_formatted = format_context(sentence, i, window_size)
            print(f"\nContext: [ {context_formatted} ]")

        first = max(0, i - window_size)
        last = min(len(words), i + window_size + 1)
        for j in range(first, last):
            if j == i:
                # A word is never its own context.
                continue
            skip_grams.append((target_word, words[j]))
            if verbose:
                print(f"({target_word}, {words[j]})")

    return skip_grams

# Input for the demo: one short sentence and a symmetric window of two words.
sentence = "The wide road shimmered in the hot sun"
window_size = 2
print(f"Sentence: {sentence}", f"Window size: {window_size}", sep="\n")

# Generating the pairs also prints each window and its pairs as it goes.
print("\nSkip-gram pairs:")
skip_grams = generate_skip_grams(sentence, window_size)


# Demonstrate negative sampling
# Seed NumPy's global generator so the sampled negatives are the same on
# every Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Sort the unique words: the iteration order of a set of strings can change
# between interpreter sessions (hash randomisation), which would make the
# sampled words differ from run to run even with a fixed seed.
vocab = sorted(set(sentence.lower().split()))
vocab_size = len(vocab)

def negative_sampling(target, context, num_negative, vocabulary=None):
    """Draw negative samples for one (target, context) skip-gram pair.

    Words are drawn uniformly at random, with replacement, from the entries
    of the vocabulary that are neither ``target`` nor ``context``; the same
    word can therefore appear more than once in the result.

    Args:
        target: Target word of the positive pair; never returned.
        context: Context word of the positive pair; never returned.
        num_negative: Number of negative samples to draw. Zero or a negative
            value yields an empty list.
        vocabulary: Sequence of words to sample from. Defaults to the
            module-level ``vocab``.

    Returns:
        A list of ``num_negative`` words taken from the vocabulary, with the
        same types as the vocabulary's elements.

    Raises:
        ValueError: If samples are requested but every vocabulary word equals
            ``target`` or ``context`` (rejection sampling would never end).
    """
    if num_negative <= 0:
        return []
    if vocabulary is None:
        vocabulary = vocab

    candidates = [word for word in vocabulary if word != target and word != context]
    if not candidates:
        raise ValueError(
            "vocabulary contains no word other than the target and context"
        )

    # Sample positions rather than words so the original elements are
    # returned unchanged instead of being converted to NumPy string scalars.
    picks = np.random.randint(len(candidates), size=num_negative)
    return [candidates[i] for i in picks]

print("\nDemonstrating negative sampling:")
num_negative = 2
# Only the first three pairs are shown to keep the output short.
for target, context in skip_grams[:3]:
    negatives = negative_sampling(target, context, num_negative)
    # One call per pair; the trailing empty item yields the blank separator line.
    print(
        f"Target: {target}",
        f"Positive context: {context}",
        f"Negative samples: {negatives}",
        "",
        sep="\n",
    )

Sentence: The wide road shimmered in the hot sun
Window size: 2

Skip-gram pairs:

Context: [ *the* wide road ]
(the, wide)
(the, road)

Context: [ the *wide* road shimmered ]
(wide, the)
(wide, road)
(wide, shimmered)

Context: [ the wide *road* shimmered in ]
(road, the)
(road, wide)
(road, shimmered)
(road, in)

Context: [ wide road *shimmered* in the ]
(shimmered, wide)
(shimmered, road)
(shimmered, in)
(shimmered, the)

Context: [ road shimmered *in* the hot ]
(in, road)
(in, shimmered)
(in, the)
(in, hot)

Context: [ shimmered in *the* hot sun ]
(the, shimmered)
(the, in)
(the, hot)
(the, sun)

Context: [ in the *hot* sun ]
(hot, in)
(hot, the)
(hot, sun)

Context: [ the hot *sun* ]
(sun, the)
(sun, hot)

Demonstrating negative sampling:
Target: the
Positive context: wide
Negative samples: ['sun', 'shimmered']

Target: the
Positive context: road
Negative samples: ['hot', 'shimmered']

Target: wide
Positive context: the
Negative samples: ['sun', 'sun']

