### 1. Use fastText after training to find similar and opposite words

In [117]:
from gensim.models import FastText

# Load the gensim FastText model stored at "fasttext.model"; the helper defined
# below reads it through the notebook-level name `model`.
model = FastText.load("fasttext.model")

In [118]:
def print_similar_opposite_words(input_word, topn=10, ft_model=None):
    """Print the words most and least similar to ``input_word``.

    Two lists are printed: the ``topn`` nearest neighbours reported by
    ``wv.most_similar`` and the ``topn`` vocabulary words with the lowest
    ``wv.similarity`` to the query. "Least similar" is the lowest similarity
    score, which is not necessarily an antonym.

    Args:
        input_word: Query word.
        topn: Number of words to show in each list (default 10).
        ft_model: Object exposing a gensim-style ``wv`` attribute providing
            ``most_similar``, ``similarity`` and ``index_to_key``. When omitted,
            the notebook-level ``model`` variable is used.

    Returns:
        None. The results are written to stdout.
    """
    if ft_model is None:
        # Fall back to the notebook global. Later cells assign the return value
        # of analyze_word() to the same name, so pass ``ft_model`` explicitly
        # whenever ``model`` may have been rebound.
        ft_model = model
    vectors = ft_model.wv

    # Most similar words
    print(f"Similar words to '{input_word}':")
    for word, score in vectors.most_similar(input_word, topn=topn):
        print(f"{word}: {score:.4f}")

    # Least similar words: score every vocabulary entry except the query itself,
    # then keep the lowest scores.
    sims = [
        (word, vectors.similarity(input_word, word))
        for word in vectors.index_to_key
        if word != input_word
    ]
    opposites = sorted(sims, key=lambda pair: pair[1])[:topn]

    print(f"\nWords least similar to '{input_word}':")
    for word, score in opposites:
        print(f"{word}: {score:.4f}")

In [119]:
# Print the 10 most and 10 least similar vocabulary words for 'hard'.
print_similar_opposite_words('hard')

Similar words to 'hard':
harder: 0.6709
hardee: 0.6031
diehard: 0.6010
hardy: 0.5983
hardees: 0.5923
hardback: 0.5915
gerhard: 0.5586
hards: 0.5558
hardened: 0.5438
harden: 0.5290

Words least similar to 'hard':
et: -0.0371
vie: -0.0276
tour: -0.0221
opa: -0.0101
ni: -0.0099
park: -0.0076
series: -0.0038
court: 0.0009
kaffe: 0.0036
vi: 0.0069


In [120]:
# Print the 10 most and 10 least similar vocabulary words for 'computer'.
print_similar_opposite_words('computer')

Similar words to 'computer':
compute: 0.9480
computerized: 0.8861
computing: 0.8002
compusa: 0.7481
compulsory: 0.7211
smartphone: 0.6144
smartphones: 0.6142
laptop: 0.5995
macbook: 0.5841
ipads: 0.5825

Words least similar to 'computer':
sau: -0.0832
louisian: -0.0488
carolina: -0.0485
lua: -0.0447
mais: -0.0437
louisiana: -0.0412
oli: -0.0382
lau: -0.0363
trini: -0.0308
dinic: -0.0303


In [121]:
# Print the 10 most and 10 least similar vocabulary words for 'woman'.
print_similar_opposite_words('woman')

Similar words to 'woman':
saleswoman: 0.6918
lady: 0.6102
men: 0.5889
womens: 0.5608
gentleman: 0.5163
ottoman: 0.5114
cattleman: 0.4880
lipman: 0.4771
girl: 0.4721
bauman: 0.4705

Words least similar to 'woman':
ono: -0.0269
ap: -0.0253
no: -0.0210
re: -0.0071
java: -0.0067
surf: -0.0066
ao: -0.0036
noms: -0.0033
pana: -0.0019
mahi: -0.0013


### 2. Test pretrained model

In [None]:
import fasttext

# Models already loaded in this kernel session, keyed by file path.
# Re-running this cell resets the cache.
_PRETRAINED_MODEL_CACHE = {}


def load_pretrained_fasttext(model_path="cc.en.300.bin"):
    """Load a pretrained fastText binary, reusing an already-loaded copy.

    The file is read with ``fasttext.load_model`` on the first request for a
    given path; later requests for the same path return the same object, so
    repeated analyses do not read the file from disk again.

    Args:
        model_path: Path of the fastText ``.bin`` file. Defaults to
            ``"cc.en.300.bin"``.

    Returns:
        The object returned by ``fasttext.load_model(model_path)``. The same
        instance is shared between all callers that use the same path.
    """
    if model_path not in _PRETRAINED_MODEL_CACHE:
        _PRETRAINED_MODEL_CACHE[model_path] = fasttext.load_model(model_path)
    return _PRETRAINED_MODEL_CACHE[model_path]

def get_similar_words(model, word, n=10):
    """Return the ``n`` nearest neighbours of ``word`` as reported by ``model``.

    Thin wrapper that forwards to ``model.get_nearest_neighbors(word, k=n)`` and
    returns its result unchanged.
    """
    return model.get_nearest_neighbors(word, k=n)

def get_opposite_words(model, word, n=10, search_k=10000):
    """Return the ``n`` candidate words whose vectors are furthest from ``word``.

    "Opposite" means largest cosine distance (``1 - cosine similarity``). This
    is geometric dissimilarity and not necessarily an antonym.

    Args:
        model: Object exposing ``get_word_vector(word)`` and
            ``get_words(on_unicode_error=...)``, such as the value returned by
            ``load_pretrained_fasttext()``.
        word: Query word.
        n: Number of results to return.
        search_k: Only the first ``search_k`` entries of ``model.get_words()``
            are considered as candidates.

    Returns:
        A list of ``(candidate_word, cosine_distance)`` tuples ordered by
        descending distance; ties keep vocabulary order. The query word itself
        and candidates whose vector is all zeros are skipped. The list is empty
        when the query vector is all zeros or no candidate remains.
    """
    import numpy as np

    query = np.asarray(model.get_word_vector(word), dtype=np.float64)
    query_norm = np.linalg.norm(query)
    if query_norm == 0.0:
        # Cosine distance is undefined for a zero-length query vector.
        return []

    candidates = [
        candidate
        for candidate in model.get_words(on_unicode_error='ignore')[:search_k]
        if candidate != word
    ]
    if not candidates:
        return []

    # One (num_candidates, dim) matrix so all distances come from a single
    # matrix-vector product instead of one library call per word.
    matrix = np.asarray(
        [model.get_word_vector(candidate) for candidate in candidates],
        dtype=np.float64,
    )
    norms = np.linalg.norm(matrix, axis=1)

    # Zero-length candidate vectors would give NaN distances and corrupt the
    # ordering, so they are left out of the ranking.
    usable = np.flatnonzero(norms > 0.0)
    similarities = (matrix[usable] @ query) / (norms[usable] * query_norm)
    distances = np.clip(1.0 - similarities, 0.0, 2.0)

    # A stable sort on the negated distances gives descending order while
    # keeping equal distances in their original vocabulary order.
    ranking = np.argsort(-distances, kind="stable")[:n]
    return [(candidates[usable[i]], float(distances[i])) for i in ranking]

def analyze_word(word, model=None):
    """Print the nearest and the most distant words for ``word``.

    Args:
        word: Query word.
        model: Already-loaded model to reuse. When ``None`` (the default, which
            matches the original call signature) the model is obtained from
            ``load_pretrained_fasttext()``.

    Returns:
        The model that was used, so it can be passed back in on the next call
        instead of being loaded again.
    """
    if model is None:
        model = load_pretrained_fasttext()

    print(f"\nAnalyzing word: '{word}'")

    print("\n10 most similar words:")
    # get_similar_words yields (score, word) pairs, score first.
    neighbours = get_similar_words(model, word)
    for rank, (similarity, similar_word) in enumerate(neighbours, 1):
        print(f"{rank}. {similar_word} (similarity: {similarity})")

    print("\n10 most opposite words:")
    # get_opposite_words yields (word, distance) pairs, word first.
    furthest = get_opposite_words(model, word)
    for rank, (opposite_word, distance) in enumerate(furthest, 1):
        print(f"{rank}. {opposite_word} (distance: {distance})")

    return model

In [7]:
# analyze_word prints its report and returns the model it loaded.
# NOTE(review): this rebinds the notebook-level name `model`, which section 1
# assigned to the gensim FastText object and which print_similar_opposite_words
# reads as a global. Presumably that helper stops working after this cell
# until the gensim model is reloaded; confirm.
model = analyze_word("hard")


Analyzing word: 'hard'

10 most similar words:
1. harder (similarity: 0.674920916557312)
2. tough (similarity: 0.6622265577316284)
3. difficult (similarity: 0.6313798427581787)
4. Hard (similarity: 0.6303834915161133)
5. ahrd (similarity: 0.6172201633453369)
6. hard.It (similarity: 0.6048954725265503)
7. hard- (similarity: 0.596278727054596)
8. HARD (similarity: 0.5833104848861694)
9. hardest (similarity: 0.5808660984039307)
10. hard.Now (similarity: 0.5805425643920898)

10 most opposite words:
1. contribs (distance: 1.1475075579484346)
2. ‎ (distance: 1.1277520064021562)
3. Lake (distance: 1.126982154931729)
4. Mobile (distance: 1.1131624006276788)
5. 1917 (distance: 1.1112886911259348)
6. Events (distance: 1.1094323899945187)
7. Theatre (distance: 1.1087379045420587)
8. Thread (distance: 1.1053014727028372)
9. Generation (distance: 1.1051222621907733)
10. Gifts (distance: 1.104175079514259)


In [8]:
# Same report for 'computer'; analyze_word loads the model again inside the call.
model = analyze_word("computer")


Analyzing word: 'computer'

10 most similar words:
1. computers (similarity: 0.7748078107833862)
2. comptuer (similarity: 0.7278218865394592)
3. compuer (similarity: 0.7249574661254883)
4. comupter (similarity: 0.6960818767547607)
5. comuter (similarity: 0.691444456577301)
6. non-computer (similarity: 0.6854090094566345)
7. computer.But (similarity: 0.6702727675437927)
8. laptop (similarity: 0.6590123176574707)
9. puter (similarity: 0.658584475517273)
10. computor (similarity: 0.65807044506073)

10 most opposite words:
1. Albert (distance: 1.102994451095535)
2. Louis (distance: 1.0950927675183888)
3. Salt (distance: 1.0922291363911278)
4. Victoria (distance: 1.0908030197837015)
5. Caroline (distance: 1.089518390431194)
6. Henry (distance: 1.0879689991239236)
7. Wedding (distance: 1.0842332038745603)
8. Heights (distance: 1.0822048295768492)
9. Season (distance: 1.0810272688124234)
10. Length (distance: 1.0793260259074449)


In [9]:
# Same report for 'woman'; analyze_word loads the model again inside the call.
model = analyze_word("woman")


Analyzing word: 'woman'

10 most similar words:
1. man (similarity: 0.7658414840698242)
2. lady (similarity: 0.7432236075401306)
3. woman.The (similarity: 0.7007342576980591)
4. woman.It (similarity: 0.6939473748207092)
5. woman.A (similarity: 0.6926014423370361)
6. girl (similarity: 0.6882616877555847)
7. woman.If (similarity: 0.6846626400947571)
8. woman--and (similarity: 0.6815258264541626)
9. woman.She (similarity: 0.6798537373542786)
10. woman.This (similarity: 0.6764208674430847)

10 most opposite words:
1. Links (distance: 1.1595318392004987)
2. additions (distance: 1.138740179230674)
3. Offers (distance: 1.1327834049817953)
4. Notes (distance: 1.1133203080430172)
5. Projects (distance: 1.1131441914931377)
6. Activities (distance: 1.111562624990526)
7. updates (distance: 1.1088127183014034)
8. Sites (distance: 1.1076431735423027)
9. Privacy (distance: 1.1059780395432781)
10. modules (distance: 1.1048911030807542)
