In [3]:
# Quick fix for your specific error
import os
import requests
import zipfile
import io
from gensim.scripts.glove2word2vec import glove2word2vec
from gensim.models import KeyedVectors

# Create directory for downloaded files
os.makedirs('downloaded_model', exist_ok=True)



In [4]:
# Download GloVe vectors
print("Downloading GloVe vectors...")
url = "https://nlp.stanford.edu/data/glove.6B.zip"
response = requests.get(url)
z = zipfile.ZipFile(io.BytesIO(response.content))
z.extractall("downloaded_model")

# Convert to word2vec format
glove_input_file = 'downloaded_model/glove.6B.100d.txt'
word2vec_output_file = 'downloaded_model/glove.6B.100d.word2vec.txt'

print("Converting GloVe format to Word2Vec format...")
glove2word2vec(glove_input_file, word2vec_output_file)




Downloading GloVe vectors...
Converting GloVe format to Word2Vec format...


  glove2word2vec(glove_input_file, word2vec_output_file)


(400000, 100)

In [5]:
# Load and test the model
print("Loading the model...")
model = KeyedVectors.load_word2vec_format(word2vec_output_file)
print(f"Model loaded successfully with {len(model.key_to_index)} words")


Loading the model...
Model loaded successfully with 400000 words


In [6]:
# Test the model
print("\nWords similar to 'computer':")
similar_words = model.most_similar('computer', topn=5)
for word, score in similar_words:
    print(f"  {word}: {score:.4f}")




Words similar to 'computer':
  computers: 0.8752
  software: 0.8373
  technology: 0.7642
  pc: 0.7366
  hardware: 0.7290


In [12]:
from huggingface_hub import HfApi
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

api = HfApi(token=os.getenv("HF_TOKEN"))
api.upload_folder(
    folder_path="downloaded_model",
    repo_id="nodozi/glove.6B.100d.word2vec.txt",
    repo_type="dataset",
)

2025_04_18__14_41_55.5.cbow.pth:   0%|          | 0.00/65.2M [00:00<?, ?B/s]



[A[A[A[A

[A[A
[A


2025_04_18__14_41_55.5.cbow.pth:   1%|          | 377k/65.2M [00:00<00:17, 3.72MB/s]



[A[A[A[A


[A[A[A
2025_04_18__14_41_55.5.cbow.pth:   4%|▍         | 2.69M/65.2M [00:00<00:04, 14.5MB/s]
[A


[A[A[A
[A


[A[A[A



2025_04_18__14_41_55.5.cbow.pth:   7%|▋         | 4.62M/65.2M [00:00<00:06, 9.33MB/s]


[A[A[A



2025_04_18__14_41_55.5.cbow.pth:  10%|█         | 6.62M/65.2M [00:00<00:04, 11.8MB/s]


[A[A[A



2025_04_18__14_41_55.5.cbow.pth:  16%|█▌        | 10.6M/65.2M [00:00<00:02, 19.2MB/s]
[A
[A


2025_04_18__14_41_55.5.cbow.pth:  20%|█▉        | 12.9M/65.2M [00:01<00:05, 9.94MB/s]



[A[A[A[A


[A[A[A
[A


[A[A[A



2025_04_18__14_41_55.5.cbow.pth:  22%|██▏       | 14.6M/65.2M [00:01<00:05, 9.41MB/s]


[A[A[A
[A
[A
[A


2025_04_18__14_41_55.5.cbow.pth:  25%|██▍       | 16.0M/65.2M [00:01<00:08, 5.63MB/s]



[A[A[A[A


2025_04_18__

CommitInfo(commit_url='https://huggingface.co/datasets/nodozi/glove.6B.100d.word2vec.txt/commit/522adfe304bbab4fae3a36420bbee63a00bb12aa', commit_message='Upload folder using huggingface_hub', commit_description='', oid='522adfe304bbab4fae3a36420bbee63a00bb12aa', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/nodozi/glove.6B.100d.word2vec.txt', endpoint='https://huggingface.co', repo_type='dataset', repo_id='nodozi/glove.6B.100d.word2vec.txt'), pr_revision=None, pr_num=None)