#ND

In [2]:
%pip install numpy tensorflow-macos tensorflow-metal 

Collecting tensorflow-macos
  Downloading tensorflow_macos-2.16.2-cp310-cp310-macosx_12_0_arm64.whl (2.1 kB)
Collecting tensorflow-metal
  Downloading tensorflow_metal-1.2.0-cp310-cp310-macosx_12_0_arm64.whl (1.4 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m[31m2.6 MB/s[0m eta [36m0:00:01[0m
[?25hCollecting tensorflow==2.16.2
  Downloading tensorflow-2.16.2-cp310-cp310-macosx_12_0_arm64.whl (227.0 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.0/227.0 MB[0m [31m947.8 kB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:05[0m
[?25hCollecting requests<3,>=2.21.0
  Using cached requests-2.32.3-py3-none-any.whl (64 kB)
Collecting ml-dtypes~=0.3.1
  Downloading ml_dtypes-0.3.2-cp310-cp310-macosx_10_9_universal2.whl (389 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m389.8/389.8 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0

In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense
import io

# Step 1: Sample dataset
# Each entry pairs a review sentence with its sentiment (1 = positive, 0 = negative).
labelled_reviews = [
    ("I love this movie", 1),
    ("This is an amazing movie", 1),
    ("I hate this movie", 0),
    ("This is a terrible movie", 0),
    ("Fantastic performance by the actors", 1),
    ("The plot was boring and predictable", 0),
]
sentences = [text for text, _ in labelled_reviews]
labels = [sentiment for _, sentiment in labelled_reviews]

# Step 2: Tokenize sentences
# Words that were not seen while fitting are mapped to the "<OOV>" token.
tokenizer = Tokenizer(oov_token="<OOV>", num_words=1000)
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(sentences)

# Right-pad every sequence so they all share one fixed length.
padded_sequences = pad_sequences(sequences, maxlen=10, padding='post')

# Step 3: Define the model with an embedding layer
# Seed Python, NumPy and TensorFlow so weight initialisation is repeatable
# across "Restart & Run All" (GPU kernels may still add small run-to-run noise).
tf.keras.utils.set_random_seed(42)

# Index 0 is reserved for padding and is never assigned to a word, so the
# embedding table needs one more row than word_index has entries
# (the "<OOV>" token is already part of word_index).
vocab_size = len(word_index) + 1
embedding_dim = 16  # Size of the embedding vector
# Take the sequence length from the padded data instead of repeating the literal.
sequence_length = padded_sequences.shape[1]

model = Sequential([
    # `input_length` is deprecated in Keras 3 (bundled with TF 2.16); the input
    # shape is supplied through model.build() below instead.
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    Flatten(),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Build the model to initialize the layers
model.build(input_shape=(None, sequence_length))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 4: Train the model
model.fit(padded_sequences, np.array(labels), epochs=10)

# Step 5: Retrieve and save the embeddings
# The first layer of the model is the Embedding layer; its first weight array
# holds one embedding row per token index.
embedding_layer = model.layers[0]
embeddings = embedding_layer.get_weights()[0]

# Save embeddings and metadata to files: line i of vecs.tsv is the
# tab-separated vector for the word on line i of meta.tsv.
# `with` closes both files even if a write raises part-way through.
with io.open('vecs.tsv', 'w', encoding='utf-8') as out_v, \
        io.open('meta.tsv', 'w', encoding='utf-8') as out_m:
    for word, idx in word_index.items():
        if idx < vocab_size:  # Ensure index is within vocab size
            vec = embeddings[idx]
            out_m.write(word + "\n")  # Save the word
            out_v.write('\t'.join([str(x) for x in vec]) + "\n")  # Save the vector

print("Embeddings and metadata saved to 'vecs.tsv' and 'meta.tsv'.")

# Step 6: Visualize embeddings
print("To visualize embeddings, upload 'vecs.tsv' and 'meta.tsv' to the TensorFlow Embedding Projector:")
print("https://projector.tensorflow.org/")



2025-03-23 17:30:00.965084: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2025-03-23 17:30:00.965141: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-03-23 17:30:00.965152: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-03-23 17:30:00.965206: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-03-23 17:30:00.965223: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/10


2025-03-23 17:30:02.602234: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.5000 - loss: 0.6898
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - accuracy: 0.5000 - loss: 0.6875
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.6667 - loss: 0.6851
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.6667 - loss: 0.6827
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.6667 - loss: 0.6802
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.6667 - loss: 0.6777
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.6667 - loss: 0.6752
Epoch 8/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.6667 - loss: 0.6725
Epoch 9/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms

In [None]:
# Save embeddings to a .tsv file
# NOTE: relies on `model`, `word_index` and `vocab_size` created by the
# training cell, so that cell must have been run first.
import io

# Get the weights from the embedding layer (one row per token index)
embedding_layer = model.layers[0]
embeddings = embedding_layer.get_weights()[0]

# Save the embeddings: line i of vecs.tsv is the tab-separated vector for the
# word on line i of meta.tsv. `with` closes both files even if a write raises.
with io.open('vecs.tsv', 'w', encoding='utf-8') as out_v, \
        io.open('meta.tsv', 'w', encoding='utf-8') as out_m:
    for word, idx in word_index.items():
        if idx < vocab_size:  # Ensure we're within the vocab size
            vec = embeddings[idx]
            out_m.write(word + "\n")  # Save the word
            out_v.write('\t'.join([str(x) for x in vec]) + "\n")  # Save the vector

print("Embeddings and metadata saved to 'vecs.tsv' and 'meta.tsv'.")
