In [1]:
import tensorflow as tf
import transformers
import datasets
print(f"TensorFlow version: {tf.__version__}")
print(f"Transformers version: {transformers.__version__}")
print(f"Datasets version: {datasets.__version__}")




  from .autonotebook import tqdm as notebook_tqdm


TensorFlow version: 2.15.0
Transformers version: 4.57.1
Datasets version: 4.4.1


In [2]:

from transformers import TFAutoModel, AutoTokenizer

model_name = "distilbert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(model_name)

model = TFAutoModel.from_pretrained(
    model_name,
    use_safetensors=False
)
print("Model and tokenizer loaded successfully.")




TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.
Some layers from the model checkpoint at distilbert-base-uncased were not used when initializing TFDistilBertModel: ['vocab_layer_norm', 'vocab_transform', 'vocab_projector', 'activation_13']
- This IS expected if you are initializing TFDistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFDistilBertModel were initialized from the model checkpoint at distilbert-base-uncased.
If your task is similar to the task the model of the checkpoint was

Model and tokenizer loaded successfully.


In [10]:
#Test with simple input
text = "This is a test sentence for the model." #Test tokenizer and model

inputs = tokenizer(text, return_tensors="tf", padding=True, truncation=True)#Tokenize text

print("Tokenized inputs:")
print(f"Input IDs shape: {inputs['input_ids'].shape}")
print(f"Attention mask shape: {inputs['attention_mask'].shape}")
print(f"\nInput Ids: {inputs['input_ids']}")

# Get model output
outputs = model(inputs)
print(f"\nModel output shape: {outputs.last_hidden_state.shape}")
print("Model inference successful!")

TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.


Tokenized inputs:
Input IDs shape: (1, 11)
Attention mask shape: (1, 11)

Input Ids: [[ 101 2023 2003 1037 3231 6251 2005 1996 2944 1012  102]]

Model output shape: (1, 11, 768)
Model inference successful!


In [None]:
from datasets import load_dataset

# Load IMDB dataset (movie reviews)
print("Loading dataset...")
dataset = load_dataset("imdb")

print(f"\nDataset splits: {list(dataset.keys())}")
print(f"Training examples: {len(dataset['train'])}")
print(f"Test examples: {len(dataset['test'])}")

# Show a sample
print(f"\n--- Sample Example ---")
print(f"Text: {dataset['train'][0]['text'][:200]}...")
print(f"Label: {dataset['train'][0]['label']} (0=negative, 1=positive)")

Loading dataset...

Dataset splits: ['train', 'test', 'unsupervised']
Training examples: 25000
Test examples: 25000

--- Sample Example ---
Text: I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ev...
Label: 0 (0=negative, 1=positive)


In [None]:
# Define preprocessing function
def preprocess_function(examples):
    return tokenizer(
        examples['text'], 
        truncation=True, 
        padding='max_length', 
        max_length=128
    )

# Apply tokenization to the entire dataset
print("Tokenizing dataset... (this may take a minute)")
tokenized_dataset = dataset.map(preprocess_function, batched=True)

print("\nTokenization complete!")
print(f"Features: {tokenized_dataset['train'].column_names}")

# Check a tokenized example
print(f"\nTokenized example shape: {len(tokenized_dataset['train'][0]['input_ids'])} tokens")

Tokenizing dataset... (this may take a minute)


Map: 100%|██████████| 25000/25000 [00:27<00:00, 894.42 examples/s] 
Map: 100%|██████████| 25000/25000 [00:30<00:00, 824.28 examples/s] 
Map: 100%|██████████| 50000/50000 [00:54<00:00, 924.20 examples/s] 


Tokenization complete!
Features: ['text', 'label', 'input_ids', 'attention_mask']

Tokenized example shape: 128 tokens





In [15]:
#6Convert to TensorFlow dataset

small_train_dataset = tokenized_dataset['train'].shuffle(seed=42).select(range(1000))
small_test_dataset = tokenized_dataset['test'].shuffle(seed=42).select(range(200))

#Convert to TF dataset
train_dataset = small_train_dataset.to_tf_dataset(
    columns=['input_ids', 'attention_mask'],
    label_cols=['label'],
    shuffle=True,
    batch_size=16,
    collate_fn=None
)

test_dataset = small_test_dataset.to_tf_dataset(
    columns=['input_ids', 'attention_mask'],
    label_cols=['label'],
    shuffle=False,
    batch_size=16,
    collate_fn=None
)

print("Datasets converted to Tensorflow format!")
print(f"Training batches: ~{len(small_train_dataset) // 16}")
print(f"Test batches: ~{len(small_test_dataset) // 16}")

Old behaviour: columns=['a'], labels=['labels'] -> (tf.Tensor, tf.Tensor)  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor)  
New behaviour: columns=['a'],labels=['labels'] -> ({'a': tf.Tensor}, {'labels': tf.Tensor})  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor) 


Datasets converted to Tensorflow format!
Training batches: ~62
Test batches: ~12


In [16]:
#7Load Model for Classification
from transformers import TFAutoModelForSequenceClassification

#Load pre-trained model with classification head
model_for_classification = TFAutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
    use_safetensors=False
)
print("Classification model loaded successfully!")
print(f"Model has {model_for_classification.num_labels} output labels")

Some layers from the model checkpoint at distilbert-base-uncased were not used when initializing TFDistilBertForSequenceClassification: ['vocab_transform', 'vocab_projector', 'vocab_layer_norm', 'activation_13']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier', 'classifier', 'dropout_152']
You should probably TRAIN this model on a down-stream task to be able to use 

Classification model loaded successfully!
Model has 2 output labels


In [18]:
import tensorflow as tf

#8 Compile the model
model_for_classification.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

print("Model compiled and ready for training!")

Model compiled and ready for training!


In [19]:
#9 Train model

print("Starting training...")

history = model_for_classification.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=3
)

print("\nTraining complete!")

Starting training...
Epoch 1/3


Epoch 2/3
Epoch 3/3

Training complete!


In [20]:
#10. Evaluate Model
print("Evaluating model...")
results = model_for_classification.evaluate(test_dataset)

print(f"\nTest Loss: {results[0]:.4f}")
print(f"Test Accuracy: {results[1]:.4f}")

Evaluating model...

Test Loss: 0.5495
Test Accuracy: 0.7800


In [21]:
#11: Make Predictions on New Text

test_reviews = [
    "This movie was absolutely fantastic! I loved every minute of it.",
    "Terrible film. Complete waste of time and money.",
    "It was okay, nothing special but not bad either."
]

for review in test_reviews:
    #Tokenize
    inputs = tokenizer(review, return_tensors="tf", padding=True, truncation=True, max_length=128)
    #Predict
    outputs = model_for_classification(inputs)
    predictions = tf.nn.softmax(outputs.logits, axis=-1)
    predicted_class = tf.argmax(predictions, axis=-1).numpy()[0]
    confidence = predictions.numpy()[0][predicted_class]

    sentiment = "positive" if predicted_class == 1 else "Negative"

    print(f"\nReview: {review[:60]}...")
    print(f"Sentiment: {sentiment} (confidence: {confidence:.2%})")


Review: This movie was absolutely fantastic! I loved every minute of...
Sentiment: positive (confidence: 97.98%)

Review: Terrible film. Complete waste of time and money....
Sentiment: Negative (confidence: 98.32%)

Review: It was okay, nothing special but not bad either....
Sentiment: Negative (confidence: 83.54%)


In [22]:
#12 Save model and tokenizer

save_path = "./my_sentiment_model"

model_for_classification.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

print(f"Model and tokenizer saved to {save_path}")

Model and tokenizer saved to ./my_sentiment_model


In [23]:
#13 Load Saved Model Later

loaded_model = TFAutoModelForSequenceClassification.from_pretrained(
    save_path,
    use_safetensors=False
)

loaded_tokenizer = AutoTokenizer.from_pretrained(save_path)

print("Model loaded from disk successfully!")

#Test loaded model
test_text = "This is amazing product!"
inputs = loaded_tokenizer(test_text, return_tensors="tf") 
outputs = loaded_model(inputs)
prediction = tf.argmax(outputs.logits, axis=-1).numpy()[0]

print(f"Prediction: {'Positive' if prediction == 1 else 'Negative'}")

Some layers from the model checkpoint at ./my_sentiment_model were not used when initializing TFDistilBertForSequenceClassification: ['dropout_152']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at ./my_sentiment_model and are newly initialized: ['dropout_172']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded from disk successfully!
Prediction: Positive


In [None]:
# import numpy as np
# import torch

# kernel = torch.tensor([
#     [0, -1, 0],
#     [-1, 5, -1],
#     [0, -1, 0],
# ], dtype=torch.float32)

# bias = torch.tensor([5], dtype=torch.float32)

# image = torch.tensor(
#     [[1,2,3,4],
#     [5,6,7,8],
#     [9,10,11,12],
#     [13,14,15,16]], dtype=torch.float32
# )

# def Output_shape(image, kernel, padding, stride):
#     h, w = image.shape[-2], image.shape[-1]
#     k_h, k_w = kernel.shape[-2], kernel.shape[-1]

#     h_out = (h-k_h * padding) // stride[0] +1
#     w_out = (w-k_w * pading) // stride[1] + 1
#     return h_out, w_out
# Output_shape(image, kernel, padding=0, stride=(1,1))