In [1]:
import tensorflow as tf

# Enable memory growth to allocate only as needed
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)

# Alternatively, set a memory limit if you want to control the allocation
# Uncomment the lines below and adjust the limit as needed
memory_limit = 4096  # Set memory limit in MB (e.g., 4GB)
tf.config.set_logical_device_configuration(
    gpu,
    [tf.config.LogicalDeviceConfiguration(memory_limit=memory_limit)]
)

2024-10-10 08:49:56.252509: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-10 08:49:56.288262: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-10 08:49:56.297485: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-10 08:49:56.345945: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
I0000 00:00:1728550202.193802    1204 cuda_executor.c

In [2]:
# Import the necessary libraries
import wandb
from dotenv import load_dotenv
import os

# Load the environment variables from the .env file
load_dotenv()

# Get the API key from the environment variable
api_key = os.getenv("WANDB_API_KEY")

# Login to Weights & Biases using the API key
try:
    wandb.login(key=api_key)
    print("Logged in successfully.")
except Exception as e:
    print(f"Error during login: {e}")

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mravikumarchavva[0m ([33mravikumarchavva-org[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Logged in successfully.


In [3]:
# Updated Configuration
CONFIGURATION = {
    'BATCH_SIZE': 6,
    'IM_SIZE': 224,
    'N_EPOCHS': 30,
    'LEARNING_RATE': 0.001,
    'NUM_CLASSES': 3,
}

run = wandb.init(
    # Set the project where this run will be logged
    project="transformers-human-pose-estimation",

    # Set the experiment name
    name="human-pose-estimation",
    # Track hyperparameters and run metadata
    config={
        "learning_rate": CONFIGURATION['LEARNING_RATE'],
        "epochs": CONFIGURATION['N_EPOCHS'],
        "batch_size": CONFIGURATION['BATCH_SIZE'],
        "image_size": CONFIGURATION['IM_SIZE'],
        "num_classes": CONFIGURATION['NUM_CLASSES'],
    },
)

In [4]:
import tensorflow as tf

TRAIN_DIR = '../../EmotionsDataset/train/'
TEST_DIR = '../../EmotionsDataset/test/'
CLASS_NAMES = ['angry','happy','sad']

In [5]:
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(directory=TRAIN_DIR,
    labels='inferred',
    label_mode='categorical',
    class_names=CLASS_NAMES,
    color_mode='rgb',
    batch_size=CONFIGURATION['BATCH_SIZE'],
    image_size=(CONFIGURATION['IM_SIZE'], CONFIGURATION['IM_SIZE']),
    shuffle=True,
    seed=42,
    validation_split=None,
    subset=None
)
test_dataset = tf.keras.preprocessing.image_dataset_from_directory(directory=TEST_DIR,
    labels='inferred',
    label_mode='categorical',
    class_names=CLASS_NAMES,
    color_mode='rgb',
    batch_size=CONFIGURATION['BATCH_SIZE'],
    image_size=(CONFIGURATION['IM_SIZE'], CONFIGURATION['IM_SIZE']),
    shuffle=True,
    seed=42,
    validation_split=None,
    subset=None
)

Found 6799 files belonging to 3 classes.


I0000 00:00:1728550220.681636    1204 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1728550220.681740    1204 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1728550220.681768    1204 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1728550220.950773    1204 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-10-10 08:50:20.950820: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2112] Could not identify NUMA node of platform GPU id 0, defaulting to 0.  Your kernel may not have been built with NUMA support.

Found 2280 files belonging to 3 classes.


In [6]:
from tensorflow.keras.callbacks import Callback,CSVLogger,EarlyStopping
csv_logger = CSVLogger("effcientNetLogs.csv",separator=',',append=False)
es_callback = EarlyStopping(restore_best_weights=True,patience=4)
from tensorflow.keras.callbacks import LearningRateScheduler

def scheduler(epoch, lr):
    if epoch < 3:
        return lr
    else:
        return float(lr * tf.math.exp(-0.1))

sched = LearningRateScheduler(scheduler, verbose=1)

In [7]:
# Preprocessing function
def preprocess(image, label):
    image = tf.image.resize(image, [CONFIGURATION['IM_SIZE'],CONFIGURATION['IM_SIZE']])  # Resize images to 224x224
    image = tf.cast(image, tf.float32)  # Convert images to float32
    return image, label

train_dataset = train_dataset.map(preprocess)
test_dataset = test_dataset.map(preprocess)

# Normalization function
def normalise(image, label):
    return image / 255.0, label

# Apply normalization
train_dataset = train_dataset.map(normalise)
test_dataset = test_dataset.map(normalise)

# # Apply shuffling, batching, and prefetching
train_dataset = train_dataset.shuffle(buffer_size=1024).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.shuffle(buffer_size=1024).prefetch(tf.data.AUTOTUNE)

In [22]:
from transformers import TFViTModel
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy,TopKCategoricalAccuracy

model = TFViTModel.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=CONFIGURATION['NUM_CLASSES'])

All PyTorch model weights were used when initializing TFViTModel.

All the weights of TFViTModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTModel for predictions without further training.


In [47]:
tf.keras.Model(model).summary()

TypeError: Layer.__init__() takes 1 positional argument but 2 were given

: 

: 

: 

: 

: 

: 

In [24]:
model.summary()

Model: "tf_vi_t_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  86389248  
                                                                 
Total params: 86389248 (329.55 MB)
Trainable params: 86389248 (329.55 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [36]:
type(model)

transformers.models.vit.modeling_tf_vit.TFViTModel

In [46]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling1D

# Define the input layer
inputs = Input(shape=(CONFIGURATION['IM_SIZE'], CONFIGURATION['IM_SIZE'], 3))

# Pass the inputs through the resize and rescale layers
x = resize_rescale(inputs)

# Convert KerasTensor to TensorFlow tensor
x = tf.convert_to_tensor(x)

# Pass the processed inputs through the ViT model
x = model(pixel_values=x).last_hidden_state

# Add a global average pooling layer
x = GlobalAveragePooling1D()(x)

# Add a dense layer for classification
outputs = Dense(CONFIGURATION['NUM_CLASSES'], activation='softmax')(x)

# Create the final model
tf_model = Model(inputs=inputs, outputs=outputs)

# Compile the model
tf_model.compile(
    optimizer=Adam(learning_rate=CONFIGURATION['LEARNING_RATE']),
    loss=CategoricalCrossentropy(from_logits=False),
    metrics=[CategoricalAccuracy(), TopKCategoricalAccuracy(k=3)]
)

# Display the model summary
tf_model.summary()

ValueError: A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.operations`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```


In [31]:
resize_rescale = tf.keras.Sequential([
    tf.keras.Input(shape=(CONFIGURATION['IM_SIZE'], CONFIGURATION['IM_SIZE'], 3)),
    tf.keras.layers.Rescaling(1./255),
    tf.keras.layers.Resizing(CONFIGURATION['IM_SIZE'], CONFIGURATION['IM_SIZE']),
    tf.keras.layers.Permute((3, 1, 2))
])
resize_rescale.summary()

In [35]:
x = resize_rescale(tf.keras.layers.Input(shape=(CONFIGURATION['IM_SIZE'], CONFIGURATION['IM_SIZE'], 3)))
x = model.vit(x)
x

ValueError: Exception encountered when calling layer 'vit' (type TFViTMainLayer).

Data of type <class 'keras.src.backend.common.keras_tensor.KerasTensor'> is not allowed only (<class 'tensorflow.python.framework.tensor.Tensor'>, <class 'bool'>, <class 'int'>, <class 'transformers.utils.generic.ModelOutput'>, <class 'tuple'>, <class 'list'>, <class 'dict'>, <class 'numpy.ndarray'>) is accepted for pixel_values.

Call arguments received by layer 'vit' (type TFViTMainLayer):
  • pixel_values=<KerasTensor shape=(None, 3, 224, 224), dtype=float32, sparse=False, name=keras_tensor_12>
  • head_mask=None
  • output_attentions=None
  • output_hidden_states=None
  • interpolate_pos_encoding=None
  • return_dict=None
  • training=False

In [32]:
hf_model = tf.keras.Sequential([
    resize_rescale,
    model
])

ValueError: Only instances of `keras.Layer` can be added to a Sequential model. Received: <transformers.models.vit.modeling_tf_vit.TFViTModel object at 0x7fb3be9e7190> (of type <class 'transformers.models.vit.modeling_tf_vit.TFViTModel'>)

In [51]:
model.compile(
    optimizer=Adam(learning_rate=CONFIGURATION['LEARNING_RATE']),
    metrics=[CategoricalAccuracy(), TopKCategoricalAccuracy(k=3)],
    loss=CategoricalCrossentropy(from_logits=True)  # Ensure from_logits is set if using logits
)

AttributeError: 'Variable' object has no attribute '_distribute_strategy'

In [42]:
train_dataset.take(1)

<_TakeDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 3), dtype=tf.float32, name=None))>

In [69]:
run.finish()

VBox(children=(Label(value='0.005 MB of 0.005 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))