In [1]:
# Install TensorFlow into the running kernel's environment; restart the kernel afterwards if pip reports updated packages.
%pip install tensorflow

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [17]:
import tensorflow as tf
from keras.applications import MobileNetV2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from keras import backend as K

# Image

In [18]:
# Load CIFAR-10 (downloads the archive on first use).
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

# Scale pixels to [0, 1]. Cast to float32 first: dividing the raw uint8 arrays
# by a Python float would silently upcast to float64 and double the memory used.
train_images = train_images.astype("float32") / 255.0
test_images = test_images.astype("float32") / 255.0

# One-hot encode the labels. The class count is passed explicitly so both splits
# are guaranteed to have the same width, matching the 10-unit softmax head.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

In [19]:
# Feature extractor: ImageNet-pretrained MobileNetV2 without its classification top.
input_shape = (32, 32, 3)
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=input_shape)
base_model.trainable = False  # Freeze the backbone; only the new head is trained at first

model = Sequential([
    tf.keras.Input(shape=input_shape),
    # The data cell scales images to [0, 1], but the ImageNet MobileNetV2 weights
    # were trained on inputs in [-1, 1]; map x -> 2x - 1 inside the model so every
    # fit/evaluate/predict call sees correctly scaled inputs.
    tf.keras.layers.Rescaling(scale=2.0, offset=-1.0),
    base_model,
    Flatten(),
    Dense(512, activation='relu'),
    Dense(10, activation='softmax')  # one probability per CIFAR-10 class
])

  base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(32, 32, 3))


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [21]:
# Loss used for the first training phase
def sup_contrastive_loss(y_true, y_pred):
    """Return the mean of the squared element-wise difference between targets and predictions.

    NOTE(review): despite the name, this is a plain mean-squared-error over all
    elements; no positive/negative pairs or embeddings are compared.

    Args:
        y_true: target tensor (one-hot labels when used with ``model.fit`` in this notebook).
        y_pred: model output tensor, broadcast-compatible with ``y_true``.

    Returns:
        Scalar tensor holding the mean squared difference.
    """
    residual = y_true - y_pred
    return tf.reduce_mean(tf.square(residual))

# Phase 1: train with the custom loss (sup_contrastive_loss) and record the history
model.compile(
    optimizer='adam',
    loss=sup_contrastive_loss,
    metrics=['accuracy'],
)
history_contrastive = model.fit(
    train_images,
    train_labels,
    epochs=5,
    validation_data=(test_images, test_labels),
)

# Phase 2 (fine-tuning): mark the backbone trainable, then freeze everything
# except its last 10 layers
base_model.trainable = True
layers_to_freeze = base_model.layers[:-10]
for frozen_layer in layers_to_freeze:
    frozen_layer.trainable = False

# Compile again, this time with categorical cross-entropy, and continue training
# the same model (it starts from the weights reached in phase 1)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
history_softmax = model.fit(
    train_images,
    train_labels,
    epochs=5,
    validation_data=(test_images, test_labels),
)

Epoch 1/5
[1m 825/1563[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m16s[0m 22ms/step - accuracy: 0.2664 - loss: 0.0837

KeyboardInterrupt: 

In [13]:
# Plot training & validation curves for both training phases:
# accuracy in the left panel, loss in the right panel.
import matplotlib.pyplot as plt  # `plt` is not imported by any earlier cell

# (Keras History object, legend prefix, line colour) for each training phase.
runs = [
    (history_contrastive, 'Contrastive', 'blue'),
    (history_softmax, 'Softmax', 'red'),
]

# (history key, panel title, y-axis label, legend location) for each panel.
panels = [
    ('accuracy', 'Model accuracy', 'Accuracy', 'lower right'),
    ('loss', 'Model loss', 'Loss', 'upper right'),
]

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for ax, (metric, title, ylabel, legend_loc) in zip(axes, panels):
    for history, name, color in runs:
        # Solid line: training split; dash-dot line: validation (test) split.
        ax.plot(history.history[metric], label=f'{name} - Train', color=color)
        ax.plot(history.history[f'val_{metric}'], label=f'{name} - Test', color=color, linestyle='dashdot')
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Epoch')
    ax.legend(loc=legend_loc)

fig.tight_layout()
plt.show()

Epoch 1/5
[1m 786/1563[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m1:00[0m 78ms/step - accuracy: 0.4969 - loss: 1.4227

KeyboardInterrupt: 

# Video

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Backbone: ImageNet-pretrained ResNet50 with the classification top removed
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(128, 128, 3))

# Keep every backbone layer fixed so only the new head is trained
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# New classification head on top of the frozen backbone
inputs = Input(shape=(128, 128, 3))
backbone_features = base_model(inputs, training=False)  # run the backbone in inference mode
pooled_features = GlobalAveragePooling2D()(backbone_features)
hidden = Dense(256, activation='relu')(pooled_features)
outputs = Dense(10, activation='softmax')(hidden)  # Assuming 10 classes for classification
model = Model(inputs=inputs, outputs=outputs)

# Compile with integer (sparse) class labels
model.compile(
    optimizer=Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Data preprocessing
# Your video data preprocessing code goes here

# Train the model
# Your training code goes here


# Audio

In [None]:
import tensorflow as tf
import numpy as np
import librosa
import os

# Load ESC-50 dataset
def load_esc50_dataset(dataset_path="path_to_esc50_dataset"):
    """Read the ESC-50 metadata file and return clip paths with their category names.

    The metadata is read by column name from ``<dataset_path>/meta/esc50.csv``, so
    the header row is consumed as a header (not as a clip) and any extra columns
    in the file are ignored.

    Args:
        dataset_path: Root folder of the dataset. Replace the default placeholder
            with your dataset path, or pass the path explicitly.

    Returns:
        Tuple ``(audio_paths, labels)`` of equal-length lists: the path of each
        clip under ``<dataset_path>/audio`` and its ``category`` string.

    Raises:
        ValueError: if the metadata file has no ``filename`` or ``category`` column.
    """
    import csv  # local import: keeps this cell self-contained

    audio_paths, labels = [], []
    meta_file = os.path.join(dataset_path, "meta/esc50.csv")
    # newline="" is what the csv module expects for files it parses itself.
    with open(meta_file, newline="") as f:
        reader = csv.DictReader(f)
        missing = {"filename", "category"} - set(reader.fieldnames or [])
        if missing:
            raise ValueError(f"{meta_file} is missing column(s): {sorted(missing)}")
        for row in reader:
            audio_paths.append(os.path.join(dataset_path, "audio", row["filename"]))
            labels.append(row["category"])
    return audio_paths, labels

# Load and preprocess audio data
def load_and_preprocess_audio(audio_path, target_duration=2.0, sampling_rate=22050):
    """Load one audio clip and turn it into a 128x128, 3-channel log-mel image.

    Args:
        audio_path: Path of the audio file to load.
        target_duration: Clip length in seconds; longer clips are truncated and
            shorter ones are zero-padded at the end.
        sampling_rate: Sampling rate passed to ``librosa.load``.

    Returns:
        float32 tensor of shape (128, 128, 3): the dB-scaled mel spectrogram
        resized to 128x128 and replicated across three channels so it matches
        the (128, 128, 3) input of the ResNet50 model built in this notebook.
    """
    # Load audio file
    y, sr = librosa.load(audio_path, sr=sampling_rate)

    # Ensure the audio has exactly the desired number of samples
    target_len = int(sampling_rate * target_duration)
    if len(y) > target_len:
        y = y[:target_len]
    else:
        y = np.pad(y, (0, target_len - len(y)), mode='constant')

    # Extract Mel spectrogram features and convert power to dB relative to the peak
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
    mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # tf.image.resize reads a 3-D input as (height, width, channels), so the
    # channel axis must be LAST. Putting the new axis first would make the
    # time frames the channel dimension and yield (128, 128, n_frames).
    mel_image = tf.image.resize(mel_spectrogram[:, :, np.newaxis], [128, 128])

    # Replicate the single channel to three for the RGB-input backbone.
    mel_image = tf.image.grayscale_to_rgb(mel_image)

    return mel_image

# Load ESC-50 dataset
audio_paths, labels = load_esc50_dataset()

# Convert labels to integers. int64 is used explicitly so the label dtype
# matches the tf.int64 output type declared for tf.py_function below
# (a plain list of Python ints would become an int32 tensor).
label_to_index = {label: i for i, label in enumerate(np.unique(labels))}
indices = np.asarray([label_to_index[label] for label in labels], dtype=np.int64)

# Create TensorFlow dataset of (path, label) pairs
dataset = tf.data.Dataset.from_tensor_slices((audio_paths, indices))

# Shuffle while the elements are still cheap path strings, so the shuffle
# buffer never has to hold decoded spectrograms; buffer covers the whole set.
dataset = dataset.shuffle(buffer_size=max(len(audio_paths), 1))

# Load and preprocess audio data (runs eagerly inside tf.py_function)
def load_and_preprocess_audio_wrapper(audio_path, label):
    """Decode the path tensor to str, preprocess that clip and pass the label through."""
    audio = load_and_preprocess_audio(audio_path.numpy().decode())
    return audio, label


def _load_example(audio_path, label):
    """Graph-side map function: call the Python loader and restore static shapes."""
    audio, label = tf.py_function(
        load_and_preprocess_audio_wrapper, [audio_path, label], [tf.float32, tf.int64]
    )
    # tf.py_function outputs have unknown static shape. The preprocessing resizes
    # to 128x128; the channel count is left open because it is decided by
    # load_and_preprocess_audio.
    audio.set_shape([128, 128, None])
    label.set_shape([])
    return audio, label


# Decode, then batch the dataset
dataset = dataset.map(_load_example).batch(32)

# Backbone: ImageNet-pretrained ResNet50 without its classification top
base_model = tf.keras.applications.ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(128, 128, 3),
)

# Freeze every backbone layer so only the new head is trained
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Classification head: global average pooling -> 256-unit ReLU -> 50-way softmax
pooled_features = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
hidden = tf.keras.layers.Dense(256, activation='relu')(pooled_features)
predictions = tf.keras.layers.Dense(50, activation='softmax')(hidden)
model = tf.keras.Model(inputs=base_model.input, outputs=predictions)

# Compile with integer (sparse) class labels
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the batched dataset
model.fit(dataset, epochs=10)