In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
import os

# Check for GPU/TPU availability
if not tf.config.list_physical_devices('GPU'):
    print("No GPU detected. Training may be slower.")
else:
    print("GPU is available!")

# Load and preprocess CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Normalize pixel values to the range [0, 1]
x_train = x_train / 255.0
x_test = x_test / 255.0

# Convert labels to one-hot encoding
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Optional: Use a smaller subset for quick testing (uncomment if needed)
# x_train = x_train[:10000]
# y_train = y_train[:10000]
# x_test = x_test[:2000]
# y_test = y_test[:2000]

# Load the pre-trained MobileNetV2 model without the top layer
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(96, 96, 3))

# Freeze the base model layers to retain pre-trained features
base_model.trainable = False

# Add new layers for CIFAR-10 classification
model = models.Sequential([
    layers.Resizing(96, 96),  # Resize CIFAR-10 images to 96x96
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),  # Dropout for regularization
    layers.Dense(10, activation='softmax')  # 10 classes for CIFAR-10
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Display the model's architecture
model.summary()

# Train only the newly added layers
print("\nTraining the new layers...")
history = model.fit(x_train, y_train, epochs=3, batch_size=128,  # Reduced epochs for faster debugging
                    validation_data=(x_test, y_test))

# Unfreeze the base model layers and fine-tune
print("\nFine-tuning the entire model...")
base_model.trainable = True
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5),  # Lower learning rate
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history_fine = model.fit(x_train, y_train, epochs=3, batch_size=128,  # Reduced epochs
                         validation_data=(x_test, y_test))

# Evaluate the model
print("\nEvaluating the model on the test set...")
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(f'\nTest accuracy: {test_acc}')


No GPU detected. Training may be slower.



Training the new layers...
Epoch 1/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 653ms/step - accuracy: 0.6210 - loss: 1.1405 - val_accuracy: 0.7847 - val_loss: 0.6160
Epoch 2/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m250s[0m 640ms/step - accuracy: 0.7608 - loss: 0.6839 - val_accuracy: 0.7997 - val_loss: 0.5807
Epoch 3/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m243s[0m 622ms/step - accuracy: 0.7853 - loss: 0.6177 - val_accuracy: 0.8069 - val_loss: 0.5550

Fine-tuning the entire model...
Epoch 1/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1313s[0m 3s/step - accuracy: 0.6645 - loss: 1.1624 - val_accuracy: 0.7858 - val_loss: 0.6705
Epoch 2/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1495s[0m 4s/step - accuracy: 0.8355 - loss: 0.5096 - val_accuracy: 0.8157 - val_loss: 0.5646
Epoch 3/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1434s[0m 4s/step - accuracy: 0.8679 - loss: 0.40

In [7]:
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import nltk
from nltk.corpus import stopwords
import numpy as np

# Download the IMDB dataset using TensorFlow Datasets
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Load IMDB dataset from TensorFlow datasets (train and test)
dataset, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)

# Split the dataset into train and test
train_data, test_data = dataset['train'], dataset['test']

# Preprocess the data: remove stopwords, tokenize, and convert to string format
def preprocess_text(text):
    # Convert text to string and remove stopwords
    text_str = text.numpy().decode('utf-8')
    words = text_str.split()
    filtered_words = [word for word in words if word.lower() not in stop_words]
    return " ".join(filtered_words)

# Convert data to list and preprocess
train_texts = [preprocess_text(text) for text, label in train_data]
test_texts = [preprocess_text(text) for text, label in test_data]

train_labels = [label.numpy() for text, label in train_data]
test_labels = [label.numpy() for text, label in test_data]

# Convert to TF-IDF features
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(train_texts)
X_test_tfidf = vectorizer.transform(test_texts)

# Train Logistic Regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train_tfidf, train_labels)

# Make predictions
y_pred = model.predict(X_test_tfidf)

# Evaluate model
print("Accuracy:", accuracy_score(test_labels, y_pred))
print("Classification Report:")
print(classification_report(test_labels, y_pred))


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


[1mDownloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to C:\Users\user\tensorflow_datasets\imdb_reviews\plain_text\1.0.0...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling C:\Users\user\tensorflow_datasets\imdb_reviews\plain_text\incomplete.0RBOG7_1.0.0\imdb_reviews-train…

Generating test examples...: 0 examples [00:00, ? examples/s]

Shuffling C:\Users\user\tensorflow_datasets\imdb_reviews\plain_text\incomplete.0RBOG7_1.0.0\imdb_reviews-test.…

Generating unsupervised examples...: 0 examples [00:00, ? examples/s]

Shuffling C:\Users\user\tensorflow_datasets\imdb_reviews\plain_text\incomplete.0RBOG7_1.0.0\imdb_reviews-unsup…

[1mDataset imdb_reviews downloaded and prepared to C:\Users\user\tensorflow_datasets\imdb_reviews\plain_text\1.0.0. Subsequent calls will reuse this data.[0m
Accuracy: 0.88468
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.88      0.88     12500
           1       0.88      0.89      0.88     12500

    accuracy                           0.88     25000
   macro avg       0.88      0.88      0.88     25000
weighted avg       0.88      0.88      0.88     25000



In [5]:
pip install tensorflow_datasets

Collecting tensorflow_datasets
  Downloading tensorflow_datasets-4.9.7-py3-none-any.whl.metadata (9.6 kB)
Collecting dm-tree (from tensorflow_datasets)
  Downloading dm_tree-0.1.8-cp312-cp312-win_amd64.whl.metadata (2.0 kB)
Collecting immutabledict (from tensorflow_datasets)
  Downloading immutabledict-4.2.1-py3-none-any.whl.metadata (3.5 kB)
Collecting promise (from tensorflow_datasets)
  Downloading promise-2.3.tar.gz (19 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting simple-parsing (from tensorflow_datasets)
  Downloading simple_parsing-0.1.7-py3-none-any.whl.metadata (7.3 kB)
Collecting tensorflow-metadata (from tensorflow_datasets)
  Downloading tensorflow_metadata-1.16.1-py3-none-any.whl.metadata (2.4 kB)
Collecting etils>=1.9.1 (from etils[edc,enp,epath,epy,etree]>=1.9.1; python_version >= "3.11"->tensorflow_datasets)
  Downloading etils-1.11.0-py3-none-any.whl.metadata (6.5 kB)
Collecting importlib_resources 

In [13]:
# Import necessary libraries
import tensorflow_datasets as tfds
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import nltk
from nltk.corpus import stopwords

# Download NLTK stopwords (run this once)
nltk.download('stopwords')

# IMDB dataset from TensorFlow Datasets (train and test)
dataset, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)

# Split the dataset into train and test
train_data, test_data = dataset['train'], dataset['test']

# Preprocess the text data: remove stopwords, tokenize, and convert to string format
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    # Check if the input is a TensorFlow tensor (which would need .numpy()) or a plain string
    if isinstance(text, tf.Tensor):
        text_str = text.numpy().decode('utf-8')  # Decode if it's a tensor
    else:
        text_str = text  # If it's a string, use it directly
    
    # Tokenize and remove stopwords
    words = text_str.split()
    filtered_words = [word for word in words if word.lower() not in stop_words]
    return " ".join(filtered_words)

# Convert the train and test data into processed texts and labels
train_texts = [preprocess_text(text) for text, label in train_data]
test_texts = [preprocess_text(text) for text, label in test_data]

train_labels = [label.numpy() for text, label in train_data]
test_labels = [label.numpy() for text, label in test_data]

# Convert texts to TF-IDF features
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(train_texts)
X_test_tfidf = vectorizer.transform(test_texts)

# Train a Logistic Regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train_tfidf, train_labels)

# Make predictions
y_pred = model.predict(X_test_tfidf)

# Evaluate the model
print("Accuracy:", accuracy_score(test_labels, y_pred))
print("Classification Report:")
print(classification_report(test_labels, y_pred))

# Example of using the model to predict sentiment on new text
def predict_sentiment(text):
    # Preprocess the text
    text_processed = preprocess_text(text)
    
    # Vectorize the text
    text_tfidf = vectorizer.transform([text_processed])
    
    # Predict sentiment (0: Negative, 1: Positive)
    prediction = model.predict(text_tfidf)
    sentiment = "Positive" if prediction[0] == 1 else "Negative"
    return sentiment

# Test with new sentences
new_text = "I absolutely loved this movie!"
print(f"Sentiment: {predict_sentiment(new_text)}")

new_text = "This movie was terrible, I hated it."
print(f"Sentiment: {predict_sentiment(new_text)}")


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Accuracy: 0.88468
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.88      0.88     12500
           1       0.88      0.89      0.88     12500

    accuracy                           0.88     25000
   macro avg       0.88      0.88      0.88     25000
weighted avg       0.88      0.88      0.88     25000

Sentiment: Positive
Sentiment: Negative
