In [2]:
import random
from PIL import Image
import os

# Folder that receives the generated PNG files (created here if it is missing)
output_dir = "data"
os.makedirs(name=output_dir, exist_ok=True)

# Function to generate random image
def generate_random_image(width, height, image_number):
    """Create one RGB image of uniformly random pixels and save it as a PNG.

    The file is written to ``<output_dir>/random_image_<image_number>.png``,
    where ``output_dir`` is the module-level variable defined in this cell.

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        image_number: Number embedded in the output file name.
    """
    # Three bytes (R, G, B) per pixel. Drawing them in one call replaces
    # width * height * 3 separate random.randint() calls made from Python loops.
    n_bytes = width * height * 3
    if n_bytes:
        raw_pixels = random.getrandbits(8 * n_bytes).to_bytes(n_bytes, "big")
        image = Image.frombytes('RGB', (width, height), raw_pixels)
    else:
        # Degenerate size: there is no pixel data to fill in.
        image = Image.new('RGB', (width, height))

    # Save the image
    image.save(os.path.join(output_dir, f"random_image_{image_number}.png"))

# Generate NUM_IMAGES random images of IMAGE_SIZE x IMAGE_SIZE pixels.
# File numbering starts at 1, hence the shifted range.
NUM_IMAGES = 1000
IMAGE_SIZE = 256

for image_number in range(1, NUM_IMAGES + 1):
    generate_random_image(IMAGE_SIZE, IMAGE_SIZE, image_number)

print(f"{NUM_IMAGES} random images generated successfully!")



1000 random images generated successfully!


In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Flatten, BatchNormalization, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
from pathlib import Path
import itertools
import os
import shutil
import random
import glob
import matplotlib.pyplot as plt
import warnings
from pathlib import Path
warnings.simplefilter(action='ignore', category=FutureWarning)
%matplotlib inline
from PIL import Image

In [2]:
# Split the generated images into train/valid/test folders with one sub-folder
# per class. Images are assigned to classes at random, so the labels carry no
# real signal.
#
# Paths are built relative to `dataset_root` instead of changing the working
# directory, so later cells that use relative "data/..." paths keep working.
dataset_root = Path('data')
class_names = ('positive', 'negative', 'neutral')
images_per_class = {'train': 233, 'valid': 66, 'test': 33}

# Only split once: on a re-run the images have already been moved and too few
# remain in `dataset_root` to sample from.
if not (dataset_root / 'train' / 'positive').is_dir():
    # Create every target folder first.
    for split_name in images_per_class:
        for class_name in class_names:
            os.makedirs(dataset_root / split_name / class_name)

    # Move the images: 3 classes * (233 + 66 + 33) = 996 files in total.
    for split_name, sample_count in images_per_class.items():
        for class_name in class_names:
            # Re-glob each time so files that were already moved are excluded.
            remaining_images = glob.glob(str(dataset_root / 'random_image*'))
            for image_file in random.sample(remaining_images, sample_count):
                shutil.move(image_file, str(dataset_root / split_name / class_name))

In [2]:
# Locations of the three dataset splits, relative to the working directory
train_path, valid_path, test_path = (
    Path("data") / split_name for split_name in ("train", "valid", "test")
)

In [13]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from transformers import ViTForImageClassification
from transformers import AdamW
from sklearn.metrics import accuracy_score

# Dataset location and training hyperparameters
data_path = Path("data")  # Update this path to your dataset location
num_classes = 3           # Number of sentiment classes (e.g., positive, negative, neutral)
batch_size = 16           # Images per mini-batch
num_epochs = 10           # Full passes over the training data
learning_rate = 2e-5      # Learning rate for the optimizer

# Define transformations
# The checkpoint loaded below ('google/vit-base-patch16-224-in21k') expects
# inputs normalized with mean 0.5 / std 0.5 per channel. The ImageNet
# statistics (0.485, 0.456, 0.406) / (0.229, 0.224, 0.225) do not match its
# pre-training preprocessing.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to match the input size of ViT
    transforms.ToTensor(),          # PIL image -> float tensor in [0, 1]
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # maps [0, 1] to [-1, 1]
])

# Load datasets
# Sub-folder names must match the split directories this notebook creates
# under the data folder: train / valid / test. ImageFolder derives class
# labels from the class sub-folders inside each split.
train_dataset = ImageFolder(root=f'{data_path}/train', transform=transform)
valid_dataset = ImageFolder(root=f'{data_path}/valid', transform=transform)
test_dataset = ImageFolder(root=f'{data_path}/test', transform=transform)

# Only the training data is shuffled.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Pre-trained Vision Transformer with a classification head sized for
# `num_classes` labels
model_name = 'google/vit-base-patch16-224-in21k'
model = ViTForImageClassification.from_pretrained(
    model_name,
    num_labels=num_classes,
)

# Loss function and optimizer used by the training loop
# NOTE(review): `AdamW` here is the class imported from `transformers`; newer
# releases reportedly deprecate it in favor of `torch.optim.AdamW` -- confirm
# against the installed version.
criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=learning_rate)

# Training and evaluation function
def _evaluate_accuracy(model, data_loader, device):
    """Return the accuracy of `model` over every batch in `data_loader`."""
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images).logits
            # Predicted class = index of the largest logit
            _, preds = torch.max(outputs, 1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    return accuracy_score(all_labels, all_preds)


def train_and_evaluate(model, train_loader, valid_loader, test_loader, num_epochs):
    """Fine-tune `model`, reporting validation accuracy per epoch and test accuracy at the end.

    Uses the module-level `optimizer` and `criterion`, which must already be
    defined and must refer to this `model`'s parameters. The model is moved to
    CUDA when available, otherwise it stays on the CPU.

    Args:
        model: Model whose forward pass returns an object with a `.logits` attribute.
        train_loader: Iterable of (images, labels) batches used for training;
            must expose `.dataset` so the loss can be averaged per sample.
        valid_loader: Iterable of (images, labels) batches evaluated after each epoch.
        test_loader: Iterable of (images, labels) batches evaluated once after training.
        num_epochs: Number of passes over `train_loader`.

    Returns:
        None. Progress and metrics are printed.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(images).logits
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # loss.item() is multiplied by the batch size so the epoch total
            # can be divided by the dataset size below.
            running_loss += loss.item() * images.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}')

        # Validate
        accuracy = _evaluate_accuracy(model, valid_loader, device)
        print(f'Validation Accuracy: {accuracy:.4f}')

    # Test
    accuracy = _evaluate_accuracy(model, test_loader, device)
    print(f'Test Accuracy: {accuracy:.4f}')

# Kick off fine-tuning; validation accuracy is printed per epoch, test accuracy at the end
train_and_evaluate(
    model=model,
    train_loader=train_loader,
    valid_loader=valid_loader,
    test_loader=test_loader,
    num_epochs=num_epochs,
)


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10, Loss: 1.1099
Validation Accuracy: 0.3333
Epoch 2/10, Loss: 1.1011
Validation Accuracy: 0.3333
Epoch 3/10, Loss: 1.1004
Validation Accuracy: 0.3333
Epoch 4/10, Loss: 1.1018
Validation Accuracy: 0.3333
Epoch 5/10, Loss: 1.0999
Validation Accuracy: 0.3333
Epoch 6/10, Loss: 1.0988
Validation Accuracy: 0.3333
Epoch 7/10, Loss: 1.0983
Validation Accuracy: 0.3333
Epoch 8/10, Loss: 1.0989
Validation Accuracy: 0.3384
Epoch 9/10, Loss: 1.0990
Validation Accuracy: 0.3333
Epoch 10/10, Loss: 1.0968
Validation Accuracy: 0.3687
Test Accuracy: 0.3232
