In [None]:

# Mount Google Drive into the Colab filesystem at /content/drive. The dataset
# paths used further down in this file live under this mount point.
from google.colab import drive
drive.mount('/content/drive')


import torch
import torchvision
from torchvision import transforms, datasets
import matplotlib.pyplot as plt
import os
from PIL import Image

# Define the path to the dataset (a folder under the mounted Drive)
dataset_directory = "/content/drive/MyDrive/Tomatoes"

# Define transformations for the dataset: resize every image to 256x256 and
# convert it to a tensor. NOTE: no normalization is applied in this pipeline.
data_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

# Load the dataset, applying the transform above to every sample
image_dataset = datasets.ImageFolder(dataset_directory, transform=data_transform)

# Access and print the class names discovered by ImageFolder
class_names = image_dataset.classes
print("Class Names:", class_names)

# Function to display images
def display_images(class_name, images, n=5):
    """Show up to ``n`` images side by side, each titled with ``class_name``.

    Args:
        class_name: Text used as the title of every subplot.
        images: Sequence of image tensors. Each one is permuted with
            ``(1, 2, 0)`` before plotting, so a channel-first layout is
            assumed (TODO confirm against callers).
        n: Maximum number of images to show. Fewer are shown when ``images``
            holds fewer than ``n`` items.

    Returns:
        None. Nothing is drawn when there is nothing to show.
    """
    # Clamp to the images actually available so a class with fewer than ``n``
    # samples does not raise IndexError or leave empty subplot slots.
    count = min(n, len(images))
    if count <= 0:
        return

    plt.figure(figsize=(20, 10))
    for position, image in enumerate(images[:count], start=1):
        plt.subplot(1, count, position)
        # Move the channel axis last before handing the data to imshow.
        plt.imshow(image.permute(1, 2, 0))
        plt.title(class_name)
        plt.axis('off')
    plt.show()

# Load and display images from each class
for class_name in class_names:
    class_dir = os.path.join(dataset_directory, class_name)
    images = []
    image_files = sorted(os.listdir(class_dir))[:5]  # Get the first 5 images
    for image_file in image_files:
        img_path = os.path.join(class_dir, image_file)
        # Open inside a context manager so the file handle is released even
        # if the transform fails, and force RGB so every tensor has three
        # channels (grayscale / palette / RGBA files would otherwise produce
        # tensors that cannot be drawn after the channel permute).
        with Image.open(img_path) as img:
            img_tensor = data_transform(img.convert("RGB"))  # Transform the image to tensor
        images.append(img_tensor)
    display_images(class_name, images)


import os
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from PIL import Image
import numpy as np

# Define the path to the dataset (same value as assigned earlier in this file;
# presumably repeated so this section can be run on its own)
dataset_directory = "/content/drive/MyDrive/Tomatoes"

# Function to display dimensions of images
def display_image_dimensions(image_paths):
    """Print the array shape of every image in ``image_paths``.

    Args:
        image_paths: Iterable of paths to image files.

    Each readable image is printed as ``Image shape: (...)``, where the shape
    is that of the image converted to a NumPy array. A path that raises
    ``OSError`` while being opened or decoded (missing file, directory,
    non-image file) is reported and skipped so one bad entry does not abort
    the whole pass.

    Returns:
        None.
    """
    for img_path in image_paths:
        try:
            # The context manager guarantees the file handle is released.
            with Image.open(img_path) as img:
                shape = np.array(img).shape  # Convert to numpy array to get shape
        except OSError as err:
            print(f"Skipping {img_path}: {err}")
            continue
        print(f"Image shape: {shape}")

# Function to plot class distribution
def plot_class_distribution():
    """Plot a bar chart of how many files each class folder contains.

    Every sub-directory of the module-level ``dataset_directory`` is treated
    as one class. Only regular files directly inside a class folder are
    counted, and classes are plotted in sorted name order.

    Returns:
        None. The chart is shown with ``plt.show()``.
    """
    class_counts = {}
    # os.listdir returns entries in arbitrary order; sort so the bar order is
    # stable from run to run.
    for class_name in sorted(os.listdir(dataset_directory)):
        class_dir = os.path.join(dataset_directory, class_name)
        if not os.path.isdir(class_dir):  # Ensure it's a directory
            continue
        # Count regular files only, so nested folders do not inflate a class.
        with os.scandir(class_dir) as entries:
            class_counts[class_name] = sum(1 for entry in entries if entry.is_file())

    plt.bar(list(class_counts), list(class_counts.values()))
    plt.xlabel('Class')
    plt.ylabel('Number of Images')
    plt.title('Class Distribution')
    plt.xticks(rotation=45)
    plt.show()

# Perform EDA
# Walk the class folders in sorted order (os.listdir order is arbitrary) so
# the printed report is stable from run to run.
for class_name in sorted(os.listdir(dataset_directory)):
    class_dir = os.path.join(dataset_directory, class_name)
    if os.path.isdir(class_dir):  # Ensure it's a directory
        # Keep regular files only, then take the first 5; a nested folder
        # must not be handed on as if it were an image path.
        image_files = [
            image_file
            for image_file in sorted(os.listdir(class_dir))
            if os.path.isfile(os.path.join(class_dir, image_file))
        ][:5]
        image_paths = [os.path.join(class_dir, image_file) for image_file in image_files]
        print(f"Class: {class_name}")
        display_image_dimensions(image_paths)

plot_class_distribution()

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import confusion_matrix
import numpy as np
import matplotlib.pyplot as plt

# Define the path to the dataset
dataset_directory = "/content/drive/MyDrive/Tomatoes"

# Training transform: resize, random horizontal flip (augmentation), tensor
# conversion, then normalization.
data_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalization values for ImageNet
])

# Evaluation transform: same as above but WITHOUT the random flip, so
# validation metrics are not perturbed by augmentation.
eval_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalization values for ImageNet
])

# Load the dataset twice over the same folder: one view per transform.
# Both views are built from the same directory, so they are expected to list
# the samples in the same order and an index means the same file in both.
image_dataset = datasets.ImageFolder(dataset_directory, transform=data_transform)
eval_image_dataset = datasets.ImageFolder(dataset_directory, transform=eval_transform)

# Split the dataset into training and validation sets
val_split = 0.2
train_size = int((1 - val_split) * len(image_dataset))
val_size = len(image_dataset) - train_size
# Seeded generator so the train/validation split is reproducible across runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_subset = random_split(
    image_dataset, [train_size, val_size], generator=split_generator
)
# Re-point the validation indices at the un-augmented view of the data.
val_dataset = torch.utils.data.Subset(eval_image_dataset, val_subset.indices)

# Define DataLoaders
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Define the model - ResNet18 with ImageNet weights. The `weights=` argument
# replaces the deprecated `pretrained=True` (requires torchvision >= 0.13).
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
num_ftrs = model.fc.in_features
# Size the final fully connected layer from the dataset instead of hard-coding
# the class count, so adding or removing a class folder keeps the head valid.
model.fc = nn.Linear(num_ftrs, len(image_dataset.classes))

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Function to calculate accuracy
def calculate_accuracy(outputs, labels):
    """Return the percentage of samples whose predicted class equals the label.

    Args:
        outputs: Tensor of per-class scores; the predicted class of each row
            is the index of its largest value along dimension 1.
        labels: Tensor of class indices, one per row of ``outputs``.

    Returns:
        Accuracy as a float percentage in ``[0, 100]``. An empty batch yields
        ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    total = labels.size(0)
    if total == 0:
        return 0.0
    # argmax matches how predictions are taken in the train/validation loops.
    predicted = outputs.argmax(dim=1)
    correct = (predicted == labels).sum().item()
    return (correct / total) * 100

# Training loop
# Training configuration: number of passes over the training set and the
# device to run on (first CUDA GPU when available, otherwise the CPU).
num_epochs = 20
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# Lists to store metrics for plotting (one value appended per epoch)
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []

for epoch in range(num_epochs):
    # ---- Training phase ----
    model.train()
    running_loss = 0.0   # sum of (batch loss * batch size) over the epoch
    total_correct = 0    # number of correctly classified training samples
    total_samples = 0    # number of training samples seen this epoch

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so that dividing by the
        # dataset size below gives a per-sample average (assumes the
        # criterion returns a batch mean, the CrossEntropyLoss default).
        running_loss += loss.item() * inputs.size(0)

        # Calculate training accuracy from the outputs of this forward pass
        total_correct += (outputs.argmax(dim=1) == labels).sum().item()
        total_samples += labels.size(0)

    epoch_loss = running_loss / len(train_dataset)
    epoch_accuracy = (total_correct / total_samples) * 100

    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_accuracy)

    # ---- Validation phase ----
    model.eval()
    val_running_loss = 0.0
    val_total_correct = 0
    val_total_samples = 0

    # No gradients are needed while evaluating
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * inputs.size(0)
            val_total_correct += (outputs.argmax(dim=1) == labels).sum().item()
            val_total_samples += labels.size(0)

    val_epoch_loss = val_running_loss / len(val_dataset)
    val_epoch_accuracy = (val_total_correct / val_total_samples) * 100

    val_losses.append(val_epoch_loss)
    val_accuracies.append(val_epoch_accuracy)

    # One summary line per epoch (epoch is printed 1-based)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%, Val Loss: {val_epoch_loss:.4f}, Val Accuracy: {val_epoch_accuracy:.2f}%")

# Calculate overall accuracy on the validation set and collect the true and
# predicted label of every sample for the confusion matrix
model.eval()
total_correct = 0
total_samples = 0
all_labels = []
all_preds = []

with torch.no_grad():
    for inputs, labels in val_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # Take the predictions once and reuse them for both the accuracy
        # count and the confusion-matrix inputs.
        preds = outputs.argmax(dim=1)

        # Calculate accuracy
        total_correct += (preds == labels).sum().item()
        total_samples += labels.size(0)

        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())

overall_accuracy = (total_correct / total_samples) * 100
print(f"\nOverall Validation Accuracy: {overall_accuracy:.2f}%")

# Compute confusion matrix. Passing every class index explicitly keeps the
# matrix at len(classes) x len(classes) even when a class never appears in the
# validation labels or predictions, so the tick labels below stay aligned.
classes = image_dataset.classes
conf_matrix = confusion_matrix(all_labels, all_preds, labels=list(range(len(classes))))

# Display confusion matrix
plt.figure(figsize=(8, 6))
plt.imshow(conf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=45)
plt.yticks(tick_marks, classes)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.tight_layout()

# Print confusion matrix
print("\nConfusion Matrix:")
print(conf_matrix)
plt.show()

# Draw the per-epoch curves in one figure: loss on the left panel, accuracy on
# the right panel, each comparing the training and validation series.
metrics_fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(10, 5))
epoch_axis = range(1, num_epochs + 1)

# Left panel: training vs. validation loss
loss_ax.plot(epoch_axis, train_losses, label='Training Loss')
loss_ax.plot(epoch_axis, val_losses, label='Validation Loss', color='g')
loss_ax.set_title('Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

# Right panel: training vs. validation accuracy
accuracy_ax.plot(epoch_axis, train_accuracies, label='Training Accuracy', color='r')
accuracy_ax.plot(epoch_axis, val_accuracies, label='Validation Accuracy', color='g')
accuracy_ax.set_title('Accuracy')
accuracy_ax.set_xlabel('Epoch')
accuracy_ax.set_ylabel('Accuracy (%)')
accuracy_ax.legend()

plt.tight_layout()
plt.show()

Collecting openai
  Downloading openai-1.35.13-py3-none-any.whl (328 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m328.5/328.5 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gtts
  Downloading gTTS-2.5.1-py3-none-any.whl (29 kB)
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [