<a href="https://colab.research.google.com/github/SWETHA19052004/SWETHA190052004/blob/main/SWETHA_S_22cs188.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the body-performance dataset from the Colab working directory
df = pd.read_csv('/content/bodyPerformance.csv')

# Display the first few rows of the dataset and column names
print(df.head())
print("Columns in dataset:", df.columns)

# Label column. The preview printed above shows 'class' (values such as A/B/C)
# as the last column of this dataset. A name that does not exist in the frame
# makes the encoding and the train/test split below fall through to the error
# branch, so it must match a real column.
target_column = 'class'

# Step 1: Handling missing values
# Numeric columns: fill gaps with the column mean
numeric_cols = df.select_dtypes(include=[np.number]).columns
df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())

# Categorical (object) columns: fill gaps with the most frequent value.
# Guarded because .mode().iloc[0] raises on a frame without object columns.
categorical_cols = df.select_dtypes(include=[object]).columns
if len(categorical_cols) > 0:
    df[categorical_cols] = df[categorical_cols].fillna(df[categorical_cols].mode().iloc[0])

label_encoder = LabelEncoder()
scaler = StandardScaler()

# Numeric feature columns to standardise (the target is never scaled)
numeric_features = [col for col in numeric_cols if col != target_column]

if target_column in df.columns:
    # Step 2: Encoding categorical data
    # The target becomes integer class ids; the remaining categorical feature
    # columns (e.g. 'gender') are one-hot encoded.
    y = pd.Series(label_encoder.fit_transform(df[target_column]), index=df.index, name=target_column)
    X = pd.get_dummies(df.drop(target_column, axis=1), drop_first=True)

    # Step 3: Splitting the dataset into 80% training and 20% test data.
    # The split happens BEFORE scaling so that test rows cannot influence the
    # statistics the scaler learns.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_test = X_train.copy(), X_test.copy()

    # Step 4: Feature scaling -- fit on the training rows only, then apply the
    # same transformation to the test rows.
    if numeric_features:
        X_train[numeric_features] = scaler.fit_transform(X_train[numeric_features])
        X_test[numeric_features] = scaler.transform(X_test[numeric_features])

    # Display the shapes of the train and test sets
    print(f"X_train shape: {X_train.shape}")
    print(f"X_test shape: {X_test.shape}")
    print(f"y_train shape: {y_train.shape}")
    print(f"y_test shape: {y_test.shape}")
else:
    print(f"Error: '{target_column}' not found in dataset columns.")


    age gender  height_cm  weight_kg  body fat_%  diastolic  systolic  \
0  27.0      M      172.3      75.24        21.3       80.0     130.0   
1  25.0      M      165.0      55.80        15.7       77.0     126.0   
2  31.0      M      179.6      78.00        20.1       92.0     152.0   
3  32.0      M      174.5      71.10        18.4       76.0     147.0   
4  28.0      M      173.8      67.70        17.1       70.0     127.0   

   gripForce  sit and bend forward_cm  sit-ups counts  broad jump_cm class  
0       54.9                     18.4            60.0          217.0     C  
1       36.4                     16.3            53.0          229.0     A  
2       44.8                     12.0            49.0          181.0     C  
3       41.4                     15.2            53.0          219.0     B  
4       43.5                     27.1            45.0          217.0     B  
Columns in dataset: Index(['age', 'gender', 'height_cm', 'weight_kg', 'body fat_%', 'diastolic',
  

In [None]:
# Importing necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models

# Step 1: Device configuration -- run on the GPU when CUDA is available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Step 2: Data preprocessing and augmentation
# Per-channel mean / std used to normalise CIFAR-10 images in both pipelines
cifar_mean = (0.4914, 0.4822, 0.4465)
cifar_std = (0.2023, 0.1994, 0.2010)

# Training pipeline: random crop (4-pixel padding) and horizontal flip, then tensor + normalise
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

# Test pipeline: no augmentation, only tensor conversion + normalisation
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

# Step 3: CIFAR-10 datasets (downloaded into ./data) and their batch loaders
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False)

# Step 4: Pretrained ResNet-18 whose final fully connected layer is replaced
# by a 10-way head (CIFAR-10 has 10 classes)
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 10)
model = model.to(device)

# Step 5: Cross-entropy loss and Adam optimizer over all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 6: Training
num_epochs = 10
total_step = len(train_loader)

for epoch in range(num_epochs):
    model.train()
    # Running totals are reset at the start of every epoch
    running_loss, correct, total = 0.0, 0, 0

    for i, batch in enumerate(train_loader):
        images, labels = (tensor.to(device) for tensor in batch)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate loss and accuracy statistics for the progress report
        running_loss += loss.item()
        predicted = torch.max(outputs, dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Report the epoch-to-date averages every 100 batches
        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{total_step}], '
                  f'Loss: {running_loss / (i + 1):.4f}, Accuracy: {100 * correct / total:.2f}%')

# Step 7: Evaluation on the test set (no gradients needed)
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predicted = torch.max(outputs, dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Test Accuracy: {100 * correct / total:.2f}%')


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 28461664.06it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 123MB/s]


Epoch [1/10], Step [100/391], Loss: 1.3803, Accuracy: 51.54%
Epoch [1/10], Step [200/391], Loss: 1.1853, Accuracy: 58.93%
Epoch [1/10], Step [300/391], Loss: 1.0854, Accuracy: 62.66%
Epoch [2/10], Step [100/391], Loss: 0.7520, Accuracy: 74.66%
Epoch [2/10], Step [200/391], Loss: 0.7430, Accuracy: 74.94%
Epoch [2/10], Step [300/391], Loss: 0.7373, Accuracy: 74.99%
Epoch [3/10], Step [100/391], Loss: 0.6606, Accuracy: 77.61%
Epoch [3/10], Step [200/391], Loss: 0.6523, Accuracy: 77.69%
Epoch [3/10], Step [300/391], Loss: 0.6485, Accuracy: 77.80%
Epoch [4/10], Step [100/391], Loss: 0.6033, Accuracy: 79.75%
Epoch [4/10], Step [200/391], Loss: 0.5948, Accuracy: 79.72%
Epoch [4/10], Step [300/391], Loss: 0.5891, Accuracy: 79.81%
Epoch [5/10], Step [100/391], Loss: 0.5724, Accuracy: 80.19%
Epoch [5/10], Step [200/391], Loss: 0.5561, Accuracy: 80.91%
Epoch [5/10], Step [300/391], Loss: 0.5508, Accuracy: 81.06%
Epoch [6/10], Step [100/391], Loss: 0.5029, Accuracy: 82.45%
Epoch [6/10], Step [200/

In [None]:
import argparse
import pandas as pd
import torch
from torch import distributed, nn, optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

def parse_args():
    """Parse the command-line options used by this script.

    Unknown arguments are ignored (``parse_known_args``) so the function also
    works inside a notebook kernel, whose ``sys.argv`` carries extra flags.

    Returns:
        argparse.Namespace with ``config``, ``resume``, ``no_cuda``,
        ``data_parallel``, ``backend``, ``init_method``, ``world_size`` and
        ``rank``.
    """
    # (flags, add_argument keyword options) for every supported option
    option_specs = [
        (("-c", "--config"), {"type": str, "default": "configs/mnist.yaml"}),
        (("--resume",), {"type": str, "default": None}),
        (("--no-cuda",), {"action": "store_true"}),
        (("--data-parallel",), {"action": "store_true"}),
        # distributed
        (("--backend",), {"type": str, "default": "nccl"}),
        (("--init-method",), {"type": str, "default": "tcp://127.0.0.1:23456"}),
        (("--world-size",), {"type": int, "default": 1}),
        (("--rank",), {"type": int, "default": 0}),
    ]

    parser = argparse.ArgumentParser()
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    # Second element holds the unrecognised arguments; they are discarded
    known_args, _unknown = parser.parse_known_args()
    return known_args

def init_process(backend, init_method, world_size, rank):
    """Initialise the torch.distributed process group for this process.

    All four arguments are forwarded unchanged, by keyword, to
    ``torch.distributed.init_process_group``.
    """
    group_options = {
        "backend": backend,
        "init_method": init_method,
        "world_size": world_size,
        "rank": rank,
    }
    distributed.init_process_group(**group_options)

def load_and_preprocess_data(csv_path='/content/bodyPerformance.csv', batch_size=32,
                             test_size=0.2, random_state=42):
    """Load a CSV dataset and return training / validation DataLoaders.

    The last column of the CSV is treated as the label; every other column is
    a feature. Categorical features are one-hot encoded, labels are mapped to
    integer class ids, and features are standardised.

    Args:
        csv_path: Path of the CSV file to read.
        batch_size: Batch size used by both loaders.
        test_size: Fraction of rows held out for validation.
        random_state: Seed for the train/validation split.

    Returns:
        (train_loader, val_loader): loaders over TensorDatasets of
        (float32 features, int64 labels). Only the training loader shuffles.
    """
    # Load the dataset
    data = pd.read_csv(csv_path)

    # Separate features (all but the last column) and labels (last column)
    features = pd.get_dummies(data.iloc[:, :-1])
    labels = LabelEncoder().fit_transform(data.iloc[:, -1])

    # Split first, so the scaler below never sees validation rows
    X_train, X_val, y_train, y_val = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )

    # Standardise: statistics come from the training rows only and are then
    # applied unchanged to the validation rows (avoids data leakage)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    # Convert to PyTorch tensors and wrap them in datasets
    train_dataset = TensorDataset(
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.long),
    )
    val_dataset = TensorDataset(
        torch.tensor(X_val, dtype=torch.float32),
        torch.tensor(y_val, dtype=torch.long),
    )

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader

class SimpleNN(nn.Module):
    """Two-layer fully connected classifier: input -> 64 hidden units (ReLU) -> class logits."""

    def __init__(self, input_size, num_classes):
        super().__init__()
        hidden_units = 64
        self.fc1 = nn.Linear(input_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for the batch ``x``."""
        hidden = self.fc1(x).relu()
        return self.fc2(hidden)

class Trainer:
    """Minimal training / evaluation loop for a classification model.

    Args:
        model: Module producing class logits.
        optimizer: Optimizer over the model's parameters.
        train_loader: Iterable of (inputs, labels) batches used by ``fit``.
        valid_loader: Iterable of (inputs, labels) batches used by ``evaluate``.
        scheduler: Optional LR scheduler stepped once per epoch (may be None).
        device: Device that batches (and loaded checkpoints) are moved to.
    """

    def __init__(self, model, optimizer, train_loader, valid_loader, scheduler, device):
        self.model = model
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.valid_loader = valid_loader
        self.scheduler = scheduler
        self.device = device
        # First epoch index `fit` runs; `resume` overrides it from a checkpoint
        self.start_epoch = 0
        # Built once instead of once per batch
        self.criterion = nn.CrossEntropyLoss()

    def fit(self, epochs=10):
        """Train until ``epochs`` epochs are complete, starting at ``self.start_epoch``.

        Prints the mean batch loss after every epoch.
        """
        for epoch in range(self.start_epoch, epochs):
            self.model.train()
            running_loss = 0.0
            for inputs, labels in self.train_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)

                # Zero the parameter gradients
                self.optimizer.zero_grad()

                # Forward pass
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)

                # Backward pass and optimize
                loss.backward()
                self.optimizer.step()

                running_loss += loss.item()
            if self.scheduler is not None:
                self.scheduler.step()
            # max(..., 1) keeps an empty loader from dividing by zero
            num_batches = max(len(self.train_loader), 1)
            print(f'Epoch {epoch + 1}/{epochs}, Loss: {running_loss / num_batches}')

        print('Finished Training')

    def resume(self, checkpoint_path):
        """Restore model and optimizer state from a checkpoint file.

        The checkpoint must contain 'model_state_dict' and
        'optimizer_state_dict'. An optional 'epoch' entry is taken as the
        number of epochs already completed, so the next ``fit`` call continues
        from there.
        NOTE(review): no code in this file writes checkpoints -- confirm that
        'epoch' really means "epochs completed" for the files being loaded.
        """
        # map_location lets a checkpoint saved on GPU load on a CPU-only host.
        # torch.load unpickles the file: only load checkpoints you trust.
        checkpoint = torch.load(checkpoint_path, map_location=self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        if 'epoch' in checkpoint:
            self.start_epoch = int(checkpoint['epoch'])

    def evaluate(self):
        """Print and return the accuracy (in percent) on ``valid_loader``.

        Returns 0.0 when the validation loader yields no samples.
        """
        self.model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in self.valid_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = self.model(inputs)
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total if total else 0.0
        print(f'Validation Accuracy: {accuracy:.2f}%')
        return accuracy

def main():
    """Entry point: parse options, build data / model / optimizer, then train and evaluate.

    ``args.config`` is parsed but not read here. When ``--world-size`` is
    greater than 1 the process group is initialised and the model is wrapped
    in DistributedDataParallel; otherwise ``--data-parallel`` optionally wraps
    it in DataParallel.
    """
    torch.backends.cudnn.benchmark = True

    args = parse_args()

    is_distributed = args.world_size > 1
    if is_distributed:
        init_process(args.backend, args.init_method, args.world_size, args.rank)

    use_cuda = torch.cuda.is_available() and not args.no_cuda
    device = torch.device("cuda" if use_cuda else "cpu")

    # Load and preprocess data
    train_loader, valid_loader = load_and_preprocess_data()

    # Model dimensions are derived from the training tensors:
    # feature count = number of columns, class count = distinct training labels
    train_features, train_labels = train_loader.dataset.tensors
    input_size = train_features.shape[1]
    num_classes = len(set(train_labels.numpy()))
    model = SimpleNN(input_size, num_classes)

    if is_distributed:
        # DDP wraps a model that already lives on its target device
        model.to(device)
        model = nn.parallel.DistributedDataParallel(model)
    elif args.data_parallel:
        model = nn.DataParallel(model)
        model.to(device)
    else:
        model.to(device)

    # SGD with momentum; no learning-rate scheduler is configured
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    scheduler = None

    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        train_loader=train_loader,
        valid_loader=valid_loader,
        scheduler=scheduler,
        device=device,
    )

    # Resume training from a checkpoint if one was provided
    if args.resume is not None:
        trainer.resume(args.resume)

    # Train and evaluate the model
    trainer.fit()
    trainer.evaluate()


if __name__ == "__main__":
    main()


Epoch 1/10, Loss: 1.2674049562482692
Epoch 2/10, Loss: 1.0732840103889578
Epoch 3/10, Loss: 0.9789700565053456
Epoch 4/10, Loss: 0.9282746019648083
Epoch 5/10, Loss: 0.8978482694768194
Epoch 6/10, Loss: 0.8786836414194819
Epoch 7/10, Loss: 0.8652498524580429
Epoch 8/10, Loss: 0.8553005129543703
Epoch 9/10, Loss: 0.8473857609193717
Epoch 10/10, Loss: 0.8408655677268754
Finished Training
Validation Accuracy: 62.64%


In [None]:



import argparse
import pandas as pd
import torch
from torch import distributed, nn, optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

def parse_args():
    """Parse the command-line options used by this script.

    Unknown arguments are ignored (``parse_known_args``) so the function also
    works inside a notebook kernel, whose ``sys.argv`` carries extra flags.

    Returns:
        argparse.Namespace with ``config``, ``resume``, ``no_cuda``,
        ``data_parallel``, ``backend``, ``init_method``, ``world_size`` and
        ``rank``.
    """
    # (flags, add_argument keyword options) for every supported option
    option_specs = [
        (("-c", "--config"), {"type": str, "default": "configs/mnist.yaml"}),
        (("--resume",), {"type": str, "default": None}),
        (("--no-cuda",), {"action": "store_true"}),
        (("--data-parallel",), {"action": "store_true"}),
        # distributed
        (("--backend",), {"type": str, "default": "nccl"}),
        (("--init-method",), {"type": str, "default": "tcp://127.0.0.1:23456"}),
        (("--world-size",), {"type": int, "default": 1}),
        (("--rank",), {"type": int, "default": 0}),
    ]

    parser = argparse.ArgumentParser()
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    # Second element holds the unrecognised arguments; they are discarded
    known_args, _unknown = parser.parse_known_args()
    return known_args

def init_process(backend, init_method, world_size, rank):
    """Initialise the torch.distributed process group for this process.

    All four arguments are forwarded unchanged, by keyword, to
    ``torch.distributed.init_process_group``.
    """
    group_options = {
        "backend": backend,
        "init_method": init_method,
        "world_size": world_size,
        "rank": rank,
    }
    distributed.init_process_group(**group_options)

def load_and_preprocess_data(csv_path='/content/bodyPerformance.csv', batch_size=32,
                             test_size=0.2, random_state=42):
    """Load a CSV dataset, print a short preview, and return train / validation DataLoaders.

    The last column of the CSV is treated as the label; every other column is
    a feature. Categorical features are one-hot encoded, labels are mapped to
    integer class ids, and features are standardised.

    Args:
        csv_path: Path of the CSV file to read.
        batch_size: Batch size used by both loaders.
        test_size: Fraction of rows held out for validation.
        random_state: Seed for the train/validation split.

    Returns:
        (train_loader, val_loader): loaders over TensorDatasets of
        (float32 features, int64 labels). Only the training loader shuffles.
    """
    # Load the dataset
    data = pd.read_csv(csv_path)

    # Display the first few rows and column data types for debugging
    print('First few rows of the dataset:')
    print(data.head())
    print('Column data types:')
    print(data.dtypes)

    # Separate features (all but the last column) and labels (last column)
    features = pd.get_dummies(data.iloc[:, :-1])
    labels = LabelEncoder().fit_transform(data.iloc[:, -1])

    # Split first, so the scaler below never sees validation rows
    X_train, X_val, y_train, y_val = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )

    # Standardise: statistics come from the training rows only and are then
    # applied unchanged to the validation rows (avoids data leakage)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    # Convert to PyTorch tensors and wrap them in datasets
    train_dataset = TensorDataset(
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.long),
    )
    val_dataset = TensorDataset(
        torch.tensor(X_val, dtype=torch.float32),
        torch.tensor(y_val, dtype=torch.long),
    )

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader

class SimpleNN(nn.Module):
    """Three-layer fully connected classifier: input -> 128 -> 64 -> class logits, ReLU between layers."""

    def __init__(self, input_size, num_classes):
        super().__init__()
        first_width, second_width = 128, 64
        self.fc1 = nn.Linear(input_size, first_width)
        self.fc2 = nn.Linear(first_width, second_width)
        self.fc3 = nn.Linear(second_width, num_classes)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for the batch ``x``."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)

class Trainer:
    """Minimal training / evaluation loop for a classification model.

    Args:
        model: Module producing class logits.
        optimizer: Optimizer over the model's parameters.
        train_loader: Iterable of (inputs, labels) batches used by ``fit``.
        valid_loader: Iterable of (inputs, labels) batches used by ``evaluate``.
        scheduler: Optional LR scheduler stepped once per epoch (may be None).
        device: Device that batches (and loaded checkpoints) are moved to.
    """

    def __init__(self, model, optimizer, train_loader, valid_loader, scheduler, device):
        self.model = model
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.valid_loader = valid_loader
        self.scheduler = scheduler
        self.device = device
        # First epoch index `fit` runs; `resume` overrides it from a checkpoint
        self.start_epoch = 0
        # Built once instead of once per batch
        self.criterion = nn.CrossEntropyLoss()

    def fit(self, epochs=10):
        """Train until ``epochs`` epochs are complete, starting at ``self.start_epoch``.

        Prints the mean batch loss after every epoch.
        """
        for epoch in range(self.start_epoch, epochs):
            self.model.train()
            running_loss = 0.0
            for inputs, labels in self.train_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)

                # Zero the parameter gradients
                self.optimizer.zero_grad()

                # Forward pass
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)

                # Backward pass and optimize
                loss.backward()
                self.optimizer.step()

                running_loss += loss.item()
            if self.scheduler is not None:
                self.scheduler.step()
            # max(..., 1) keeps an empty loader from dividing by zero
            num_batches = max(len(self.train_loader), 1)
            print(f'Epoch {epoch + 1}/{epochs}, Loss: {running_loss / num_batches}')

        print('Finished Training')

    def resume(self, checkpoint_path):
        """Restore model and optimizer state from a checkpoint file.

        The checkpoint must contain 'model_state_dict' and
        'optimizer_state_dict'. An optional 'epoch' entry is taken as the
        number of epochs already completed, so the next ``fit`` call continues
        from there.
        NOTE(review): no code in this file writes checkpoints -- confirm that
        'epoch' really means "epochs completed" for the files being loaded.
        """
        # map_location lets a checkpoint saved on GPU load on a CPU-only host.
        # torch.load unpickles the file: only load checkpoints you trust.
        checkpoint = torch.load(checkpoint_path, map_location=self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        if 'epoch' in checkpoint:
            self.start_epoch = int(checkpoint['epoch'])

    def evaluate(self):
        """Print and return the accuracy (in percent) on ``valid_loader``.

        Returns 0.0 when the validation loader yields no samples.
        """
        self.model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in self.valid_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = self.model(inputs)
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total if total else 0.0
        print(f'Validation Accuracy: {accuracy:.2f}%')
        return accuracy

def main():
    """Entry point: parse options, build data / model / optimizer, then train and evaluate.

    ``args.config`` is parsed but not read here. When ``--world-size`` is
    greater than 1 the process group is initialised and the model is wrapped
    in DistributedDataParallel; otherwise ``--data-parallel`` optionally wraps
    it in DataParallel.
    """
    torch.backends.cudnn.benchmark = True

    args = parse_args()

    is_distributed = args.world_size > 1
    if is_distributed:
        init_process(args.backend, args.init_method, args.world_size, args.rank)

    use_cuda = torch.cuda.is_available() and not args.no_cuda
    device = torch.device("cuda" if use_cuda else "cpu")

    # Load and preprocess data
    train_loader, valid_loader = load_and_preprocess_data()

    # Model dimensions are derived from the training tensors:
    # feature count = number of columns, class count = distinct training labels
    train_features, train_labels = train_loader.dataset.tensors
    input_size = train_features.shape[1]
    num_classes = len(set(train_labels.numpy()))
    model = SimpleNN(input_size, num_classes)

    if is_distributed:
        # DDP wraps a model that already lives on its target device
        model.to(device)
        model = nn.parallel.DistributedDataParallel(model)
    elif args.data_parallel:
        model = nn.DataParallel(model)
        model.to(device)
    else:
        model.to(device)

    # SGD with momentum; no learning-rate scheduler is configured
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    scheduler = None

    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        train_loader=train_loader,
        valid_loader=valid_loader,
        scheduler=scheduler,
        device=device,
    )

    # Resume training from a checkpoint if one was provided
    if args.resume is not None:
        trainer.resume(args.resume)

    # Train and evaluate the model
    trainer.fit()
    trainer.evaluate()


if __name__ == "__main__":
    main()


First few rows of the dataset:
    age gender  height_cm  weight_kg  body fat_%  diastolic  systolic  \
0  27.0      M      172.3      75.24        21.3       80.0     130.0   
1  25.0      M      165.0      55.80        15.7       77.0     126.0   
2  31.0      M      179.6      78.00        20.1       92.0     152.0   
3  32.0      M      174.5      71.10        18.4       76.0     147.0   
4  28.0      M      173.8      67.70        17.1       70.0     127.0   

   gripForce  sit and bend forward_cm  sit-ups counts  broad jump_cm class  
0       54.9                     18.4            60.0          217.0     C  
1       36.4                     16.3            53.0          229.0     A  
2       44.8                     12.0            49.0          181.0     C  
3       41.4                     15.2            53.0          219.0     B  
4       43.5                     27.1            45.0          217.0     B  
Column data types:
age                        float64
gender        

In [None]:
pip install mlconfig



In [None]:
pip install pyyaml



In [None]:
# Clone the efficientnet-pytorch repository into the current directory.
# The directory test makes the cell safe to re-run: git refuses to clone into
# an existing, non-empty directory.
!test -d efficientnet-pytorch || git clone https://github.com/narumiruna/efficientnet-pytorch.git

Cloning into 'efficientnet-pytorch'...
remote: Enumerating objects: 876, done.[K
remote: Counting objects: 100% (304/304), done.[K
remote: Compressing objects: 100% (181/181), done.[K
remote: Total 876 (delta 168), reused 208 (delta 121), pack-reused 572 (from 1)[K
Receiving objects: 100% (876/876), 293.73 KiB | 1.22 MiB/s, done.
Resolving deltas: 100% (479/479), done.


In [None]:
%cd efficientnet-pytorch

/content/efficientnet-pytorch


In [None]:
!pip install torch torchvision



In [None]:
!pip install kaggle




In [None]:
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

mv: cannot stat 'kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory


In [None]:
# Download the misrakahmed/vegetable-image-dataset archive from Kaggle; the
# recorded run saved vegetable-image-dataset.zip into the current working directory.
# NOTE(review): presumably requires Kaggle API credentials to be configured -- confirm.
!kaggle datasets download -d misrakahmed/vegetable-image-dataset

Dataset URL: https://www.kaggle.com/datasets/misrakahmed/vegetable-image-dataset
License(s): CC-BY-SA-4.0
Downloading vegetable-image-dataset.zip to /content/efficientnet-pytorch
 99% 529M/534M [00:12<00:00, 46.7MB/s]
100% 534M/534M [00:12<00:00, 46.4MB/s]


In [None]:
# NOTE(review): 'your-username/your-dataset' looks like an unreplaced template
# slug; the recorded run of this cell ended with "403 - Forbidden". Replace it
# with a real dataset slug or remove the cell.
!kaggle datasets download -d your-username/your-dataset

403 - Forbidden - Permission 'datasets.get' was denied


In [None]:
# Extract the archive produced by the Kaggle download. The file is named
# vegetable-image-dataset.zip (there is no dataset.zip).
# -q: no per-file listing, -o: overwrite on re-run, -d: target directory.
!unzip -q -o vegetable-image-dataset.zip -d vegetable-image-dataset

unzip:  cannot find or open dataset.zip, dataset.zip.zip or dataset.zip.ZIP.


In [None]:
!pip install efficientnet-pytorch


Collecting efficientnet-pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16425 sha256=19114f6431700ff35f919081f01c6ab8071dd14661f9c9311991a8481d7d5d91
  Stored in directory: /root/.cache/pip/wheels/03/3f/e9/911b1bc46869644912bda90a56bcf7b960f20b5187feea3baf
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.7.1


In [None]:
# Install Kaggle API
!pip install kaggle

# Ensure kaggle.json is in the correct directory
import os
os.makedirs('/root/.kaggle', exist_ok=True)
!cp kaggle.json /root/.kaggle/

# Download the dataset using Kaggle API
!kaggle datasets download -d misrakahmed/vegetable-image-dataset


cp: cannot stat 'kaggle.json': No such file or directory
Dataset URL: https://www.kaggle.com/datasets/misrakahmed/vegetable-image-dataset
License(s): CC-BY-SA-4.0
vegetable-image-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


In [None]:
import zipfile

# Extract every member of the downloaded archive into ./vegetable-image-dataset;
# the archive handle is closed even if extraction fails.
archive = zipfile.ZipFile('vegetable-image-dataset.zip', 'r')
try:
    archive.extractall('vegetable-image-dataset')
finally:
    archive.close()


In [None]:
import os

# Check the directory structure.
# Only the number of files per directory is printed: listing every image
# filename floods the cell output without adding information.
dataset_dir = 'vegetable-image-dataset'  # Adjust if necessary
for root, dirs, files in os.walk(dataset_dir):
    print(root, dirs, f'{len(files)} files')


vegetable-image-dataset ['Vegetable Images'] []
vegetable-image-dataset/Vegetable Images ['test', 'train', 'validation'] []
vegetable-image-dataset/Vegetable Images/test ['Broccoli', 'Potato', 'Bean', 'Bitter_Gourd', 'Tomato', 'Bottle_Gourd', 'Carrot', 'Capsicum', 'Radish', 'Cabbage', 'Brinjal', 'Pumpkin', 'Papaya', 'Cucumber', 'Cauliflower'] []
vegetable-image-dataset/Vegetable Images/test/Broccoli [] ['1162.jpg', '1072.jpg', '1101.jpg', '1042.jpg', '1109.jpg', '1107.jpg', '1123.jpg', '1094.jpg', '1012.jpg', '1052.jpg', '1102.jpg', '1132.jpg', '1200.jpg', '1051.jpg', '1064.jpg', '1110.jpg', '1125.jpg', '1159.jpg', '1032.jpg', '1069.jpg', '1170.jpg', '1038.jpg', '1036.jpg', '1093.jpg', '1031.jpg', '1062.jpg', '1081.jpg', '1056.jpg', '1185.jpg', '1182.jpg', '1044.jpg', '1045.jpg', '1096.jpg', '1025.jpg', '1108.jpg', '1113.jpg', '1112.jpg', '1117.jpg', '1120.jpg', '1075.jpg', '1029.jpg', '1140.jpg', '1084.jpg', '1068.jpg', '1161.jpg', '1002.jpg', '1149.jpg', '1074.jpg', '1047.jpg', '1104

In [None]:
import torch
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from efficientnet_pytorch import EfficientNet

# Define your data transforms: resize + centre crop to 224x224, then normalise
# with per-channel mean / std
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load your dataset
# ImageFolder creates one class per immediate sub-directory of `root`. The
# archive extracts to 'vegetable-image-dataset/Vegetable Images/' with the
# splits 'train', 'validation' and 'test' below it, so pointing ImageFolder at
# the extraction directory would yield a single class ('Vegetable Images').
# The root must be the training split, whose sub-directories are the classes.
# NOTE(review): the recorded listing shows the class folders for 'test';
# 'train' presumably mirrors them -- confirm.
dataset_path = 'vegetable-image-dataset/Vegetable Images/train'
train_dataset = datasets.ImageFolder(root=dataset_path, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Use the GPU when available; both the model and every batch must live on it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the pretrained model with a classification head sized to the dataset
model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=len(train_dataset.classes))
model = model.to(device)

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Training loop
num_epochs = 10  # Set the number of epochs
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Mean batch loss for the epoch
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss / len(train_loader)}')

# Save the trained weights
torch.save(model.state_dict(), 'model.pth')


Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth
100%|██████████| 20.4M/20.4M [00:00<00:00, 236MB/s]


Loaded pretrained weights for efficientnet-b0


In [None]:
# Importing necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models

# Step 1: Device configuration -- run on the GPU when CUDA is available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Step 2: Data preprocessing and augmentation
# Per-channel mean / std used to normalise CIFAR-10 images in both pipelines
cifar_mean = (0.4914, 0.4822, 0.4465)
cifar_std = (0.2023, 0.1994, 0.2010)

# Training pipeline: random crop (4-pixel padding) and horizontal flip, then tensor + normalise
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

# Test pipeline: no augmentation, only tensor conversion + normalisation
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

# Step 3: CIFAR-10 datasets (downloaded into ./data) and their batch loaders
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False)

# Step 4: Pretrained ResNet-18 whose final fully connected layer is replaced
# by a 10-way head (CIFAR-10 has 10 classes)
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 10)
model = model.to(device)

# Step 5: Cross-entropy loss and Adam optimizer over all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 6: Training
num_epochs = 10
total_step = len(train_loader)

for epoch in range(num_epochs):
    model.train()
    # Running totals are reset at the start of every epoch
    running_loss, correct, total = 0.0, 0, 0

    for i, batch in enumerate(train_loader):
        images, labels = (tensor.to(device) for tensor in batch)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate loss and accuracy statistics for the progress report
        running_loss += loss.item()
        predicted = torch.max(outputs, dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Report the epoch-to-date averages every 100 batches
        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{total_step}], '
                  f'Loss: {running_loss / (i + 1):.4f}, Accuracy: {100 * correct / total:.2f}%')

# Step 7: Evaluation on the test set (no gradients needed)
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predicted = torch.max(outputs, dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Test Accuracy: {100 * correct / total:.2f}%')


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 30069793.92it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified




Epoch [1/10], Step [100/391], Loss: 1.3681, Accuracy: 51.98%
Epoch [1/10], Step [200/391], Loss: 1.1760, Accuracy: 59.20%
Epoch [1/10], Step [300/391], Loss: 1.0790, Accuracy: 62.67%
Epoch [2/10], Step [100/391], Loss: 0.7842, Accuracy: 73.73%
Epoch [2/10], Step [200/391], Loss: 0.7719, Accuracy: 73.71%
Epoch [2/10], Step [300/391], Loss: 0.7482, Accuracy: 74.56%
Epoch [3/10], Step [100/391], Loss: 0.6624, Accuracy: 77.32%
Epoch [3/10], Step [200/391], Loss: 0.6616, Accuracy: 77.30%
Epoch [3/10], Step [300/391], Loss: 0.6560, Accuracy: 77.56%
Epoch [4/10], Step [100/391], Loss: 0.6031, Accuracy: 79.56%
Epoch [4/10], Step [200/391], Loss: 0.5987, Accuracy: 79.66%
Epoch [4/10], Step [300/391], Loss: 0.5962, Accuracy: 79.69%
Epoch [5/10], Step [100/391], Loss: 0.5527, Accuracy: 81.09%
Epoch [5/10], Step [200/391], Loss: 0.5523, Accuracy: 81.02%
Epoch [5/10], Step [300/391], Loss: 0.5569, Accuracy: 80.96%
Epoch [6/10], Step [100/391], Loss: 0.5217, Accuracy: 82.02%
Epoch [6/10], Step [200/