In [1]:
import torch
from torchvision import models, transforms
import torch.nn as nn
import cv2
import numpy as np
import time
import os
from picamera2 import Picamera2, Preview

In [2]:
# Build a MobileNetV2 with randomly initialised weights; the fine-tuned
# parameters are loaded from the checkpoint below. `weights=None` is the
# current spelling of the deprecated `pretrained=False`.
model = models.mobilenet_v2(weights=None)

# Swap the stock classifier for a 9-way head.
# NOTE: the layer order (Linear at index 0, Dropout at index 1) is kept
# exactly as-is because the checkpoint's classifier parameters are looked up
# by their position in this Sequential. Dropout is inactive once the model is
# in eval mode, so it does not affect inference.
num_classes = 9  # presumably the number of fruit/vegetable labels -- keep in sync with class_names
num_features = model.classifier[1].in_features
model.classifier = nn.Sequential(
    nn.Linear(num_features, num_classes),
    nn.Dropout(0.4)
)

weights_path = r"/home/nikhilpi5/Desktop/new_fruit_weights.pth"
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code when it is loaded.
state_dict = torch.load(
    weights_path,
    map_location=torch.device('cpu'),
    weights_only=True,
)
model.load_state_dict(state_dict)

# Inference mode: disables dropout and uses BatchNorm running statistics.
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)



MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [3]:
# Report the device chosen for inference ("cuda" if available, otherwise "cpu").
print(device)

cpu


In [None]:
# Preprocessing applied to every captured frame before inference: convert the
# array to a PIL image, resize to 224x224, convert to a tensor and normalise
# each channel with the mean/std values below.
preprocess = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Index i of the model output corresponds to class_names[i].
class_names = ['apple', 'banana', 'bitter gourd', 'broccoli', 'capsicum', 'carrot', 'orange', 'rice', 'tomato']

# Open the camera. Everything after this point runs inside try/finally so the
# camera is always stopped and released -- otherwise an exception or a kernel
# interrupt leaves the device held and re-running this cell can fail.
picam2 = Picamera2()
try:
    config = picam2.create_still_configuration(main={"format": 'RGB888', "size": (640, 480)})
    picam2.configure(config)
    picam2.start()

    print("Press 'c' to capture an image. Press 'q' to quit.")

    while True:
        frame = picam2.capture_array()
        # NOTE(review): per the Picamera2 manual, the "RGB888" format delivers
        # pixels in B, G, R order -- confirm. That is why `frame` is shown by
        # OpenCV unchanged while the model receives a BGR->RGB converted copy.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        cv2.imshow("Live Preview", frame)

        key = cv2.waitKey(1) & 0xFF

        if key == ord('c'):
            input_tensor = preprocess(frame_rgb)
            input_batch = input_tensor.unsqueeze(0).to(device)

            with torch.no_grad():
                output = model(input_batch)
            # Index of the highest-scoring class for the single image in the batch.
            predicted_class = output.argmax(dim=1)
            predicted_class_name = class_names[predicted_class.item()]

            label = f"Predicted: {predicted_class_name}"
            print(label)
            result_img = frame.copy()
            cv2.putText(result_img, label, (10, 40), cv2.FONT_HERSHEY_SIMPLEX,
                        1.2, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.imshow("Prediction", result_img)
            # Let HighGUI process events now so the prediction window is
            # painted before the blocking speech call and sleep below.
            cv2.waitKey(1)

            # predicted_class_name always comes from the fixed class_names
            # list above, so no untrusted text reaches the shell here.
            os.system(f"espeak '{predicted_class_name}'")

            time.sleep(1)

        elif key == ord('q'):
            break
finally:
    # Always tear down the GUI and release the camera, even on errors.
    cv2.destroyAllWindows()
    picam2.stop()
    picam2.close()

[0:10:42.235172253] [3225] [1;32m INFO [1;37mCamera [1;34mcamera_manager.cpp:326 [0mlibcamera v0.5.0+59-d83ff0a4
[0:10:42.264945084] [3249] [1;32m INFO [1;37mRPI [1;34mpisp.cpp:720 [0mlibpisp version v1.2.1 981977ff21f3 29-04-2025 (14:13:50)
[0:10:42.275684892] [3249] [1;32m INFO [1;37mRPI [1;34mpisp.cpp:1179 [0mRegistered camera /base/axi/pcie@1000120000/rp1/i2c@80000/imx219@10 to CFE device /dev/media0 and ISP device /dev/media1 using PiSP variant BCM2712_C0
[0:10:42.283399747] [3225] [1;32m INFO [1;37mCamera [1;34mcamera.cpp:1205 [0mconfiguring streams: (0) 640x480-RGB888 (1) 640x480-BGGR_PISP_COMP1
[0:10:42.283608306] [3249] [1;32m INFO [1;37mRPI [1;34mpisp.cpp:1483 [0mSensor: /base/axi/pcie@1000120000/rp1/i2c@80000/imx219@10 - Selected sensor format: 640x480-SBGGR10_1X10 - Selected CFE format: 640x480-PC1B


Press 'c' to capture an image. Press 'q' to quit.



(python3:3225): GLib-GObject-CRITICAL **: 13:02:04.546: g_object_unref: assertion 'G_IS_OBJECT (object)' failed


Predicted: banana
