In [4]:
import torch
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
import xgboost as xgb
import joblib
import os
# Directory where images are stored (each class has its own subdirectory)
data_dir='C:\p\Medicinalplantdataset'
features=[]
labels = []
transform = transforms.Compose([
    transforms.Resize( (224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],[0.229,0.224,0.225])
])


In [2]:
efficientnet=torch.load('C:\p\efficientnet_b7_feature_extractor.pth')
efficientnet.eval()

  efficientnet=torch.load('C:\p\efficientnet_b7_feature_extractor.pth')


EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 64, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d((0, 1, 0, 1))
  )
  (_bn0): BatchNorm2d(64, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        64, 64, kernel_size=(3, 3), stride=[1, 1], groups=64, bias=False
        (static_padding): ZeroPad2d((1, 1, 1, 1))
      )
      (_bn1): BatchNorm2d(64, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        64, 16, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        16, 64, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False
  

In [5]:
# Function to extract features from an image
def extract_features(image_path):
    img = Image.open(image_path).convert('RGB')
    img = transform(img).unsqueeze(0)
    with torch.no_grad():
        features = efficientnet(img).numpy().flatten()
    return features

# Loop over each class in the dataset
for class_name in os.listdir(data_dir):
    class_dir = os.path.join(data_dir, class_name)

    # Loop over each image in the class directory
    for img_file in os.listdir(class_dir):
        img_path = os.path.join(class_dir, img_file)

        # Extract features and append them to the features list
        img_features = extract_features(img_path)
        features.append(img_features)

        # Append the corresponding label
        labels.append(class_name)


In [6]:
features=np.array(features)
labels=np.array(labels)
x_train,x_test,y_train,y_test=train_test_split(features,labels,test_size=0.2,random_state=42)

In [8]:
from sklearn.preprocessing import LabelEncoder

# List of labels
labels = [
    "Aloevera", "Amla", "Amruta_Balli", "Ashwagandha", "Avacado",
    "Bamboo", "Basale", "Betel", "Betel_Nut", "Brahmi", "Castor",
    "Curry_Leaf", "Doddapatre", "Ekka", "Ganike", "Gauva", "Geranium",
    "Ginger", "Grapes", "Guards", "Hibiscus", "Jackfruit", "Jamun",
    "Jasmine", "Jatropha", "Lemongrass", "Mango", "Marigold",
    "Mint", "Moringa", "Neem", "Nooni", "Pappaya", "Pepper",
    "Pomegranate", "Raktachandini", "Rose", "Sapota", "Tulasi", "Wood_sorel"
]

# Create and fit a LabelEncoder
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(labels)

# Get the mapping of classes to numerical labels
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
print(label_mapping)


{np.str_('Aloevera'): np.int64(0), np.str_('Amla'): np.int64(1), np.str_('Amruta_Balli'): np.int64(2), np.str_('Ashwagandha'): np.int64(3), np.str_('Avacado'): np.int64(4), np.str_('Bamboo'): np.int64(5), np.str_('Basale'): np.int64(6), np.str_('Betel'): np.int64(7), np.str_('Betel_Nut'): np.int64(8), np.str_('Brahmi'): np.int64(9), np.str_('Castor'): np.int64(10), np.str_('Curry_Leaf'): np.int64(11), np.str_('Doddapatre'): np.int64(12), np.str_('Ekka'): np.int64(13), np.str_('Ganike'): np.int64(14), np.str_('Gauva'): np.int64(15), np.str_('Geranium'): np.int64(16), np.str_('Ginger'): np.int64(17), np.str_('Grapes'): np.int64(18), np.str_('Guards'): np.int64(19), np.str_('Hibiscus'): np.int64(20), np.str_('Jackfruit'): np.int64(21), np.str_('Jamun'): np.int64(22), np.str_('Jasmine'): np.int64(23), np.str_('Jatropha'): np.int64(24), np.str_('Lemongrass'): np.int64(25), np.str_('Mango'): np.int64(26), np.str_('Marigold'): np.int64(27), np.str_('Mint'): np.int64(28), np.str_('Moringa'): n

In [11]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)


In [12]:
clf=xgb.XGBClassifier()
clf.fit(x_train, y_train_encoded)
accuracy=clf.score(x_test,label_encoder.transform(y_test))
print(f'Test accuracy:{accuracy*100:.2f}%')

Test accuracy:90.00%


In [13]:
# Save the trained XGBoost model
clf.save_model('xgboost_medicinal_plant_model.pkl')

# Save the EfficientNet model for future feature extraction
torch.save(efficientnet, 'efficientnet_b7_feature_extractor.pth')

import joblib

# Save the Label Encoder
joblib.dump(label_encoder, 'label_encoder.pkl')



['label_encoder.pkl']