In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

In [2]:
import os
import shutil
import random
from PIL import Image

In [3]:
original_dir = 'C:/TESIS/DATASET-TESIS/original'
cleaned_dir = 'C:/TESIS/DATASET-TESIS/cleaned'

# Crear el directorio cleaned si no existe
os.makedirs(cleaned_dir, exist_ok=True)

for folder in os.listdir(original_dir):
    folder_path = os.path.join(original_dir, folder)
    new_folder_path = os.path.join(cleaned_dir, folder)
    os.makedirs(new_folder_path, exist_ok=True)

    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        new_file_path = os.path.join(new_folder_path, filename)

        try:
            with Image.open(file_path) as img:
                img = img.convert('RGB')  # Convertir a RGB si no lo está
                img.save(new_file_path, 'JPEG')  # Guardar como .jpg
        except Exception as e:
            print(f"Error con el archivo {file_path}: {e}")


In [None]:
# Directorio original donde están las imágenes organizadas por categorías
cleaned_dir = r'C:\TESIS\DATASET-TESIS\original'
# Rutas para las carpetas de entrenamiento, validación y prueba
base_dir = r'C:\TESIS\DATASET-TESIS'
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

# Porcentajes de división
train_split = 0.7
val_split = 0.15
test_split = 0.15

# Crear carpetas de entrenamiento, validación y prueba
for split_dir in [train_dir, val_dir, test_dir]:
    os.makedirs(split_dir, exist_ok=True)

# Definir las categorías basadas en tus subcarpetas
categories = ['Tomato - Healthy', 'Tomato - Mite', 'Tomato - Mosca Blanca', 'Tomato - Tuta absoluta']

# Crear subcarpetas para cada categoría en las carpetas de entrenamiento, validación y prueba
for category in categories:
    os.makedirs(os.path.join(train_dir, category), exist_ok=True)
    os.makedirs(os.path.join(val_dir, category), exist_ok=True)
    os.makedirs(os.path.join(test_dir, category), exist_ok=True)

# Recorrer cada categoría y distribuir las imágenes
for category in categories:
    category_dir = os.path.join(cleaned_dir, category)
    images = os.listdir(category_dir)
    random.shuffle(images)  # Mezclar aleatoriamente las imágenes

    # Calcular el número de imágenes para cada conjunto
    train_size = int(len(images) * train_split)
    val_size = int(len(images) * val_split)
    test_size = len(images) - train_size - val_size

    # Asignar imágenes a cada conjunto
    train_images = images[:train_size]
    val_images = images[train_size:train_size + val_size]
    test_images = images[train_size + val_size:]

    # Mover las imágenes a sus respectivas carpetas
    for img in train_images:
        shutil.copy(os.path.join(category_dir, img), os.path.join(train_dir, category, img))

    for img in val_images:
        shutil.copy(os.path.join(category_dir, img), os.path.join(val_dir, category, img))

    for img in test_images:
        shutil.copy(os.path.join(category_dir, img), os.path.join(test_dir, category, img))

print("Las imágenes se han dividido en entrenamiento, validación y prueba.")


In [4]:
# Configuración de rutas de imágenes
train_path = r'C:\TESIS\DATASET-TESIS\train'
val_path = r'C:\TESIS\DATASET-TESIS\val'
test_path = r'C:\TESIS\DATASET-TESIS\test'

# Crear generador de datos con normalización y separación de datos
datagen = ImageDataGenerator(rescale=1./255)

# Generador de datos para entrenamiento
train_data = datagen.flow_from_directory(
    train_path,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical', 
)

# Generador de datos para validación
val_data = datagen.flow_from_directory(
    val_path,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical', 
)

# Generador de datos para prueba
test_data = datagen.flow_from_directory(
    test_path,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

Found 1037 images belonging to 4 classes.
Found 222 images belonging to 4 classes.
Found 224 images belonging to 4 classes.


In [5]:
# Cargar MobileNetV2 sin la última capa (include_top=False)
base_model = tf.keras.applications.MobileNetV2(input_shape=(224, 224, 3),
                                               include_top=False,
                                               weights='imagenet')
# Desactivar el entrenamiento de las capas del modelo base
base_model.trainable = False

In [6]:
# Función para extraer características
def extract_features(data_generator, model):
    features = []
    labels = []
    for inputs_batch, labels_batch in data_generator:
        features_batch = model.predict(inputs_batch)
        features.append(features_batch)
        labels.append(labels_batch)
        if len(features) * data_generator.batch_size >= data_generator.samples:
            break
    features = np.vstack(features)
    labels = np.vstack(labels)
    return features, labels

# Extraer características de entrenamiento y validación
train_features, train_labels = extract_features(train_data, base_model)
val_features, val_labels = extract_features(val_data, base_model)

# Aplanar las características para que SVM las use correctamente
train_features_flat = train_features.reshape((train_features.shape[0], -1))
val_features_flat = val_features.reshape((val_features.shape[0], -1))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 872ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 807ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 851ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 937ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 928ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 971ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 962ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 941ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 937ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9

In [7]:
# Definir el modelo SVM con escalado
svm_model = make_pipeline(StandardScaler(), SVC(kernel='linear', C=1))

In [8]:


# Entrenar el clasificador SVM
svm_model.fit(train_features_flat, np.argmax(train_labels, axis=1))

# Evaluación en datos de validación


In [9]:
val_preds = svm_model.predict(val_features_flat)
val_accuracy = accuracy_score(np.argmax(val_labels, axis=1), val_preds)
print(f'Precisión en validación: {val_accuracy * 100:.2f}%')


Precisión en validación: 95.05%


In [None]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array



def predict_image(img_path, base_model, svm_modelm, class_names):
    # Cargar y preprocesar la imagen
    img = load_img(img_path, target_size=(224, 224))      #carga y redimensiona imagen
    img_array = img_to_array(img) / 255.0                 #normaliza los valores de los pixeles entre 0 y 1   
    img_array = np.expand_dims(img_array, axis=0)               #añade una dimension al array y lo transforma a batch
 
    # Extraer características con MobileNetV2
    features = base_model.predict(img_array)                    #extrae caracteristicas de la img
    features_flat = features.reshape((features.shape[0], -1))   #aplana array para svm

    # Clasificar usando SVM
    prediction = svm_model.predict(features_flat)
    predicted_class = class_names[prediction[0]]
    return predicted_class


In [None]:
# Ejemplo de uso
img_path = 'C:/TESIS/DATASET-TESIS/cleaned/Tomato - Mosca Blanca/aug_12_2663.png'
clase_predicha = predict_image(img_path, base_model, svm_model, class_names)
print(f'Plaga: {clase_predicha}')

In [10]:
import joblib
import json 

class_names = list(train_data.class_indices.keys())
class_ids = ["aa8mSAytfjx4d4CEeZmI", "kxuJyAtS3bbXMS8ImSLB", "9NGyA9ckRtMFmASEQCrt", "6k1uiaEaaYfhFcwQF2Ka"]

class_data = {name: class_id for name, class_id in zip(class_names, class_ids)}


# Guardar el modelo base
base_model.save("base_model.h5")
 
# Guardar el modelo SVM
joblib.dump(svm_model, "svm_model.pkl")

 
# Guardar los nombres de las clases en un archivo JSON
with open("class_names.json", "w") as f:
    json.dump(class_names, f) 
    
# Guardar los nombres de las clases en un archivo JSON
with open("ID_names.json", "w") as f:
    json.dump(class_data, f)


