#### Conjunto de datos

In [1]:
# Fetch the project repository (dataset CSV and image folder) into ./BD-TP1-v2;
# later cells read from /content/BD-TP1-v2.
!git clone https://github.com/JeffersonEspinalA/BD-TP1-v2.git

Cloning into 'BD-TP1-v2'...
remote: Enumerating objects: 19037, done.[K
remote: Counting objects: 100% (3179/3179), done.[K
remote: Compressing objects: 100% (3176/3176), done.[K
remote: Total 19037 (delta 1), reused 3179 (delta 1), pack-reused 15858 (from 1)[K
Receiving objects: 100% (19037/19037), 219.56 MiB | 60.02 MiB/s, done.
Resolving deltas: 100% (1/1), done.
Updating files: 100% (15857/15857), done.


#### Importar librerías

In [2]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPooling2D, Dropout, GlobalAveragePooling2D
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

#### Preparación de datos

In [3]:
# Load the image index; the loading cell reads its 'Filename' and 'label' columns.
data = pd.read_csv('/content/BD-TP1-v2/Plant-Village.csv')

In [4]:
IMG_HEIGHT, IMG_WIDTH = 256, 256
# Folder that holds every image referenced by the 'Filename' column.
IMAGE_DIR = '/content/BD-TP1-v2/Plant Village Data A'

# Preallocate the whole image tensor once. Collecting the images in a Python
# list and converting with np.array() afterwards keeps two full copies of the
# dataset alive at the same time, roughly doubling peak memory.
images = np.empty((len(data), IMG_HEIGHT, IMG_WIDTH, 3), dtype=np.float32)

# Iterate the filename column directly; iterrows() would build a Series per
# row only to read two fields from it.
for slot, filename in enumerate(data['Filename']):
    img_path = os.path.join(IMAGE_DIR, filename)
    image = load_img(img_path, target_size=(IMG_HEIGHT, IMG_WIDTH))
    # Scale pixel values from [0, 255] to [0, 1].
    images[slot] = img_to_array(image) / 255.0

labels = np.array(data['label'].tolist())

# Map the class labels to consecutive integers; `le` keeps the mapping
# (le.classes_) so predictions can be decoded later.
le = LabelEncoder()
labels = le.fit_transform(labels)

In [5]:
# Hold out 20% of the samples for validation; the fixed random_state makes the
# split repeatable. No `stratify` argument is passed, so class proportions are
# not forced to match between the two splits.
X_train, X_val, y_train, y_val = train_test_split(images, labels, test_size=0.2, random_state=42)

In [6]:
# Number of samples in the training and validation splits, one per line.
print(len(X_train), len(X_val), sep='\n')

12684
3171


In [7]:
# Sanity check: the training tensor should be (samples, 256, 256, 3).
X_train.shape

(12684, 256, 256, 3)

#### Modelamiento

In [8]:
def cnn_1(input_shape=(256, 256, 3)):
    """Build the four-block convolutional feature extractor.

    Each block is Conv2D (3x3, ReLU, 'same' padding) -> BatchNormalization ->
    2x2 MaxPooling2D, with 32, 64, 128 and 256 filters respectively. The third
    and fourth blocks end with Dropout (rates 0.3 and 0.4). A closing
    GlobalAveragePooling2D layer reduces each feature map to a single value.

    Args:
        input_shape: shape of one input image, forwarded to the first
            convolution.

    Returns:
        The assembled ``models.Sequential`` model; it is not compiled here.
    """
    # One (filters, dropout_rate) pair per block; None means "no Dropout".
    block_plan = ((32, None), (64, None), (128, 0.3), (256, 0.4))

    stack = []
    for position, (n_filters, drop_rate) in enumerate(block_plan):
        conv_options = {'activation': 'relu', 'padding': 'same'}
        if position == 0:
            # Only the very first layer declares the input shape.
            conv_options['input_shape'] = input_shape
        stack.append(layers.Conv2D(n_filters, (3, 3), **conv_options))
        stack.append(layers.BatchNormalization())
        stack.append(layers.MaxPooling2D((2, 2)))
        if drop_rate is not None:
            stack.append(layers.Dropout(drop_rate))

    stack.append(layers.GlobalAveragePooling2D())

    return models.Sequential(stack)

# The extractor is used with its initial weights (no compile()/fit() call on it
# appears in the visible cells), so the features depend entirely on the random
# initialisation. Seed Python, NumPy and TensorFlow first so that a fresh
# "Restart & Run All" yields the same features and downstream scores.
from tensorflow.keras.utils import set_random_seed

set_random_seed(42)

feature_extractor = cnn_1()
# One feature vector per image, for each split.
features_train = feature_extractor.predict(X_train)
features_val = feature_extractor.predict(X_val)




In [None]:
def cnn_2(input_shape=(256, 256, 3)):
    """Build the three-block convolutional feature extractor.

    Each block is Conv2D (3x3, ReLU, 'same' padding) -> BatchNormalization ->
    2x2 MaxPooling2D, with 32, 64 and 128 filters respectively. Only the second
    block ends with Dropout (rate 0.3). Global average pooling is used instead
    of Flatten as the final layer.

    Args:
        input_shape: shape of one input image, forwarded to the first
            convolution.

    Returns:
        The assembled ``models.Sequential`` model; it is not compiled here.
    """
    # One (filters, dropout_rate) pair per block; None means "no Dropout".
    block_plan = ((32, None), (64, 0.3), (128, None))

    stack = []
    for position, (n_filters, drop_rate) in enumerate(block_plan):
        conv_options = {'activation': 'relu', 'padding': 'same'}
        if position == 0:
            # Only the very first layer declares the input shape.
            conv_options['input_shape'] = input_shape
        stack.append(layers.Conv2D(n_filters, (3, 3), **conv_options))
        stack.append(layers.BatchNormalization())
        stack.append(layers.MaxPooling2D((2, 2)))
        if drop_rate is not None:
            stack.append(layers.Dropout(drop_rate))

    # Global average pooling instead of Flatten.
    stack.append(layers.GlobalAveragePooling2D())

    return models.Sequential(stack)

# Alternative, shallower extractor kept for comparison. Its results are stored
# under their own names: reusing `feature_extractor` / `features_train` /
# `features_val` here would, on a top-to-bottom run, replace the cnn_1 features
# that the scaler, the SVM and the saved model in the following cells rely on
# (the recorded feature shape (12684, 256) corresponds to cnn_1's 256 filters).
feature_extractor_cnn2 = cnn_2()
features_train_cnn2 = feature_extractor_cnn2.predict(X_train)
features_val_cnn2 = feature_extractor_cnn2.predict(X_val)




In [9]:
# Sanity check: one row per training image, one column per extracted feature.
features_train.shape

(12684, 256)

In [10]:
# Standardise the CNN features: statistics are learned from the training split
# only and then applied unchanged to both splits.
# Note: features_train / features_val are overwritten, so re-running this cell
# on its own re-fits the scaler on data that is already standardised.
scaler = StandardScaler().fit(features_train)
features_train = scaler.transform(features_train)
features_val = scaler.transform(features_val)

In [13]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Hyperparameter search for the SVM, 5-fold cross-validated on the training
# features. `gamma` only affects the rbf/poly/sigmoid kernels, so the linear
# kernel gets its own grid; crossing it with `gamma` would fit the same linear
# model twice for every value of C.
param_grid = [
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
    {
        'kernel': ['rbf', 'poly', 'sigmoid'],
        'C': [0.1, 1, 10],
        'gamma': ['scale', 'auto'],
    },
]

# n_jobs=-1 evaluates candidates on all available cores; scores are unchanged.
grid_search = GridSearchCV(svm.SVC(), param_grid, cv=5, n_jobs=-1)
grid_search.fit(features_train, y_train)

print(f"Mejores hiperparámetros: {grid_search.best_params_}")

Mejores hiperparámetros: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}


In [11]:
# Final classifier; the hyperparameters are hard-coded to the values reported
# by the grid search output in this notebook.
svm_model = svm.SVC(C=10, kernel='rbf', gamma='scale')
svm_model.fit(X=features_train, y=y_train)

#### Resultados

In [12]:
# Score the classifier on the held-out (already standardised) features.
y_pred = svm_model.predict(features_val)
accuracy = accuracy_score(y_true=y_val, y_pred=y_pred)
print(f'Accuracy del modelo CNN + SVM: {accuracy:.5f}')

Accuracy del modelo CNN + SVM: 0.98234


In [13]:
from sklearn.metrics import classification_report

In [14]:
# Per-class precision / recall / F1 on the validation split. The predictions
# are recomputed here with the same call used in the accuracy cell; classes are
# shown by their LabelEncoder integer codes.
y_pred = svm_model.predict(features_val)
print(classification_report(y_true=y_val, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.98      0.98      0.98       643
           1       0.99      0.99      0.99      1658
           2       1.00      1.00      1.00       261
           3       0.97      0.96      0.96       609

    accuracy                           0.98      3171
   macro avg       0.98      0.98      0.98      3171
weighted avg       0.98      0.98      0.98      3171



In [15]:
# Persist the CNN feature extractor to 'mi_modelo.h5'; `model` is only a second
# name for `feature_extractor`.
# NOTE(review): no compile()/fit() call on this network appears in the visible
# cells, so the saved weights appear to be the initial ones — confirm intended.
model = feature_extractor
model.save('mi_modelo.h5')

  saving_api.save_model(


In [16]:
# joblib is used to serialise the scikit-learn objects; `load` is imported but
# not used in the visible cells.
from joblib import dump, load

# Persist the fitted SVM classifier.
dump(svm_model, 'svm_model.joblib')

['svm_model.joblib']

In [17]:
# Persist the fitted StandardScaler.
# NOTE(review): the file is named 'pca_model.joblib' although the object is a
# scaler and no PCA step appears in the visible cells; the name is kept as-is
# because code that loads this file is not visible here — confirm and rename
# together with its consumers.
dump(scaler, 'pca_model.joblib')

['pca_model.joblib']