In [1]:
%%time
from skimage.io import imread
from skimage.transform import resize
import os
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import gc
import gradio as gr
from datetime import datetime
from joblib import dump

# Timestamp taken right after the imports; the results cell subtracts it from the
# end time to report the notebook's total run time.
start_time = datetime.now()


CPU times: total: 3.17 s
Wall time: 6.59 s


In [2]:
%%time
# Switch automatic garbage collection on before any heavy work starts.
# NOTE(review): CPython enables the collector by default, so this call is presumably a no-op — confirm.

gc.enable()

CPU times: total: 0 ns
Wall time: 0 ns


In [3]:
%%time
# Load and preprocess the images of one folder in batches, using a generator

def process_images_in_batches(folder_path, label, target_size, batch_size=10):
    """Yield batches of flattened, resized images from a folder with their labels.

    Every entry returned by ``os.listdir(folder_path)`` is read with ``imread``,
    resized to ``target_size`` and flattened to one row. A batch is emitted after
    every ``batch_size`` files and once more for the final (possibly shorter) batch.

    Args:
        folder_path: Directory whose entries are read as images.
        label: Value stored in the label array for every image of this folder.
        target_size: ``output_shape`` forwarded to ``resize``.
        batch_size: Number of files consumed per yielded batch.

    Yields:
        Tuple ``(images, labels)``: ``images`` is ``np.array`` of the flattened
        images in the batch, ``labels`` is ``np.array`` of ``label`` repeated once
        per image.
    """
    file_list = os.listdir(folder_path)
    total_files = len(file_list)
    batch_images = []
    batch_labels = []
    with tqdm(total=total_files) as pbar:
        for position, filename in enumerate(file_list, start=1):
            img = imread(os.path.join(folder_path, filename))
            # NOTE(review): imread presumably raises on unreadable files rather than
            # returning None, so this guard may never be False — confirm.
            if img is not None:
                img_resized = resize(img, output_shape=target_size)
                batch_images.append(img_resized.flatten())
                batch_labels.append(label)
            # Drop the full-size image right away; only the resized copy is kept.
            del img
            # Advance once per file so the bar always ends at total_files. The
            # previous per-batch update added 0 on the last batch and left the bar
            # short (e.g. 3990/4000).
            pbar.update(1)
            if position % batch_size == 0 or position == total_files:
                yield np.array(batch_images), np.array(batch_labels)
                batch_images = []
                batch_labels = []
                # Release memory held by the batch that was just handed over.
                gc.collect()

CPU times: total: 0 ns
Wall time: 0 ns


In [4]:
%%time
# Shared settings: the output shape handed to resize() and the numeric labels
# assigned to the images of the 'B' and 'N' folders.

target_size = (200, 200)
label_B_train, label_N_train = 0, 1

CPU times: total: 0 ns
Wall time: 0 ns


In [5]:
%%time
# Training directories, resolved relative to the current working directory

directorio_actual = os.getcwd()

folder_path_B_train, folder_path_N_train = (
    os.path.join(directorio_actual, 'TRAIN.1', class_dir) for class_dir in ('B', 'N')
)

CPU times: total: 0 ns
Wall time: 0 ns


In [6]:
%%time
# Run the batch generator over each training folder and split the yielded
# (images, labels) pairs into one tuple of image batches and one tuple of label batches.

X_B_train, y_B_train = zip(
    *process_images_in_batches(folder_path_B_train, label_B_train, target_size, batch_size=10)
)
X_N_train, y_N_train = zip(
    *process_images_in_batches(folder_path_N_train, label_N_train, target_size, batch_size=10)
)

100%|█████████▉| 3990/4000 [00:58<00:00, 67.82it/s]
100%|█████████▉| 3990/4000 [00:58<00:00, 68.05it/s]

CPU times: total: 1min 53s
Wall time: 1min 57s





In [7]:
%%time
# Join the per-class batches into a single training feature array and label array
# (all 'B' batches first, then all 'N' batches).

X_train = np.concatenate((*X_B_train, *X_N_train), axis=0)
y_train = np.concatenate((*y_B_train, *y_N_train), axis=0)

CPU times: total: 1.62 s
Wall time: 1.63 s


In [8]:
%%time
# Test directories, resolved relative to the same working directory as the training ones

folder_path_B_test, folder_path_N_test = (
    os.path.join(directorio_actual, 'TEST', class_dir) for class_dir in ('B', 'N')
)

CPU times: total: 0 ns
Wall time: 0 ns


In [9]:
%%time
# Run the batch generator over each test folder (reusing the label values defined
# for training), then join the batches into single test arrays.

X_B_test, y_B_test = zip(
    *process_images_in_batches(folder_path_B_test, label_B_train, target_size, batch_size=10)
)
X_N_test, y_N_test = zip(
    *process_images_in_batches(folder_path_N_test, label_N_train, target_size, batch_size=10)
)

X_test = np.concatenate((*X_B_test, *X_N_test), axis=0)
y_test = np.concatenate((*y_B_test, *y_N_test), axis=0)

 99%|█████████▉| 990/1000 [00:14<00:00, 67.01it/s]
 99%|█████████▉| 990/1000 [00:14<00:00, 67.13it/s]


CPU times: total: 29.1 s
Wall time: 30.1 s


In [10]:
%%time
# Standardize the features: the scaler is fitted on the training data only and
# then applied to both the training and the test data.

scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

CPU times: total: 19.8 s
Wall time: 19.9 s


In [11]:
%%time
# Create the logistic-regression model, train it and predict on the test set

# The 'saga' solver shuffles the data, so a fixed random_state keeps the fitted
# model (and the metrics reported below) repeatable between runs.
# max_iter is raised to give the solver more iterations to converge.
clf = LogisticRegression(solver='saga', max_iter=1000, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)



CPU times: total: 2h 2min 29s
Wall time: 2h 2min 35s


In [12]:
%%time
# Score the test-set predictions: per-class report, confusion matrix and accuracy

cls_report = classification_report(y_true=y_test, y_pred=y_pred)
conf_mat = confusion_matrix(y_true=y_test, y_pred=y_pred)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)

CPU times: total: 109 ms
Wall time: 15 ms


In [17]:
%%time
# Print the evaluation results and the total run time of the notebook

print(f"Accuracy: {acc}")
print(f"Confusion Matrix: \n{conf_mat}")
print(f"Classification Report: \n{cls_report}")

end_time = datetime.now()
time_diff = end_time - start_time
# timedelta.seconds only holds the part of the duration below one day, so a run of
# 24 hours or more would be under-reported; total_seconds() covers the whole span.
minutes, seconds = divmod(int(time_diff.total_seconds()), 60)

print(f"Tiempo de ejecución total: {minutes} minutos y {seconds} segundos")

Accuracy: 0.7185
Confusion Matrix: 
[[652 348]
 [215 785]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.75      0.65      0.70      1000
           1       0.69      0.79      0.74      1000

    accuracy                           0.72      2000
   macro avg       0.72      0.72      0.72      2000
weighted avg       0.72      0.72      0.72      2000

Tiempo de ejecución total: 690 minutos y 22 segundos
CPU times: total: 0 ns
Wall time: 0 ns


In [None]:
# Save the trained model
model_filename = 'logistic_regression_model.joblib'
dump(clf, model_filename)
print(f"Modelo guardado como {model_filename}")

# Save the fitted scaler as well: the classifier was trained on standardized
# features, so a reloaded model needs the same scaler to transform new images.
scaler_filename = 'standard_scaler.joblib'
dump(scaler, scaler_filename)
print(f"Scaler guardado como {scaler_filename}")

In [14]:
%%time
# Preprocess a single image and classify it with the trained model

def process_image_and_predict(image):
    """Classify one image with the fitted scaler and classifier.

    The image is converted with ``np.array``, resized to ``target_size``, flattened,
    standardized with ``scaler`` and passed to ``clf.predict``.

    Args:
        image: Image data accepted by ``np.array``.
            NOTE(review): presumably the array Gradio passes for an "image"
            input — confirm.

    Returns:
        Tuple ``(label_text, predicted_class)``: the display name of the predicted
        class and the raw class value returned by the classifier.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    if image is None:
        # Fail with a clear message instead of an obscure error inside resize().
        raise ValueError("No se proporcionó ninguna imagen para clasificar.")
    img = np.array(image)
    features = resize(img, output_shape=target_size).flatten()
    # The scaler expects a 2-D input, hence the single-row list.
    features_scaled = scaler.transform([features])
    prediction = clf.predict(features_scaled)
    # Keyed by the label constants used to build the data sets, so the display
    # names stay in sync with the numeric labels.
    etiquetas = {label_B_train: 'Biodegradable', label_N_train: 'No Biodegradable'}
    predicted_class = prediction[0]
    return etiquetas[predicted_class], predicted_class


CPU times: total: 0 ns
Wall time: 0 ns


In [15]:
%%time
def mostrar_resultado(imagen):
    """Return the sentence that reports the class predicted for ``imagen``."""
    # Only the label text is needed here; the raw class value is ignored.
    etiqueta, _clase = process_image_and_predict(imagen)
    mensaje = f"La imagen es {etiqueta}."
    return mensaje
# Gradio interface: takes an image input and shows the text returned by mostrar_resultado
iface = gr.Interface(
    fn=mostrar_resultado,
    inputs="image",
    outputs="label",
    title="Biodegradable o No?",
    # Spelling of "identificar" corrected in the user-facing description.
    description="Suba una Imagen para identificar si es Biodegradable o No!"
)

iface.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


CPU times: total: 125 ms
Wall time: 141 ms


