In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import *
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression, Lasso, ElasticNet, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from scipy.stats import pearsonr
from collections import defaultdict
from tensorflow.keras.preprocessing.image import load_img, img_to_array, array_to_img, ImageDataGenerator
import random
from tqdm import tqdm
import time
from concurrent.futures import ThreadPoolExecutor
import os
from sklearn.base import clone

# --- Configuração dos modelos ---
from tensorflow.keras.applications.xception import Xception, preprocess_input as preprocess_xception
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input as preprocess_vgg16
from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input as preprocess_vgg19
from tensorflow.keras.applications.resnet import ResNet50, ResNet101, ResNet152, preprocess_input as preprocess_resnet
from tensorflow.keras.applications.resnet_v2 import ResNet50V2, ResNet101V2, ResNet152V2, preprocess_input as preprocess_resnet_v2
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input as preprocess_inception_v3
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input as preprocess_inception_resnet_v2
from tensorflow.keras.applications.mobilenet import MobileNet, preprocess_input as preprocess_mobilenet
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input as preprocess_mobilenet_v2
from tensorflow.keras.applications.densenet import DenseNet121, DenseNet169, DenseNet201, preprocess_input as preprocess_densenet
from tensorflow.keras.applications.nasnet import NASNetMobile, NASNetLarge, preprocess_input as preprocess_nasnet
from tensorflow.keras.applications.efficientnet import EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3, EfficientNetB4, EfficientNetB5, EfficientNetB6, EfficientNetB7, preprocess_input as preprocess_efficientnet

model_input_sizes = {model: (1024, 1024) for model in ['EfficientNetB1']}

model_info = {'EfficientNetB1': (EfficientNetB1, preprocess_efficientnet)}

# Aumentadores globais
data_augmenters = [
    ImageDataGenerator(horizontal_flip=True),
    ImageDataGenerator(brightness_range=[0.8, 1.2]),
    ImageDataGenerator(preprocessing_function=lambda x: x + np.random.normal(0, 5, x.shape))
]

# --- Funções auxiliares ---
def augment_and_extract(model, img_path, model_name):
    img = load_img(img_path, target_size=model_input_sizes[model_name])
    augmenter = random.choice(data_augmenters)
    aug_img = augment_image(img, augmenter)
    aug_img_array = img_to_array(aug_img)
    aug_img_array = np.expand_dims(aug_img_array, axis=0)
    preprocessed_aug_img = model_info[model_name][1](aug_img_array)
    feats = model.predict(preprocessed_aug_img, verbose=0)
    return feats.flatten()

def augment_image(img, augmenter):
    img_array = img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    aug_iter = augmenter.flow(img_array, batch_size=1)
    aug_img = next(aug_iter)[0].astype(np.uint8)
    return array_to_img(aug_img)

def extract_features_from_paths(model, img_paths, model_name):
    features = []
    for img_path in img_paths:
        img = load_img(img_path, target_size=model_input_sizes[model_name])
        img_array = img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        preprocessed_img = model_info[model_name][1](img_array)
        feats = model.predict(preprocessed_img, verbose=0)
        features.append(feats.flatten())
    return features

def extract_features_augmented(model, img_paths, model_name, target_len=50):
    features = extract_features_from_paths(model, img_paths, model_name)
    needed = target_len - len(features)
    if needed > 0:
        with ThreadPoolExecutor(max_workers=1) as executor:
            futures = [executor.submit(augment_and_extract, model, random.choice(img_paths), model_name) for _ in range(needed)]
            for f in futures:
                features.append(f.result())
    return features[:target_len]

def run_stratified_kfold_regressions_from_images(df, model, model_name):
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    all_results = []
    all_preds_txt = []

    models = [
        ('Linear Regression', LinearRegression()),
        ('Random Forest', RandomForestRegressor(random_state=42)),
        ('SVR', SVR()),
        ('Lasso', Lasso(random_state=42)),
        ('ElasticNet', ElasticNet(random_state=42)),
        ('Ridge', Ridge(random_state=42))
    ]

    for fold, (train_idx, test_idx) in enumerate(skf.split(df, df['Group'])):
        df_train = df.iloc[train_idx]
        df_test = df.iloc[test_idx]

        id_to_train_paths = defaultdict(list)
        id_to_test_paths = defaultdict(list)

        for _, row in df_train.iterrows():
            id_to_train_paths[row['ID Animal']].append(row['Path'])
        for _, row in df_test.iterrows():
            id_to_test_paths[row['ID Animal']].append(row['Path'])

        train_features, test_features = {}, {}
        for animal_id in tqdm(id_to_train_paths.keys(), desc=f"Fold {fold+1} - Treino ({model_name})"):
            paths = id_to_train_paths[animal_id]
            feats = extract_features_augmented(model, paths, model_name, target_len=50)
            train_features[animal_id] = np.mean(feats, axis=0)

        for animal_id in tqdm(id_to_test_paths.keys(), desc=f"Fold {fold+1} - Teste ({model_name})"):
            paths = id_to_test_paths[animal_id]
            feats = extract_features_from_paths(model, paths, model_name)
            test_features[animal_id] = np.mean(feats, axis=0)

        df_weights = df[['ID Animal', 'Peso', 'Group']].drop_duplicates().set_index('ID Animal')
        train_df = pd.DataFrame.from_dict(train_features, orient='index').join(df_weights)
        test_df = pd.DataFrame.from_dict(test_features, orient='index').join(df_weights)

        X_train = train_df.iloc[:, :-2].values
        y_train = train_df['Peso'].values
        X_test = test_df.iloc[:, :-2].values
        y_test = test_df['Peso'].values

        for name, reg_model in models:
            model_clone = clone(reg_model)
            model_clone.fit(X_train, y_train)
            y_pred = model_clone.predict(X_test)

            print(f"\n--- Regressão: {name} | Modelo CNN: {model_name} ---")
            fold_metrics = []

            for i, (real, pred) in enumerate(zip(y_test, y_pred)):
                animal_id = test_df.index[i]
                all_preds_txt.append((model_name, name, fold+1, animal_id, real, pred))

            mse = mean_squared_error(y_test, y_pred)
            rmse = np.sqrt(mse)
            mae = mean_absolute_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            r = pearsonr(y_test, y_pred)[0] if np.var(y_pred) > 0 else 0

            print(f"[Fold {fold+1}] MSE: {mse:.2f} | RMSE: {rmse:.2f} | MAE: {mae:.2f} | R2: {r2:.2f} | R: {r:.2f}")

            all_results.append({
                'REGRESSAO': name,
                'FOLD': fold + 1,
                'MSE': mse,
                'RMSE': rmse,
                'MAE': mae,
                'R2': r2,
                'R': r,
                'CNN': model_name
            })

    lines = ["CNN,REGRESSAO,FOLD,ID,PESO_REAL,PESO_PREDITO"]
    for cnn, reg, fold, id_, real, pred in all_preds_txt:
        lines.append(f"{cnn},{reg},{fold},{id_},{real:.2f},{pred:.2f}")
    with open(f"predicoes_completas_{model_name}.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print(f"\n📝 Predições consolidadas salvas em predicoes_completas_{model_name}.txt")

    df_result = pd.DataFrame(all_results)
    for reg_name in df_result['REGRESSAO'].unique():
        df_model = df_result[df_result['REGRESSAO'] == reg_name]
        print(f"\n>>> MÉDIA DOS RESULTADOS - {reg_name}")
        for metric in ['MSE', 'RMSE', 'MAE', 'R2', 'R']:
            values = df_model[metric].values
            print(f"{metric}: {np.mean(values):.2f} ± {np.std(values):.2f}")

    df_result.to_excel(f"metricas_por_fold_{model_name}.xlsx", index=False)
    print(f"📊 Métricas por fold salvas em metricas_por_fold_{model_name}.xlsx")
    return all_results

# --- Execução principal ---
df = pd.read_csv('all_animal_images.csv')
df['Group'] = df['ID Animal'].apply(lambda x: 'C1' if 'C1_' in x else 'C2')

final_results = []
for model_name in model_info.keys():
    print(f"\nIniciando processamento com modelo: {model_name}")
    start_time = time.time()
    model = model_info[model_name][0](weights='imagenet', include_top=False, pooling='avg')
    results = run_stratified_kfold_regressions_from_images(df, model, model_name)
    final_results.extend(results)
    elapsed = time.time() - start_time
    print(f"Tempo total para {model_name}: {elapsed/60:.2f} minutos")

results_df = pd.DataFrame(final_results)
results_df.to_excel('regression_results_1024_EfficientNetB1_augmented_trainonly_stratified.xlsx', index=False)
print("Resultados salvos em 'regression_results_1024_EfficientNetB1_augmented_trainonly_stratified.xlsx'.")


Iniciando processamento com modelo: EfficientNetB1


Fold 1 - Treino (EfficientNetB1): 100%|████████████████████████████████████████████████| 98/98 [15:41<00:00,  9.60s/it]
Fold 1 - Teste (EfficientNetB1): 100%|█████████████████████████████████████████████████| 61/61 [00:23<00:00,  2.60it/s]



--- Regressão: Linear Regression | Modelo CNN: EfficientNetB1 ---
[Fold 1] MSE: 23.87 | RMSE: 4.89 | MAE: 3.73 | R2: 0.81 | R: 0.94


  r = pearsonr(y_test, y_pred)[0] if np.var(y_pred) > 0 else 0



--- Regressão: Random Forest | Modelo CNN: EfficientNetB1 ---
[Fold 1] MSE: 65.34 | RMSE: 8.08 | MAE: 6.33 | R2: 0.48 | R: 0.84

--- Regressão: SVR | Modelo CNN: EfficientNetB1 ---
[Fold 1] MSE: 123.03 | RMSE: 11.09 | MAE: 8.91 | R2: 0.02 | R: 0.58

--- Regressão: Lasso | Modelo CNN: EfficientNetB1 ---
[Fold 1] MSE: 126.71 | RMSE: 11.26 | MAE: 9.42 | R2: -0.01 | R: nan

--- Regressão: ElasticNet | Modelo CNN: EfficientNetB1 ---
[Fold 1] MSE: 120.69 | RMSE: 10.99 | MAE: 9.26 | R2: 0.04 | R: 0.62

--- Regressão: Ridge | Modelo CNN: EfficientNetB1 ---
[Fold 1] MSE: 24.64 | RMSE: 4.96 | MAE: 3.88 | R2: 0.80 | R: 0.94


Fold 2 - Treino (EfficientNetB1): 100%|████████████████████████████████████████████████| 97/97 [16:15<00:00, 10.05s/it]
Fold 2 - Teste (EfficientNetB1): 100%|█████████████████████████████████████████████████| 71/71 [00:36<00:00,  1.96it/s]



--- Regressão: Linear Regression | Modelo CNN: EfficientNetB1 ---
[Fold 2] MSE: 13.13 | RMSE: 3.62 | MAE: 2.68 | R2: 0.90 | R: 0.95


  r = pearsonr(y_test, y_pred)[0] if np.var(y_pred) > 0 else 0



--- Regressão: Random Forest | Modelo CNN: EfficientNetB1 ---
[Fold 2] MSE: 58.75 | RMSE: 7.66 | MAE: 6.17 | R2: 0.53 | R: 0.84

--- Regressão: SVR | Modelo CNN: EfficientNetB1 ---
[Fold 2] MSE: 122.35 | RMSE: 11.06 | MAE: 8.95 | R2: 0.03 | R: 0.76

--- Regressão: Lasso | Modelo CNN: EfficientNetB1 ---
[Fold 2] MSE: 126.16 | RMSE: 11.23 | MAE: 9.51 | R2: -0.00 | R: nan

--- Regressão: ElasticNet | Modelo CNN: EfficientNetB1 ---
[Fold 2] MSE: 122.94 | RMSE: 11.09 | MAE: 9.44 | R2: 0.02 | R: 0.71

--- Regressão: Ridge | Modelo CNN: EfficientNetB1 ---
[Fold 2] MSE: 14.02 | RMSE: 3.74 | MAE: 2.70 | R2: 0.89 | R: 0.94


Fold 3 - Treino (EfficientNetB1): 100%|████████████████████████████████████████████████| 96/96 [22:48<00:00, 14.25s/it]
Fold 3 - Teste (EfficientNetB1): 100%|█████████████████████████████████████████████████| 72/72 [00:42<00:00,  1.69it/s]



--- Regressão: Linear Regression | Modelo CNN: EfficientNetB1 ---
[Fold 3] MSE: 34.22 | RMSE: 5.85 | MAE: 4.30 | R2: 0.67 | R: 0.90

--- Regressão: Random Forest | Modelo CNN: EfficientNetB1 ---
[Fold 3] MSE: 41.70 | RMSE: 6.46 | MAE: 5.37 | R2: 0.59 | R: 0.87

--- Regressão: SVR | Modelo CNN: EfficientNetB1 ---
[Fold 3] MSE: 95.41 | RMSE: 9.77 | MAE: 8.05 | R2: 0.07 | R: 0.63

--- Regressão: Lasso | Modelo CNN: EfficientNetB1 ---
[Fold 3] MSE: 102.57 | RMSE: 10.13 | MAE: 8.81 | R2: -0.00 | R: 0.00

--- Regressão: ElasticNet | Modelo CNN: EfficientNetB1 ---
[Fold 3] MSE: 100.57 | RMSE: 10.03 | MAE: 8.79 | R2: 0.02 | R: 0.51

--- Regressão: Ridge | Modelo CNN: EfficientNetB1 ---
[Fold 3] MSE: 31.19 | RMSE: 5.59 | MAE: 4.06 | R2: 0.70 | R: 0.90


Fold 4 - Treino (EfficientNetB1): 100%|████████████████████████████████████████████████| 96/96 [28:29<00:00, 17.81s/it]
Fold 4 - Teste (EfficientNetB1): 100%|█████████████████████████████████████████████████| 71/71 [00:49<00:00,  1.45it/s]



--- Regressão: Linear Regression | Modelo CNN: EfficientNetB1 ---
[Fold 4] MSE: 38.86 | RMSE: 6.23 | MAE: 4.97 | R2: 0.69 | R: 0.93


  r = pearsonr(y_test, y_pred)[0] if np.var(y_pred) > 0 else 0



--- Regressão: Random Forest | Modelo CNN: EfficientNetB1 ---
[Fold 4] MSE: 60.05 | RMSE: 7.75 | MAE: 6.27 | R2: 0.53 | R: 0.83

--- Regressão: SVR | Modelo CNN: EfficientNetB1 ---
[Fold 4] MSE: 122.57 | RMSE: 11.07 | MAE: 8.90 | R2: 0.04 | R: 0.77

--- Regressão: Lasso | Modelo CNN: EfficientNetB1 ---
[Fold 4] MSE: 127.47 | RMSE: 11.29 | MAE: 9.54 | R2: -0.00 | R: nan

--- Regressão: ElasticNet | Modelo CNN: EfficientNetB1 ---
[Fold 4] MSE: 124.01 | RMSE: 11.14 | MAE: 9.49 | R2: 0.02 | R: 0.65

--- Regressão: Ridge | Modelo CNN: EfficientNetB1 ---
[Fold 4] MSE: 35.29 | RMSE: 5.94 | MAE: 4.68 | R2: 0.72 | R: 0.93


Fold 5 - Treino (EfficientNetB1): 100%|████████████████████████████████████████████████| 98/98 [29:36<00:00, 18.13s/it]
Fold 5 - Teste (EfficientNetB1): 100%|█████████████████████████████████████████████████| 65/65 [00:46<00:00,  1.41it/s]



--- Regressão: Linear Regression | Modelo CNN: EfficientNetB1 ---
[Fold 5] MSE: 15.51 | RMSE: 3.94 | MAE: 2.99 | R2: 0.85 | R: 0.94

--- Regressão: Random Forest | Modelo CNN: EfficientNetB1 ---
[Fold 5] MSE: 45.63 | RMSE: 6.76 | MAE: 5.72 | R2: 0.57 | R: 0.89

--- Regressão: SVR | Modelo CNN: EfficientNetB1 ---
[Fold 5] MSE: 102.11 | RMSE: 10.11 | MAE: 8.21 | R2: 0.03 | R: 0.79

--- Regressão: Lasso | Modelo CNN: EfficientNetB1 ---
[Fold 5] MSE: 105.96 | RMSE: 10.29 | MAE: 8.87 | R2: -0.01 | R: nan

--- Regressão: ElasticNet | Modelo CNN: EfficientNetB1 ---
[Fold 5] MSE: 102.78 | RMSE: 10.14 | MAE: 8.85 | R2: 0.02 | R: 0.55

--- Regressão: Ridge | Modelo CNN: EfficientNetB1 ---
[Fold 5] MSE: 11.72 | RMSE: 3.42 | MAE: 2.78 | R2: 0.89 | R: 0.95

📝 Predições consolidadas salvas em predicoes_completas_EfficientNetB1.txt

>>> MÉDIA DOS RESULTADOS - Linear Regression
MSE: 25.12 ± 10.09
RMSE: 4.91 ± 1.02
MAE: 3.73 ± 0.84
R2: 0.78 ± 0.09
R: 0.93 ± 0.02

>>> MÉDIA DOS RESULTADOS - Random Fore

  r = pearsonr(y_test, y_pred)[0] if np.var(y_pred) > 0 else 0


📊 Métricas por fold salvas em metricas_por_fold_EfficientNetB1.xlsx
Tempo total para EfficientNetB1: 116.57 minutos
Resultados salvos em 'regression_results_1024_EfficientNetB1_augmented_trainonly_stratified.xlsx'.
