# Treinamento de Modelo de Previsão de Preço de Diamantes

Este notebook treina dois modelos Keras (redes neurais) para prever o preço de diamantes com base em suas características (os 4Cs e as dimensões). Também implementa um ensemble por votação (média simples das previsões dos dois modelos) para combinar as previsões.

In [None]:
# Install the required dependencies
!pip install tensorflow pandas seaborn scikit-learn joblib numpy

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
import joblib
import os

# Show which TensorFlow version this run is using.
print(f"TensorFlow Version: {tf.__version__}")

## 1. Carregamento e Pré-processamento de Dados

In [None]:
# Load the dataset.
# A local 'diamonds.csv' (e.g. downloaded from Kaggle) is tried first; when it
# does not exist, fall back to seaborn's example 'diamonds' dataset.
try:
    df = pd.read_csv('diamonds.csv')
except FileNotFoundError:
    # Only a missing file triggers the fallback. Any other failure (malformed
    # CSV, permission problem, user interrupt) is left to propagate instead of
    # being silently reported as "file not found".
    print("Arquivo local não encontrado, carregando do Seaborn...")
    df = sns.load_dataset('diamonds')
else:
    print("Carregado de diamonds.csv")

print(df.head())

In [None]:
# Pre-processing: separate the target from the features, split into train and
# test sets, then scale the numeric columns and one-hot encode the categorical
# ones.
categorical_cols = ['cut', 'color', 'clarity']
numerical_cols = ['carat', 'depth', 'table', 'x', 'y', 'z']

X = df.drop(columns='price')
y = df['price']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

numeric_step = ('num', StandardScaler(), numerical_cols)
categorical_step = (
    'cat',
    OneHotEncoder(handle_unknown='ignore', sparse_output=False),
    categorical_cols,
)
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

# The transformers are fitted on the training split only; the test split is
# transformed with the already-fitted preprocessor.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# Number of feature columns produced by the preprocessor.
input_shape = X_train_processed.shape[1]
print(f"Input Shape: {input_shape}")

## 2. Definição e Treinamento dos Modelos

In [None]:
def create_model_1(input_shape: int) -> "keras.Model":
    """Build and compile the first regression network.

    The network is a stack of Dense layers (64 -> 32 -> 1) with ReLU
    activations on the hidden layers and a single output unit without
    an activation.

    Args:
        input_shape: Number of input features per sample.

    Returns:
        A compiled ``keras.Sequential`` model that uses the Adam optimizer,
        with mean absolute error as both the loss and the reported metric.
    """
    model = keras.Sequential([
        # Declare the input with an explicit Input object instead of passing
        # ``input_shape`` to the first Dense layer, which current Keras
        # versions discourage (and warn about) for Sequential models.
        keras.Input(shape=(input_shape,)),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(1),
    ])
    # 'mae' is kept as a metric so evaluate() returns a (loss, mae) pair.
    model.compile(optimizer='adam', loss='mae', metrics=['mae'])
    return model

def create_model_2(input_shape: int) -> "keras.Model":
    """Build and compile the second regression network.

    The network is a stack of Dense layers (128 -> 64 -> 32 -> 1) with ReLU
    activations on the hidden layers, a Dropout layer (rate 0.2) after the
    first hidden layer, and a single output unit without an activation.

    Args:
        input_shape: Number of input features per sample.

    Returns:
        A compiled ``keras.Sequential`` model that uses the Adam optimizer,
        with mean absolute error as both the loss and the reported metric.
    """
    model = keras.Sequential([
        # Declare the input with an explicit Input object instead of passing
        # ``input_shape`` to the first Dense layer, which current Keras
        # versions discourage (and warn about) for Sequential models.
        keras.Input(shape=(input_shape,)),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(1),
    ])
    # 'mae' is kept as a metric so evaluate() returns a (loss, mae) pair.
    model.compile(optimizer='adam', loss='mae', metrics=['mae'])
    return model

In [None]:
# Both networks are fitted with identical settings; a 0.2 fraction of the
# training data is passed as validation_split, and per-epoch output is
# silenced with verbose=0.
fit_options = dict(validation_split=0.2, batch_size=32, epochs=50, verbose=0)

print("Treinando Modelo 1...")
model1 = create_model_1(input_shape)
history1 = model1.fit(X_train_processed, y_train, **fit_options)

print("Treinando Modelo 2...")
model2 = create_model_2(input_shape)
history2 = model2.fit(X_train_processed, y_train, **fit_options)

print("Treinamento concluído.")

## 3. Avaliação e Voting

In [None]:
# Score each network on the held-out test split; evaluate() yields the loss
# followed by the compiled MAE metric.
loss1, mae1 = model1.evaluate(X_test_processed, y_test, verbose=0)
loss2, mae2 = model2.evaluate(X_test_processed, y_test, verbose=0)

print(f"Modelo 1 MAE: {mae1:.2f}")
print(f"Modelo 2 MAE: {mae2:.2f}")

# "Voting" ensemble: the element-wise average of the two models' predictions.
# Each prediction array is collapsed to one dimension before averaging.
pred1 = model1.predict(X_test_processed).reshape(-1)
pred2 = model2.predict(X_test_processed).reshape(-1)
pred_voting = (pred1 + pred2) / 2

# Mean absolute error of the averaged predictions.
absolute_errors = np.abs(y_test - pred_voting)
mae_voting = np.mean(absolute_errors)
print(f"Voting Ensemble MAE: {mae_voting:.2f}")

## 4. Salvar Modelos
Salva os modelos no formato `.keras` e o pré-processador com `joblib`.

In [None]:
# Persist both trained networks as .keras files and the fitted preprocessor
# with joblib.
for target_path, trained_model in (("model1.keras", model1), ("model2.keras", model2)):
    trained_model.save(target_path)
joblib.dump(preprocessor, "preprocessor.joblib")
print("Arquivos salvos: model1.keras, model2.keras, preprocessor.joblib")

In [None]:
# Example of how to load and use the saved artifacts
loaded_model = keras.models.load_model("model1.keras")
# To predict on new data, reload the preprocessor too and apply it before
# calling the model. `new_data` is a placeholder that is not defined in this
# notebook (presumably a DataFrame with the same feature columns used for
# training — confirm before use):
# loaded_preprocessor = joblib.load("preprocessor.joblib")
# prediction = loaded_model.predict(loaded_preprocessor.transform(new_data))