In [None]:
# %% [markdown]
# # Assignment - TensorFlow Framework Analysis
# 
# ## Question 1: Review of components needed to implement deep learning
# 
# Based on what we learned implementing neural networks from scratch, the main components are:
# 
# 1. **Weight and bias initialization**
# 2. **Epoch loop**
# 3. **Forward propagation**
# 4. **Loss function calculation**
# 5. **Backward propagation**
# 6. **Parameter update (optimization)**
# 7. **Evaluation metrics calculation**
# 8. **Dataset splitting (train/val/test)**
# 9. **Mini-batch processing**
# 10. **Activation functions**

# %% [markdown]
# ## Question 2: Correspondence between scratch implementation and TensorFlow
# 
# Let's analyze how TensorFlow implements each component:


import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import matplotlib.pyplot as plt

# Report the installed TensorFlow version (the cells below call tf.placeholder / tf.Session)
print("TensorFlow version: {}".format(tf.__version__))

# %% [markdown]
# ### Sample code analysis - Correspondences:
# 
# 1. **Weight initialization**: `tf.Variable(tf.random_normal(...))`
# 2. **Epoch loop**: `for epoch in range(num_epochs)`
# 3. **Forward propagation**: the `example_net()` function, which holds the network operations
# 4. **Loss function**: `tf.nn.sigmoid_cross_entropy_with_logits()`
# 5. **Backward propagation**: `optimizer.minimize(loss_op)` (automatic)
# 6. **Parameter update**: running `train_op` through `sess.run()`
# 7. **Metrics**: `accuracy`, computed with `tf.equal()` and `tf.reduce_mean()`
# 8. **Dataset splitting**: `train_test_split()`
# 9. **Mini-batch**: the `GetMiniBatch` class
# 10. **Activation functions**: `tf.nn.relu()`, `tf.sigmoid()`

# %% [markdown]
# ## Question 3: Iris model with 3 classes

# %%
# Iris dataset setup for the 3-class problem
print("=== IRIS 3 CLASSES ===")

# Read the CSV and show what it contains
df_iris = pd.read_csv("Iris.csv")
print(f"Dataset shape: {df_iris.shape}")
print(f"Species: {df_iris['Species'].unique()}")

# Feature matrix: the four sepal/petal measurement columns
feature_columns = ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]
X_iris = df_iris[feature_columns].values

# Integer-encode the species names (3 classes) and shape the labels as a column vector
species_map = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
y_iris = df_iris["Species"].map(species_map).values.astype(np.int64).reshape(-1, 1)

print(f"X shape: {X_iris.shape}")
print(f"y shape: {y_iris.shape}")
print(f"Class distribution: {np.unique(y_iris, return_counts=True)}")

# Stratified splits: hold out 20% for test, then 20% of the remainder for validation
split_options = dict(test_size=0.2, random_state=0)
X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(
    X_iris, y_iris, stratify=y_iris, **split_options
)
X_train_iris, X_val_iris, y_train_iris, y_val_iris = train_test_split(
    X_train_iris, y_train_iris, stratify=y_train_iris, **split_options
)

print(f"Train: {X_train_iris.shape}, Val: {X_val_iris.shape}, Test: {X_test_iris.shape}")

# %%
class GetMiniBatch:
    """
    Iterable over fixed-size mini-batches of a shuffled (X, y) pair.

    The rows are shuffled once, at construction time, with a private seeded
    generator, so every pass over the object yields the same batches in the
    same order. The last batch is smaller when the number of rows is not a
    multiple of ``batch_size``.

    Parameters
    ----------
    X : ndarray
        Samples, indexed along the first axis.
    y : ndarray
        Targets with the same first-axis length as ``X``.
    batch_size : int
        Number of rows per batch (must be positive).
    seed : int
        Seed used for the one-off shuffle.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive or ``X`` and ``y`` differ in length.
    """
    def __init__(self, X, y, batch_size=10, seed=0):
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        if len(X) != len(y):
            raise ValueError("X and y must contain the same number of rows")

        self.batch_size = batch_size
        # Private RandomState: shuffling must not reseed numpy's global generator
        shuffle_index = np.random.RandomState(seed).permutation(len(X))
        self.X = X[shuffle_index]
        self.y = y[shuffle_index]
        # Number of batches per pass, counting a final partial batch
        self._stop = int(np.ceil(len(X) / batch_size))
        self._counter = 0

    def __len__(self):
        """Number of mini-batches in one pass."""
        return self._stop

    def __getitem__(self, item):
        """Return the ``item``-th mini-batch as an ``(X_batch, y_batch)`` tuple."""
        start = item * self.batch_size
        stop = start + self.batch_size
        return self.X[start:stop], self.y[start:stop]

    def __iter__(self):
        # Restart from the first batch so the object can be iterated once per epoch
        self._counter = 0
        return self

    def __next__(self):
        if self._counter >= self._stop:
            raise StopIteration()
        batch = self[self._counter]
        self._counter += 1
        return batch

# %%
class MultiClassNet:
    """
    Fully connected network for multi-class classification.

    Two ReLU hidden layers feed a linear output layer that produces one logit
    per class. The graph is built in the constructor; the loss is softmax
    cross-entropy and the optimizer is Adam.

    Parameters
    ----------
    n_input : int
        Number of input features.
    n_hidden1, n_hidden2 : int
        Widths of the first and second hidden layers.
    n_classes : int
        Number of target classes (width of the output layer).
    learning_rate : float
        Learning rate passed to the Adam optimizer.
    """
    def __init__(self, n_input, n_hidden1=50, n_hidden2=100, n_classes=3, learning_rate=0.001):
        self.n_input = n_input
        self.n_hidden1 = n_hidden1
        self.n_hidden2 = n_hidden2
        self.n_classes = n_classes
        self.learning_rate = learning_rate

        # Placeholders: float features and integer class ids shaped (batch, 1)
        self.X = tf.placeholder(tf.float32, [None, n_input])
        self.Y = tf.placeholder(tf.int64, [None, 1])

        # Remove only the trailing length-1 axis. A bare tf.squeeze would also
        # remove the batch axis when a batch holds a single sample, leaving
        # labels and logits with different ranks.
        labels = tf.squeeze(self.Y, axis=1)

        # One-hot encode the class ids for the softmax cross-entropy loss
        self.Y_one_hot = tf.one_hot(labels, depth=n_classes)

        # Build network
        self.logits = self._build_network()

        # Loss and optimizer
        self.loss_op = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.Y_one_hot, logits=self.logits)
        )
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        self.train_op = self.optimizer.minimize(self.loss_op)

        # Predicted class = index of the largest logit; accuracy = share of matches
        self.predictions = tf.argmax(self.logits, axis=1)
        self.correct_pred = tf.equal(self.predictions, labels)
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))

    def _build_network(self):
        """Create the weights and biases and return the output-layer logits tensor."""
        # Fix the graph-level seed before any random initializer is created
        tf.set_random_seed(0)

        # Layer parameters, all drawn from tf.random_normal with its default arguments
        weights = {
            'w1': tf.Variable(tf.random_normal([self.n_input, self.n_hidden1])),
            'w2': tf.Variable(tf.random_normal([self.n_hidden1, self.n_hidden2])),
            'w3': tf.Variable(tf.random_normal([self.n_hidden2, self.n_classes]))
        }
        biases = {
            'b1': tf.Variable(tf.random_normal([self.n_hidden1])),
            'b2': tf.Variable(tf.random_normal([self.n_hidden2])),
            'b3': tf.Variable(tf.random_normal([self.n_classes]))
        }

        layer_1 = tf.add(tf.matmul(self.X, weights['w1']), biases['b1'])
        layer_1 = tf.nn.relu(layer_1)
        layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
        layer_2 = tf.nn.relu(layer_2)
        # No activation on the output: the loss applies softmax to these logits itself
        layer_output = tf.add(tf.matmul(layer_2, weights['w3']), biases['b3'])

        return layer_output

# %%
# Train the 3-class Iris model
print("Treinando modelo Iris 3 classes...")

# Hyperparameters
learning_rate = 0.001
batch_size = 10
num_epochs = 100

# Build the model graph for the Iris feature count
model_iris = MultiClassNet(
    n_input=X_train_iris.shape[1],
    n_classes=3,
    learning_rate=learning_rate,
)

# Variable-initialisation op
init = tf.global_variables_initializer()

# Mini-batch iterator over the training split
get_mini_batch_train = GetMiniBatch(X_train_iris, y_train_iris, batch_size=batch_size)

# Tensors fetched on each training step and on each evaluation
train_fetches = [model_iris.train_op, model_iris.loss_op, model_iris.accuracy]
eval_fetches = [model_iris.loss_op, model_iris.accuracy]

with tf.Session() as sess:
    sess.run(init)

    # Per-epoch history, read later by the plotting cell
    train_losses, val_losses = [], []
    train_accs, val_accs = [], []

    for epoch in range(num_epochs):
        total_batch = len(get_mini_batch_train)
        batch_losses, batch_accs = [], []

        # One optimisation step per mini-batch
        for mini_batch_x, mini_batch_y in get_mini_batch_train:
            batch_feed = {model_iris.X: mini_batch_x, model_iris.Y: mini_batch_y}
            _, loss, acc = sess.run(train_fetches, feed_dict=batch_feed)
            batch_losses.append(loss)
            batch_accs.append(acc)

        # Unweighted mean over the batches of this epoch
        avg_loss = sum(batch_losses) / total_batch
        avg_acc = sum(batch_accs) / total_batch

        # Validation metrics on the whole validation split
        val_feed = {model_iris.X: X_val_iris, model_iris.Y: y_val_iris}
        val_loss, val_acc = sess.run(eval_fetches, feed_dict=val_feed)

        train_losses.append(avg_loss)
        val_losses.append(val_loss)
        train_accs.append(avg_acc)
        val_accs.append(val_acc)

        # Log every 20th epoch
        if not epoch % 20:
            print(f"Epoch {epoch}, Loss: {avg_loss:.4f}, Val Loss: {val_loss:.4f}, "
                  f"Acc: {avg_acc:.3f}, Val Acc: {val_acc:.3f}")

    # Final accuracy on the held-out test split
    test_feed = {model_iris.X: X_test_iris, model_iris.Y: y_test_iris}
    test_acc = sess.run(model_iris.accuracy, feed_dict=test_feed)
    print(f"\nTest Accuracy: {test_acc:.3f}")

# %%
# Plot the Iris 3-class learning curves: loss on the left, accuracy on the right
iris_panels = [
    ('Loss', train_losses, val_losses),
    ('Accuracy', train_accs, val_accs),
]

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for ax, (metric, train_curve, val_curve) in zip(axes, iris_panels):
    ax.plot(train_curve, label=f'Train {metric}')
    ax.plot(val_curve, label=f'Val {metric}')
    ax.set_title(f'{metric} - Iris 3 Classes')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric)
    ax.legend()

fig.tight_layout()
plt.show()

# %% [markdown]
# ## Question 4: House Prices model (regression)

# %%
print("\n=== HOUSE PRICES REGRESSION ===")


def split_train_val_test(X, y, test_size=0.2, random_state=0):
    """Split (X, y) into train / validation / test parts.

    The test part is carved out first; the validation part is then carved out
    of the remainder using the same fraction and seed.

    Returns
    -------
    tuple
        ``(X_train, X_val, X_test, y_train, y_val, y_test)``
    """
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=test_size, random_state=random_state
    )
    return X_train, X_val, X_test, y_train, y_val, y_test


# Only a missing file triggers the synthetic fallback; the try block is kept
# to the read itself so unrelated errors are never mistaken for it.
try:
    df_house = pd.read_csv("train.csv")
except FileNotFoundError:
    df_house = None

if df_house is not None:
    print(f"House Prices dataset shape: {df_house.shape}")

    # Candidate feature columns and the regression target
    features = ['GrLivArea', 'YearBuilt', 'OverallQual', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF']
    target = 'SalePrice'

    # Keep only the feature columns that are present in the file
    available_features = [f for f in features if f in df_house.columns]
    print(f"Available features: {available_features}")

    if not available_features:
        # Without this check the model would be built on a zero-column matrix
        raise ValueError("None of the expected feature columns were found")
    if target not in df_house.columns:
        raise ValueError(f"Target column '{target}' not found")

    # Missing feature values are filled with 0; the target becomes a column vector
    X_house = df_house[available_features].fillna(0).values.astype(np.float64)
    y_house = df_house[target].values[:, np.newaxis].astype(np.float64)

    print(f"X shape: {X_house.shape}, y shape: {y_house.shape}")
    print(f"Price stats - Min: {y_house.min():.0f}, Max: {y_house.max():.0f}, Mean: {y_house.mean():.0f}")

    # Split BEFORE normalising so validation/test rows do not influence the statistics
    (X_train_house, X_val_house, X_test_house,
     y_train_house, y_val_house, y_test_house) = split_train_val_test(X_house, y_house)

    # Standardise features and target with training-split statistics only;
    # the small epsilon guards against division by zero for constant columns.
    x_mean = X_train_house.mean(axis=0)
    x_std = X_train_house.std(axis=0) + 1e-8
    y_mean = y_train_house.mean()
    y_std = y_train_house.std() + 1e-8

    X_train_house = (X_train_house - x_mean) / x_std
    X_val_house = (X_val_house - x_mean) / x_std
    X_test_house = (X_test_house - x_mean) / x_std
    y_train_house = (y_train_house - y_mean) / y_std
    y_val_house = (y_val_house - y_mean) / y_std
    y_test_house = (y_test_house - y_mean) / y_std

    print(f"Train: {X_train_house.shape}, Val: {X_val_house.shape}, Test: {X_test_house.shape}")

else:
    print("House Prices dataset not found. Using synthetic data for demonstration.")

    # Synthetic linear data: 3 standard-normal features plus Gaussian noise
    np.random.seed(42)
    n_samples = 1000
    X_house = np.random.randn(n_samples, 3)
    true_weights = np.array([2.5, 1.8, -0.9])[:, np.newaxis]
    y_house = X_house @ true_weights + np.random.randn(n_samples, 1) * 0.5

    (X_train_house, X_val_house, X_test_house,
     y_train_house, y_val_house, y_test_house) = split_train_val_test(X_house, y_house)

    print(f"Synthetic data - Train: {X_train_house.shape}, Val: {X_val_house.shape}, Test: {X_test_house.shape}")

# %%
class RegressionNet:
    """
    Fully connected network for regression.

    Two ReLU hidden layers feed a single linear output unit. The constructor
    builds the graph, a mean-squared-error loss minimised with Adam, and
    MAE / RMSE tensors for reporting.
    """
    def __init__(self, n_input, n_hidden1=50, n_hidden2=100, learning_rate=0.001):
        self.n_input = n_input
        self.n_hidden1 = n_hidden1
        self.n_hidden2 = n_hidden2
        self.learning_rate = learning_rate

        # Inputs: float features and a float target shaped (batch, 1)
        self.X = tf.placeholder(tf.float32, [None, n_input])
        self.Y = tf.placeholder(tf.float32, [None, 1])

        # Network output, one value per sample
        self.predictions = self._build_network()

        # Mean squared error, minimised with Adam
        squared_error = tf.square(self.Y - self.predictions)
        self.loss_op = tf.reduce_mean(squared_error)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        self.train_op = self.optimizer.minimize(self.loss_op)

        # Extra reporting metrics: mean absolute error and root of the MSE loss
        absolute_error = tf.abs(self.Y - self.predictions)
        self.mae = tf.reduce_mean(absolute_error)
        self.rmse = tf.sqrt(self.loss_op)

    def _build_network(self):
        """Create the layer parameters and return the output tensor."""
        tf.set_random_seed(0)

        # Layer widths from input to the single output unit
        layer_sizes = [self.n_input, self.n_hidden1, self.n_hidden2, 1]

        # All weight matrices are created first, then all bias vectors
        kernels = [
            tf.Variable(tf.random_normal([fan_in, fan_out]))
            for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:])
        ]
        offsets = [
            tf.Variable(tf.random_normal([width]))
            for width in layer_sizes[1:]
        ]

        # Hidden layers: affine transform followed by ReLU
        hidden = self.X
        for kernel, offset in zip(kernels[:-1], offsets[:-1]):
            hidden = tf.add(tf.matmul(hidden, kernel), offset)
            hidden = tf.nn.relu(hidden)

        # Output layer: affine transform only
        return tf.add(tf.matmul(hidden, kernels[-1]), offsets[-1])

# %%
# Train the House Prices regression model
print("Treinando modelo House Prices...")

# Hyperparameters
learning_rate = 0.001
batch_size = 16
num_epochs = 100

# Build the model graph for the available feature count
model_house = RegressionNet(
    n_input=X_train_house.shape[1],
    learning_rate=learning_rate,
)

# Variable-initialisation op
init = tf.global_variables_initializer()

# Mini-batch iterator over the training split
get_mini_batch_house = GetMiniBatch(X_train_house, y_train_house, batch_size=batch_size)

# Tensors fetched on each training step and on each evaluation
train_fetches = [model_house.train_op, model_house.loss_op]
eval_fetches = [model_house.loss_op, model_house.mae, model_house.rmse]

with tf.Session() as sess:
    sess.run(init)

    # Per-epoch loss history, read later by the plotting cell
    train_losses, val_losses = [], []

    for epoch in range(num_epochs):
        total_batch = len(get_mini_batch_house)
        batch_losses = []

        # One optimisation step per mini-batch
        for mini_batch_x, mini_batch_y in get_mini_batch_house:
            batch_feed = {model_house.X: mini_batch_x, model_house.Y: mini_batch_y}
            _, loss = sess.run(train_fetches, feed_dict=batch_feed)
            batch_losses.append(loss)

        # Unweighted mean over the batches of this epoch
        avg_loss = sum(batch_losses) / total_batch

        # Validation metrics on the whole validation split
        val_feed = {model_house.X: X_val_house, model_house.Y: y_val_house}
        val_loss, val_mae, val_rmse = sess.run(eval_fetches, feed_dict=val_feed)

        train_losses.append(avg_loss)
        val_losses.append(val_loss)

        # Log every 20th epoch
        if not epoch % 20:
            print(f"Epoch {epoch}, Train Loss: {avg_loss:.4f}, Val Loss: {val_loss:.4f}, "
                  f"Val MAE: {val_mae:.4f}, Val RMSE: {val_rmse:.4f}")

    # Final metrics on the held-out test split
    test_feed = {model_house.X: X_test_house, model_house.Y: y_test_house}
    test_loss, test_mae, test_rmse = sess.run(eval_fetches, feed_dict=test_feed)
    print(f"\nTest Results - Loss: {test_loss:.4f}, MAE: {test_mae:.4f}, RMSE: {test_rmse:.4f}")

# %%
# Plot the House Prices learning curves.
# Only the loss is tracked for this model, so a single panel is drawn instead
# of reserving a 1x2 grid and leaving half of the figure empty.
plt.figure(figsize=(6, 4))

plt.plot(train_losses, label='Train Loss (MSE)')
plt.plot(val_losses, label='Val Loss (MSE)')
plt.title('Loss - House Prices')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()

# Finish the figure the same way as the other plotting cells
plt.tight_layout()
plt.show()

# %% [markdown]
# ## Question 5: MNIST model

# %%
print("\n=== MNIST DATASET ===")

# Load MNIST through TensorFlow's tutorial helper. The import stays inside the
# try block because the module may be missing, in which case the Iris fallback is used.
try:
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=False)

    # Labels are reshaped to column vectors to match the (batch, 1) label placeholder
    X_train_mnist = mnist.train.images
    y_train_mnist = mnist.train.labels[:, np.newaxis]
    X_val_mnist = mnist.validation.images
    y_val_mnist = mnist.validation.labels[:, np.newaxis]
    X_test_mnist = mnist.test.images
    y_test_mnist = mnist.test.labels[:, np.newaxis]

    print("MNIST shapes:")
    print(f"Train: {X_train_mnist.shape}, {y_train_mnist.shape}")
    print(f"Val: {X_val_mnist.shape}, {y_val_mnist.shape}")
    print(f"Test: {X_test_mnist.shape}, {y_test_mnist.shape}")
    print(f"Pixel range: [{X_train_mnist.min():.3f}, {X_train_mnist.max():.3f}]")

except Exception as exc:
    # `except Exception` (not a bare `except:`) lets KeyboardInterrupt and
    # SystemExit propagate; the reason is printed so the fallback is not silent.
    print("MNIST dataset not available. Using Iris 3-class as alternative.")
    print(f"Reason: {exc!r}")

    # Fall back to the Iris 3-class splits prepared earlier in the notebook
    X_train_mnist, y_train_mnist = X_train_iris, y_train_iris
    X_val_mnist, y_val_mnist = X_val_iris, y_val_iris
    X_test_mnist, y_test_mnist = X_test_iris, y_test_iris

# %%
class MNISTNet:
    """
    Fully connected classifier for MNIST, mirroring the from-scratch implementation.

    Two sigmoid hidden layers feed a linear output layer that produces one
    logit per class. The constructor builds the graph, a softmax cross-entropy
    loss minimised with Adam, and prediction / accuracy tensors.

    Parameters
    ----------
    n_input : int
        Number of input features.
    n_hidden1, n_hidden2 : int
        Widths of the first and second hidden layers.
    n_classes : int
        Number of target classes (width of the output layer).
    learning_rate : float
        Learning rate passed to the Adam optimizer.
    """
    def __init__(self, n_input, n_hidden1=200, n_hidden2=100, n_classes=10, learning_rate=0.001):
        self.n_input = n_input
        self.n_hidden1 = n_hidden1
        self.n_hidden2 = n_hidden2
        self.n_classes = n_classes
        self.learning_rate = learning_rate

        # Placeholders: float features and integer class ids shaped (batch, 1)
        self.X = tf.placeholder(tf.float32, [None, n_input])
        self.Y = tf.placeholder(tf.int64, [None, 1])

        # Remove only the trailing length-1 axis. A bare tf.squeeze would also
        # remove the batch axis when a batch holds a single sample, leaving
        # labels and logits with different ranks.
        labels = tf.squeeze(self.Y, axis=1)

        # One-hot encode the class ids for the softmax cross-entropy loss
        self.Y_one_hot = tf.one_hot(labels, depth=n_classes)

        # Build network (similar to the scratch implementation)
        self.logits = self._build_network()

        # Loss and optimizer
        self.loss_op = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.Y_one_hot, logits=self.logits)
        )
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        self.train_op = self.optimizer.minimize(self.loss_op)

        # Predicted class = index of the largest logit; accuracy = share of matches
        self.predictions = tf.argmax(self.logits, axis=1)
        self.correct_pred = tf.equal(self.predictions, labels)
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))

    def _build_network(self):
        """Create the layer parameters and return the output-layer logits tensor."""
        # Fix the graph-level seed before any random initializer is created
        tf.set_random_seed(0)

        # Weights: normal with stddev 0.1; biases: zeros (as in the scratch version)
        weights = {
            'w1': tf.Variable(tf.random_normal([self.n_input, self.n_hidden1], stddev=0.1)),
            'w2': tf.Variable(tf.random_normal([self.n_hidden1, self.n_hidden2], stddev=0.1)),
            'w3': tf.Variable(tf.random_normal([self.n_hidden2, self.n_classes], stddev=0.1))
        }
        biases = {
            'b1': tf.Variable(tf.zeros([self.n_hidden1])),
            'b2': tf.Variable(tf.zeros([self.n_hidden2])),
            'b3': tf.Variable(tf.zeros([self.n_classes]))
        }

        # Forward pass with sigmoid activations, as in the scratch implementation
        layer_1 = tf.add(tf.matmul(self.X, weights['w1']), biases['b1'])
        layer_1 = tf.nn.sigmoid(layer_1)

        layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
        layer_2 = tf.nn.sigmoid(layer_2)

        # No activation on the output: the loss applies softmax to these logits itself
        layer_output = tf.add(tf.matmul(layer_2, weights['w3']), biases['b3'])

        return layer_output

# %%
# Train the MNIST model
print("Treinando modelo MNIST...")

# Hyperparameters
learning_rate = 0.001
batch_size = 100
num_epochs = 50

# Build the model graph; the class count is taken from the training labels
model_mnist = MNISTNet(
    n_input=X_train_mnist.shape[1],
    n_classes=len(np.unique(y_train_mnist)),
    learning_rate=learning_rate,
)

# Variable-initialisation op
init = tf.global_variables_initializer()

# Mini-batch iterator over the training split
get_mini_batch_mnist = GetMiniBatch(X_train_mnist, y_train_mnist, batch_size=batch_size)

# Tensors fetched on each training step and on each evaluation
train_fetches = [model_mnist.train_op, model_mnist.loss_op, model_mnist.accuracy]
eval_fetches = [model_mnist.loss_op, model_mnist.accuracy]

with tf.Session() as sess:
    sess.run(init)

    # Per-epoch history, read later by the plotting cell
    train_losses, val_losses = [], []
    train_accs, val_accs = [], []

    for epoch in range(num_epochs):
        total_batch = len(get_mini_batch_mnist)
        batch_losses, batch_accs = [], []

        # One optimisation step per mini-batch
        for mini_batch_x, mini_batch_y in get_mini_batch_mnist:
            batch_feed = {model_mnist.X: mini_batch_x, model_mnist.Y: mini_batch_y}
            _, loss, acc = sess.run(train_fetches, feed_dict=batch_feed)
            batch_losses.append(loss)
            batch_accs.append(acc)

        # Unweighted mean over the batches of this epoch
        avg_loss = sum(batch_losses) / total_batch
        avg_acc = sum(batch_accs) / total_batch

        # Validation metrics on the whole validation split
        val_feed = {model_mnist.X: X_val_mnist, model_mnist.Y: y_val_mnist}
        val_loss, val_acc = sess.run(eval_fetches, feed_dict=val_feed)

        train_losses.append(avg_loss)
        val_losses.append(val_loss)
        train_accs.append(avg_acc)
        val_accs.append(val_acc)

        # Log every 10th epoch
        if not epoch % 10:
            print(f"Epoch {epoch}, Loss: {avg_loss:.4f}, Val Loss: {val_loss:.4f}, "
                  f"Acc: {avg_acc:.3f}, Val Acc: {val_acc:.3f}")

    # Final accuracy on the held-out test split
    test_feed = {model_mnist.X: X_test_mnist, model_mnist.Y: y_test_mnist}
    test_acc = sess.run(model_mnist.accuracy, feed_dict=test_feed)
    print(f"\nTest Accuracy: {test_acc:.3f}")

# %%
# Plot the MNIST learning curves: loss on the left, accuracy on the right
mnist_panels = [
    ('Loss', train_losses, val_losses),
    ('Accuracy', train_accs, val_accs),
]

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for ax, (metric, train_curve, val_curve) in zip(axes, mnist_panels):
    ax.plot(train_curve, label=f'Train {metric}')
    ax.plot(val_curve, label=f'Val {metric}')
    ax.set_title(f'{metric} - MNIST')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric)
    ax.legend()

fig.tight_layout()
plt.show()

# %% [markdown]
# ## Summary of the Differences between Implementations
# 
# ### Binary vs Multi-class Classification:
# - **Binary**: `sigmoid_cross_entropy_with_logits`, output layer with 1 neuron
# - **Multi-class**: `softmax_cross_entropy_with_logits_v2`, output layer with n_classes neurons
# 
# ### Classification vs Regression:
# - **Classification**: cross-entropy loss, accuracy metrics
# - **Regression**: MSE loss, metrics such as MAE and RMSE
# 
# ### Framework vs Scratch:
# - **TensorFlow**: handles backward propagation automatically and provides ready-made optimizers
# - **Scratch**: every operation is implemented by hand

# %%
# Closing summary: one printed line per model, then the overall takeaway
summary_lines = (
    "\n=== RESUMO FINAL ===",
    "1. Iris 3 Classes: Modelo de classificação multi-classe usando softmax",
    "2. House Prices: Modelo de regressão usando MSE loss",
    "3. MNIST: Modelo de classificação multi-classe para imagens",
    "\nTodos os modelos demonstram a correspondência entre implementação scratch e TensorFlow",
)
for summary_line in summary_lines:
    print(summary_line)

^C
Note: you may need to restart the kernel to use updated packages.


ModuleNotFoundError: No module named 'tensorflow'