In [None]:
# %% [markdown]
# Sprint: Reading TensorFlow Examples and Rewriting to Keras

# This notebook completes the sprint tasks: run official tutorial-style models and rewrite TensorFlow examples to **Keras (tf.keras)** for four datasets: Binary Iris, Multiclass Iris, House Prices (regression), and MNIST. The notebook uses **TensorFlow 2.x / tf.keras** APIs so it runs on modern Python (3.8+).
#
# ---

# %% [markdown]
# Instructions

# - This notebook assumes **TensorFlow 2.x** is installed (`pip install tensorflow`). If you cannot install TensorFlow, you can still read the code and run parts that use only `scikit-learn` and `numpy`.
# - For the **House Prices** section you need `train.csv` from the Kaggle competition placed in the same folder as this notebook. If the file is missing, that cell prints a notice and falls back to a small synthetic regression dataset.
#
# ---

# %%
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris, make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, accuracy_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
print("TensorFlow version:", tf.__version__)

# Seed NumPy's and TensorFlow's global random generators with one shared
# value for reproducibility.
SEED = 42
for seed_rng in (np.random.seed, tf.random.set_seed):
    seed_rng(SEED)

# %% [markdown]
# 1) Official tutorial example (short)
# %%
# MNIST quick example (official-style with tf.keras)
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Convert pixels to float32, divide by 255, and flatten every image into a
# single vector of 28 * 28 values.
n_pixels = 28 * 28
x_train = (x_train.astype("float32") / 255.0).reshape((-1, n_pixels))
x_test = (x_test.astype("float32") / 255.0).reshape((-1, n_pixels))

# Two hidden ReLU layers; the final Dense layer has no activation, so the
# model outputs raw logits for the 10 classes.
model = keras.Sequential(
    [
        layers.Input(shape=(n_pixels,)),
        layers.Dense(128, activation="relu"),
        layers.Dense(64, activation="relu"),
        layers.Dense(10),
    ]
)

# from_logits=True pairs with the activation-free output layer above.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)

print('\nTraining a small MNIST model (this may take ~1-2 minutes)')
fit_options = dict(validation_split=0.1, epochs=5, batch_size=128, verbose=2)
history = model.fit(x_train, y_train, **fit_options)

print('\nEvaluating on test set')
model.evaluate(x_test, y_test, verbose=2)

# %% [markdown]
# 2) Rewriting TensorFlow examples to Keras

# Below are four sections: Binary Iris, Multiclass Iris, House Prices (regression), and MNIST (again, this time as a complete Keras example).

# 2.1) Iris - Binary classification (Iris-versicolor vs Iris-virginica)

# %%
# Prepare dataset - binary subset
iris = load_iris()
X = iris.data
y = iris.target

# Keep only classes 1 and 2 (versicolor=1, virginica=2 in sklearn) and
# relabel them so that virginica -> 1 and versicolor -> 0.
mask = (y == 1) | (y == 2)
X_bin = X[mask]
y_bin = (y[mask] == 2).astype(int)

# The split uses "_bin"-suffixed names so it does not overwrite the
# `y_train` / `y_test` labels that belong to the MNIST example.
X_train_bin, X_test_bin, y_train_bin, y_test_bin = train_test_split(
    X_bin, y_bin, test_size=0.2, random_state=42
)

# Standardize features with statistics fitted on the training split only.
scaler_bin = StandardScaler().fit(X_train_bin)
X_train_bin_s = scaler_bin.transform(X_train_bin)
X_test_bin_s = scaler_bin.transform(X_test_bin)

# Build Keras model: a single sigmoid output paired with binary cross-entropy.
model_bin = keras.Sequential([
    layers.Input(shape=(X_train_bin_s.shape[1],)),
    layers.Dense(16, activation='relu'),
    layers.Dense(8, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])
model_bin.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train silently, then report accuracy on the held-out 20% test split.
history_bin = model_bin.fit(
    X_train_bin_s, y_train_bin, validation_split=0.1, epochs=50, batch_size=8, verbose=0
)
loss_bin, acc_bin = model_bin.evaluate(X_test_bin_s, y_test_bin, verbose=0)
print(f"Binary Iris Test Accuracy: {acc_bin:.4f}")

# %% [markdown]
# 2.2) Iris - Multiclass classification (all three species)

# %%
# All three species: reuse the `iris` dataset object loaded in the binary section.
X, y = iris.data, iris.target
X_train_m, X_test_m, y_train_m, y_test_m = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split, then apply it to both splits.
scaler_m = StandardScaler()
X_train_ms = scaler_m.fit_transform(X_train_m)
X_test_ms = scaler_m.transform(X_test_m)

# Three output units without activation: the model emits logits, and the
# loss is configured with from_logits=True to match.
n_features_m = X_train_ms.shape[1]
model_multi = keras.Sequential(
    [
        layers.Input(shape=(n_features_m,)),
        layers.Dense(32, activation='relu'),
        layers.Dense(16, activation='relu'),
        layers.Dense(3),
    ]
)
model_multi.compile(
    optimizer='adam',
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

model_multi.fit(
    X_train_ms,
    y_train_m,
    validation_split=0.1,
    epochs=60,
    batch_size=8,
    verbose=0,
)
loss_m, acc_m = model_multi.evaluate(X_test_ms, y_test_m, verbose=0)
print(f"Multiclass Iris Test Accuracy: {acc_m:.4f}")

# %% [markdown]
# 2.3) House Prices - Regression (simple features)


# %%
# Choose the data source: the Kaggle file when it is present, otherwise a
# synthetic two-feature regression problem.
hp_file = 'train.csv'
if os.path.exists(hp_file):
    df_raw = pd.read_csv(hp_file)
    # use a small set of features for demonstration
    features = ['GrLivArea', 'YearBuilt']
    # `df_raw` keeps the file as read; `df` holds only rows where the used
    # feature columns and the target are all present.
    df = df_raw.dropna(subset=features + ['SalePrice'])
    X_hp = df[features].to_numpy()
    y_hp = df['SalePrice'].to_numpy()
else:
    print("train.csv not found — falling back to synthetic regression dataset for demonstration.")
    X_hp, y_hp = make_regression(n_samples=500, n_features=2, noise=20.0, random_state=42)

# Split and standardize once, for whichever data source was selected above.
# The scaler is fitted on the training split only.
X_train_hp, X_test_hp, y_train_hp, y_test_hp = train_test_split(
    X_hp, y_hp, test_size=0.2, random_state=42
)
scaler_hp = StandardScaler().fit(X_train_hp)
X_train_hp_s = scaler_hp.transform(X_train_hp)
X_test_hp_s = scaler_hp.transform(X_test_hp)

# Keras Regression Model: one linear output unit trained with mean squared error.
# NOTE(review): the target is not scaled, so the reported MSE is in squared
# target units.
model_reg = keras.Sequential([
    layers.Input(shape=(X_train_hp_s.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)
])
# The 'mse' metric duplicates the loss; it is kept so that evaluate() returns
# a list and the [0] indexing below stays valid.
model_reg.compile(optimizer='adam', loss='mse', metrics=['mse'])
model_reg.fit(X_train_hp_s, y_train_hp, validation_split=0.1, epochs=80, batch_size=16, verbose=0)

# Element 0 of evaluate()'s result is the loss. The training figure is computed
# on the whole training split, including the 10% used for validation during fit.
mse_train = model_reg.evaluate(X_train_hp_s, y_train_hp, verbose=0)[0]
mse_test = model_reg.evaluate(X_test_hp_s, y_test_hp, verbose=0)[0]
print(f"House Prices regression MSE (train): {mse_train:.2f}")
print(f"House Prices regression MSE (test): {mse_test:.2f}")

# %% [markdown]
# 2.4) MNIST - Keras model (image classification)

# %%
# Reload MNIST so this section can run on its own.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Convert to float32, divide by 255, and flatten each image to 28 * 28 values.
flat_dim = 28 * 28
x_train = (x_train.astype("float32") / 255.0).reshape((-1, flat_dim))
x_test = (x_test.astype("float32") / 255.0).reshape((-1, flat_dim))

# Hidden layers of 256 and 128 ReLU units; the 10-unit output layer has no
# activation, so the loss is configured with from_logits=True.
model_mnist = keras.Sequential(
    [
        layers.Input(shape=(flat_dim,)),
        layers.Dense(256, activation='relu'),
        layers.Dense(128, activation='relu'),
        layers.Dense(10),
    ]
)
model_mnist.compile(
    optimizer='adam',
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

model_mnist.fit(
    x_train,
    y_train,
    validation_split=0.1,
    epochs=6,
    batch_size=256,
    verbose=2,
)
loss_mnist, acc_mnist = model_mnist.evaluate(x_test, y_test, verbose=2)
print(f"MNIST Test Accuracy: {acc_mnist:.4f}")

# %% [markdown]
# Question 8 (Advanced): Framework comparison

# Below is a short comparison that you can expand into a table in your report:

# - Calculation speed: TensorFlow and PyTorch both offer GPU acceleration. TF 1.x builds static graphs, while TF 2.x runs eagerly by default and can still compile graphs with `tf.function`; PyTorch is eager by default. Actual speed depends on versions and implementations (TF's XLA/TPU support, PyTorch's optimized kernels).
# - Code Line Count & Readability: Keras (tf.keras) is highest-level and most concise for standard models. PyTorch provides clear imperative code and is often preferred for research readability. Raw TensorFlow 1.x is verbose.
# - Available features: TF ecosystem includes TensorBoard, TF Lite, TF Serving; PyTorch has TorchServe, torchvision, and tight community-driven research tools.

# %% [markdown]
# 4) Save models (optional)


# %%
# The marker above starts a code cell, so the save calls below are executed
# instead of being treated as text of the preceding markdown cell.
# Each trained model is written to the file name it is keyed by; the ".h5"
# names are kept unchanged so existing consumers of these files still work.
saved_models = {
    'iris_binary_keras.h5': model_bin,
    'iris_multi_keras.h5': model_multi,
    'house_prices_keras.h5': model_reg,
    'mnist_keras.h5': model_mnist,
}
for model_path, trained_model in saved_models.items():
    trained_model.save(model_path)

# Joining the dict keys (insertion-ordered) reproduces the original message.
print('Models saved: ' + ', '.join(saved_models))

