<a href="https://colab.research.google.com/github/Flantropy/notebooks/blob/main/TF_intro_03.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from tensorflow.keras.utils import plot_model
from google.colab import files
from collections import namedtuple
from operator import attrgetter

# A Larger Example

In [None]:
# Load the insurance dataset (a CSV hosted on GitHub) into a DataFrame.
INSURANCE_CSV_URL = (
    'https://raw.githubusercontent.com/stedy/'
    'Machine-Learning-with-R-datasets/master/insurance.csv'
)
df = pd.read_csv(INSURANCE_CSV_URL)

In [None]:
# Show the freshly loaded frame to inspect its columns before encoding.
df

In [None]:
# One-hot encode the categorical columns. `dtype=float` keeps the dummy columns
# numeric: recent pandas versions emit boolean dummies by default, and a frame
# that mixes bool with int/float columns converts to an object array, which
# Keras cannot turn into a tensor when fitting, evaluating or predicting.
df = pd.get_dummies(df, dtype=float)
df.head()

In [None]:
# The target is the `charges` column; every remaining column is a feature.
y = df['charges']
X = df.drop(columns='charges')

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Row counts of the full feature set and of its train / test parts.
tuple(map(len, (X, X_train, X_test)))

In [None]:
# Row counts of the full target and of its train / test parts.
tuple(map(len, (y, y_train, y_test)))

In [None]:
# Seed TensorFlow's global random generator before building the model.
tf.random.set_seed(42)

# Three ReLU hidden layers (20 -> 30 -> 10 units) followed by a single linear output unit.
layers = [tf.keras.layers.Dense(units, activation='relu') for units in (20, 30, 10)]
layers.append(tf.keras.layers.Dense(1))

anna = tf.keras.Sequential(layers)
anna.compile(
    loss=tf.keras.losses.mae,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    metrics=['mae'],
)

# Train silently (verbose=0); the returned History object is plotted later.
history = anna.fit(X_train, y_train, epochs=200, verbose=0)

## Evaluation

In [None]:
# Lightweight row type pairing a sample's index label with its true and predicted value.
Record = namedtuple('Record', 'index true pred')

In [None]:
def plot_prediced_vs_expexted(d: 'list[Record]'):
    """Scatter true and predicted values against sample position.

    Args:
        d: Sequence of ``Record`` items. Each item's ``true`` value is drawn in
            green and its ``pred`` value in red at the same x position, so the
            caller's ordering of ``d`` determines the shape of the plot.
    """
    # Both series share the same x positions, so compute them once.
    positions = range(len(d))
    plt.figure(figsize=(10, 7))
    plt.scatter(positions, [r.true for r in d], c='g', label='Testing data')
    plt.scatter(positions, [r.pred for r in d], c='r', label='Predictions')
    plt.xlabel('Sample position')
    plt.ylabel('Value')
    plt.legend()

In [None]:
# Score the trained model on the held-out split; the model was compiled with MAE
# as both the loss and the tracked metric.
anna.evaluate(X_test, y_test)

In [None]:
# Drop singleton dimensions so each prediction lines up with one test target.
preds = anna.predict(X_test).squeeze()

# One Record per test row: original row label, actual value, predicted value.
data = [
    Record(index=label, true=actual, pred=predicted)
    for label, actual, predicted in zip(y_test.index, y_test.values, preds)
]

# Order by the true value before plotting.
sorted_data = sorted(data, key=lambda record: record.true)
plot_prediced_vs_expexted(sorted_data)

In [None]:
# Plot every series recorded during training against the epoch number.
history_ax = pd.DataFrame(history.history).plot()
history_ax.set_ylabel('loss')
history_ax.set_xlabel('epochs')

# Preprocessing data (normalization and standardization)

In [None]:
# Histogram of the `age` feature, to see its range before scaling.
X['age'].plot.hist()

In [None]:
# Reload the raw CSV: `df` was overwritten with its one-hot encoded form earlier,
# and the column transformer needs the original categorical columns.
raw_csv_url = 'https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv'
df2 = pd.read_csv(raw_csv_url)

In [None]:
# Show the reloaded, un-encoded frame.
df2

In [None]:
# Column transformer for the raw frame: min-max scale the numeric columns and
# one-hot encode the categorical ones (categories unseen at fit time are ignored).
numeric_features = ['age', 'bmi', 'children']
categorical_features = ['sex', 'smoker', 'region']

ct = make_column_transformer(
    (MinMaxScaler(), numeric_features),
    (OneHotEncoder(handle_unknown='ignore'), categorical_features),
)

In [None]:
# Separate features from the target on the raw (un-encoded) frame.
y = df2['charges']
X = df2.drop(columns='charges')
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the transformer on the training split only, so nothing is learned from the test rows.
ct.fit(X_train)

# Apply the fitted transformer to both splits.
X_train_normal, X_test_normal = ct.transform(X_train), ct.transform(X_test)

In [None]:
# Use a positional lookup: train_test_split shuffles rows, so index label 0 is
# not necessarily the first training row (or in the training split at all),
# whereas `X_train_normal[0]` is the first row by position.
X_train.iloc[0]

In [None]:
# First row of the transformed training features, for comparison with the raw row.
X_train_normal[0]

In [None]:
# Shapes before and after the transform (one-hot encoding widens the feature matrix).
tuple(features.shape for features in (X_train, X_train_normal))

In [None]:
# Same architecture as the earlier model, now trained on the normalized features.
tf.random.set_seed(42)

# Take the input width from the transformed data instead of hard-coding it, and
# declare it with `tf.keras.Input(shape=...)`: the `input_shape` keyword on
# `InputLayer` is deprecated in newer Keras releases.
n_features = X_train_normal.shape[1]
layers = [
    tf.keras.Input(shape=(n_features,)),
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dense(30, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(1)
]

anna = tf.keras.Sequential(layers)
anna.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss=tf.keras.losses.mae,
    metrics=['mae'],
)

# Train silently (verbose=0); the returned History object is plotted afterwards.
history = anna.fit(X_train_normal, y_train, epochs=100, verbose=0)

In [None]:
# Score the model trained on normalized features against the normalized test split.
anna.evaluate(X_test_normal, y_test)

In [None]:
# Training curves for the model fitted on normalized features, labelled the same
# way as the earlier loss plot so the two figures can be compared directly.
pd.DataFrame(history.history).plot()
plt.ylabel('loss')
plt.xlabel('epochs')