# Installing Tensorflow

In [1]:
!python --version
!python --version

Python 3.8.5
Python 3.8.5


In [2]:
#!pip install tensorflow
#!pip install tensorflow

In [3]:
#!pip install tensorflow==2.0

In [4]:
#!pip install --ignore-installed --upgrade tensorflow==2.0

In [10]:
import tensorflow as tf

# Show the installed TensorFlow version (the cell's last expression is rendered as its output).
tf.__version__

'2.6.0'

# Keras Syntax Basics

In [11]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# %matplotlib notebook
# Default figure size (width, height) for every plot in this notebook.
plt.rcParams["figure.figsize"] = (10,6)
# plt.rcParams['figure.dpi'] = 100
sns.set_style("whitegrid")
import warnings
# Silence all warnings for the rest of the session; the warn() call right after
# is a quick check that the filter is active (it should produce no output).
warnings.filterwarnings("ignore")
warnings.warn("this will not show")
# Display floats in pandas output with three decimal places.
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [7]:
#from google.colab import drive
#drive.mount('/content/drive')

In [8]:
#df = pd.read_csv("drive/MyDrive/Colab_Files/data/fake_reg.csv")

In [9]:
# Load the dataset from a path relative to the notebook's working directory.
# NOTE(review): the last recorded run raised FileNotFoundError for this path -
# confirm where fake_reg.csv lives before running; every later cell needs `df`.
df = pd.read_csv("../data&resources/fake_reg.csv")

FileNotFoundError: [Errno 2] No such file or directory: '../data&resources/fake_reg.csv'

## Exploratory Data Analysis and Visualization

In [None]:
df.head()

In [None]:
df.info()

In [None]:
df.describe().T

In [None]:
sns.pairplot(df);

In [None]:
df.corr()

## Preprocessing of Data
- Train | Test Split, Scaling

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Feature matrix (two columns) and regression target, both as plain NumPy arrays.
X = df[['feature1', 'feature2']].values
y = df['price'].values

In [None]:
X

In [None]:
y

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

In [None]:
X_train.shape

In [None]:
X_test.shape

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
help(MinMaxScaler)

In [None]:
scaler = MinMaxScaler()

In [None]:
# Fit the scaler on the training split only, then apply that same transform to
# both splits, so no test-set statistics influence the scaling.
# Note: X_train and X_test are rebound to their scaled versions here.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
X_train.max()

In [None]:
X_train.min()

In [None]:
X_test.max()

In [None]:
X_test.min()

## Modelling

In [None]:
import tensorflow as tf

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

In [None]:
help(Sequential)

In [None]:
help(Dense)

In [None]:
# First way to create model: pass the full list of layers to the constructor.
# Three hidden layers of 4 ReLU units each, then one output unit with no activation.
# NOTE: `model` is rebound by the next cell, which builds the same stack with add().

model = Sequential([Dense(units = 4, activation = "relu"),
                    Dense(units = 4, activation = "relu"),
                    Dense(units = 4, activation = "relu"),
                    Dense(units = 1)])

In [None]:
# Second way to create model (recommended): add the layers one at a time.

model = Sequential()
# No input size is declared here. To declare it explicitly, pass to the first layer
# input_dim = X_train.shape[1] or input_shape = (X_train.shape[1],)
# (the number of features, not the full X_train.shape, which includes the sample count).
model.add(Dense(units = 4, activation = "relu"))
model.add(Dense(units = 4, activation = "relu"))
model.add(Dense(units = 4, activation = "relu"))
model.add(Dense(units = 1))

In [None]:
X_train.shape[1]

In [None]:
# Regression setup: RMSprop optimizer minimising mean squared error.
model.compile(optimizer = "rmsprop", loss = "mse")

### Choosing an optimizer and loss

Keep in mind what kind of problem you are trying to solve:

    # For a multi-class classification problem
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # For a binary classification problem
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # For a mean squared error regression problem
    model.compile(optimizer='rmsprop',
                  loss='mse')

In [None]:
# Train for 250 epochs in mini-batches of 32 samples.
# The per-epoch loss is read back later through model.history.history.
model.fit(x = X_train, y = y_train, batch_size = 32, epochs = 250)

In [None]:
model.summary()

In [None]:
model.weights

In [None]:
model.history.history

In [None]:
pd.DataFrame(model.history.history)

In [None]:
loss_df = pd.DataFrame(model.history.history)
loss_df.plot()

In [None]:
# Plot the training loss against the epoch number, zoomed to a fixed y range.
fig, ax = plt.subplots()
ax.plot(loss_df)
ax.set_xlabel("epoch")
ax.set_ylabel("loss")
ax.set_title("loss - epoch graph")
# loss_df has one row per trained epoch, so the x range follows the epochs
# setting instead of repeating the literal 250.
ax.set_xlim([0, len(loss_df)])
# Focus on the given y values; the trailing semicolon keeps the returned
# limits tuple out of the cell output.
ax.set_ylim([0, 3000]);

## Model Performance

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score

In [None]:
model.evaluate(X_train, y_train, verbose=0)

In [None]:
model.evaluate(X_test, y_test, verbose=0)

In [None]:
y_pred = model.predict(X_test)

In [None]:
explained_variance_score(y_test, y_pred)

In [None]:
# Put the true targets and predictions side by side.
# reshape(-1,) flattens y_pred to 1-D so it can be used as a DataFrame column next to y_test.
my_dict = {"Actual" : y_test, "Pred" : y_pred.reshape(-1,)}
compare = pd.DataFrame(my_dict)
# Show 10 randomly chosen rows (no seed is set, so they differ between runs).
compare.sample(10)

In [None]:
sns.scatterplot(x = 'Actual', y = 'Pred', data = compare)

In [None]:
def eval_metric(actual, pred):
    """Print R2, MAE, MSE and RMSE for a set of regression predictions.

    Parameters
    ----------
    actual : array-like
        Ground-truth target values.
    pred : array-like
        Predicted values, one per entry in ``actual``.

    Returns
    -------
    None
        The metrics are printed, not returned.
    """
    mae = mean_absolute_error(actual, pred)
    mse = mean_squared_error(actual, pred)
    rmse = np.sqrt(mse)  # reuse the MSE instead of computing it a second time
    score = r2_score(actual, pred)
    print("r2_score:", score, "\nmae:", mae, "\nmse:", mse, "\nrmse:", rmse)

In [None]:
eval_metric(y_test, y_pred)

## Final Model and Model Deployment

In [None]:
# Refit the scaler on the full feature matrix for the final model.
# This rebinds `scaler`, replacing the instance that was fitted on X_train only.
scaler = MinMaxScaler().fit(X)

In [None]:
import pickle

# Persist the fitted scaler so prediction code can apply the identical scaling.
# The context manager flushes and closes the file even if dump() raises.
with open("scaler_fake_reg", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

In [None]:
X_scaled = scaler.transform(X)

In [None]:
# Final model: the same layer stack as the evaluated model, trained on the
# whole (scaled) dataset rather than on the training split.
final_model = Sequential()

# To declare the input size explicitly on the first layer:
# input_dim = 2 or input_dim = X_scaled.shape[1] (the feature count, not the row count).
final_model.add(Dense(units = 4, activation = "relu"))
final_model.add(Dense(units = 4, activation = "relu"))
final_model.add(Dense(units = 4, activation = "relu"))
final_model.add(Dense(1))

final_model.compile(optimizer = "rmsprop", loss = "mse")

final_model.fit(x = X_scaled, y = y, batch_size = 32, epochs = 250)

H5 (HDF5) is a file format for storing structured data; it is not a model by itself. Keras can save models in this format because it stores both the weights and the model configuration in a single file.

In [None]:
# Save the model trained on the full dataset. It pairs with the pickled scaler,
# which was also fitted on all of X; the earlier `model` was trained with the
# train-only scaling and would not match it.
final_model.save('model_fake_reg.h5')  # creates the HDF5 file 'model_fake_reg.h5'

## Prediction-1

In [None]:
from tensorflow.keras.models import load_model

In [None]:
model_fake_reg = load_model('model_fake_reg.h5')

In [None]:
# Reload the pickled scaler; the context manager closes the file handle after loading.
# pickle can execute arbitrary code on load, so only unpickle files you created or trust.
with open("scaler_fake_reg", "rb") as scaler_file:
    scaler_fake_reg = pickle.load(scaler_file)

In [None]:
sample = [[998, 1000]]

In [None]:
sample_scaled = scaler_fake_reg.transform(sample)

In [None]:
model_fake_reg.predict(sample_scaled)

## Prediction-2

In [None]:
my_dict = {"Feature1": [997, 998, 999, 1000, 1001],
           "Feature2": [1000, 999, 998, 997, 996]}

In [None]:
sample = pd.DataFrame(my_dict)
sample

In [None]:
sample_scaled = scaler_fake_reg.transform(sample)
sample_scaled

In [None]:
model_fake_reg.predict(sample_scaled)