# Machine Learning with TensorFlow

## Tensors

In [None]:
import tensorflow as tf
#tf.version is a module; the version string itself is tf.__version__
print(tf.__version__)
import tensorflow.compat.v2.feature_column
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib
from random import randrange

### Tensor rank and shape

A tensor's rank is its number of dimensions, from 0 (a scalar) up to n. <br>
A tensor's shape is the number of elements along each of its dimensions. <br>
* Shapes can be reshaped.

In [None]:
#Declare and initialize tensors
#The dtype has to be passed by keyword: the second positional parameter of
#tf.Variable is `trainable`, not the data type.
rank0_tensor = tf.Variable(9, dtype=tf.int16)
rank1_tensor = tf.Variable([1, 2, 3], dtype=tf.float64)
rank2_tensor = tf.Variable([[1, 2, 3], [4, 5, 6]], dtype=tf.int16)
#No dtype given here, so TensorFlow infers it from the values
rank3_tensor = tf.Variable([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

In [None]:
#Get rank of tensor
#NOTE(review): assuming this runs as a notebook cell, only the value of the
#last expression (rank3_tensor.shape) is displayed; wrap the other lines in
#print() to see their values as well.
tf.rank(rank1_tensor)

#Get shape of tensor
rank1_tensor.shape
rank2_tensor.shape
rank3_tensor.shape

In [None]:
#Reshape tensors

#Creates a tensor of the given shape filled with ones
#(tf.zeros also exists)
oneTensor = tf.ones([2,3])
#print(oneTensor)

#Reshape
#Flattens the 2x3 tensor into a single dimension of 6 elements.
#Using -1 for a dimension would let TensorFlow infer its size instead.
oneTensor = tf.reshape(oneTensor, [6])
print(oneTensor)
tf.rank(oneTensor)

### Tensor types
All tensor types are immutable except for "Variable". Available tensor types are: <br>
* Variable
* Constant
* Placeholder
* SparseTensor

### Evaluating tensors
Create session. <br>
The code evaluates the tensor variable that is stored in the default graph. The default graph holds all operations that are not assigned to another graph. It is also possible to create your own separate graphs.

In [None]:
#NOTE(review): tf.Session looks like the TensorFlow 1.x API; under TensorFlow 2.x
#it is presumably only reachable as tf.compat.v1.Session - confirm before enabling.
#with tf.Session() as session: #this uses the default graph
    # oneTensor.eval() #tensor to evaluate

## TensorFlow's Core Learning Algorithms

### Linear Regression
Creates a line of best fit through a scatter plot of data
points that best expresses the relationship between those points. <br>
This works in any number of dimensions. <br>

Feature columns are columns that contain the unique data, either categorical or numerical, per column of input data.

Epochs are the times the model re-evaluates data and thus learns from it.

Overfitting occurs when the same data is fed to the model too many times: the data is almost memorized, which degrades future predictions.

Batches are used to feed smaller amounts to the model.

Data from: https://www.kaggle.com/uciml/pima-indians-diabetes-database

In [None]:
#Load the training set and discard the columns that are not used as features
dfTrain = pd.read_csv('CSV_Data/diabetes_train.csv')
del dfTrain['SkinThickness']
del dfTrain['Insulin']
#Same treatment for the evaluation set
dfEvaluate = pd.read_csv('CSV_Data/diabetes_eval.csv')
del dfEvaluate['SkinThickness']
del dfEvaluate['Insulin']
#Split the labels off the feature frames
y_train = dfTrain.pop('Outcome')
y_evaluate = dfEvaluate.pop('Outcome')

#Preview the first feature rows
#(labels only: y_train.head())
dfTrain.head()

#Summary statistics:
#dfTrain.describe()

#Dimensions:
#dfTrain.shape

Visuals can also be created.

In [None]:
#Distribution of the BMI feature in 20 bins
dfTrain['BMI'].hist(bins=20)

In [None]:
#Number of samples per age, as a horizontal bar chart
dfTrain['Age'].value_counts().plot(figsize=(20, 20), kind='barh')

In [None]:
#Mean of the Outcome label per number of pregnancies. The data set is the Pima
#diabetes set, so the axis shows the diabetes outcome, not blood pressure.
pd.concat([dfTrain, y_train], axis=1).groupby('Pregnancies').Outcome.mean().plot(kind='barh').set_xlabel('share with diabetes (mean Outcome)')

Input functions are special functions that describe how data is fed into the model during the training process e.g. epochs, batch size etc.

The TensorFlow model expects data to be a "tf.data.Dataset" object.

In [None]:
def CreateInputFunction(dataDataFrame, labelDataFrame, numEpochs=27, shuffle=True, batchSize=32):
    """Build a zero-argument input function for an estimator.

    The returned function creates a tf.data.Dataset from the features
    (converted to a dict of columns) and the labels, optionally shuffles it
    with a buffer of 1000, splits it into batches of ``batchSize`` and
    repeats the batched data ``numEpochs`` times.
    """
    def InputFunction():
        featuresAndLabels = (dict(dataDataFrame), labelDataFrame)
        pipeline = tf.data.Dataset.from_tensor_slices(featuresAndLabels)
        if shuffle:
            pipeline = pipeline.shuffle(1000)
        batched = pipeline.batch(batchSize)
        return batched.repeat(numEpochs)
    return InputFunction

#Training uses the defaults: shuffled data, repeated for 27 epochs
trainInputFunction = CreateInputFunction(dfTrain, y_train)
#Evaluation and prediction must see every row exactly once and in its original
#order; the training defaults would shuffle and duplicate the rows, so the
#predictions could no longer be matched back to the rows of dfEvaluate.
evaluationInputFunction = CreateInputFunction(dfEvaluate, y_evaluate, numEpochs=1, shuffle=False)


In [None]:
#List the available feature names
for col in dfTrain.columns:
    print(col)

#One numeric float32 feature column per DataFrame column
featureColumns = [
    tf.feature_column.numeric_column(name, dtype=tf.float32)
    for name in dfTrain.columns
]

In [None]:
#Build a linear classifier on top of the feature columns
linearEstimator = tf.estimator.LinearClassifier(feature_columns=featureColumns)

#Fit on the training data, then score on the evaluation data
linearEstimator.train(input_fn=trainInputFunction)
result = linearEstimator.evaluate(input_fn=evaluationInputFunction)

#Clear the cell output produced so far before showing the accuracy
clear_output()
print(result['accuracy'])

In [None]:
# Use model to make predictions
#Predict over the evaluation rows once and unshuffled, so that prediction i
#belongs to row i of dfEvaluate.
predictInputFunction = CreateInputFunction(dfEvaluate, y_evaluate, numEpochs=1, shuffle=False)
predictDictionaries = list(linearEstimator.predict(predictInputFunction))
#Pick a random row; the bound comes from the data instead of a hard-coded size
index = randrange(len(dfEvaluate))
print(dfEvaluate.loc[index])
print(y_evaluate.loc[index])
print(predictDictionaries[index]['probabilities'])

### Classification

Takes input data and assigns it to one of a set of known classes.

In [None]:
#Column names applied to the iris CSV files; 'Species' is the label column
COLUMN_NAMES = [
    'SepalLength',
    'SepalWidth',
    'PetalLength',
    'PetalWidth',
    'Species',
]
#Class names, looked up by predicted class id
SPECIES = [
    'Setosa',
    'Versicolor',
    'Virginica',
]

In [None]:
#Download the iris training and test CSV files and get their local paths
trainPath = tf.keras.utils.get_file("iris_training.csv", "https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv")
EvalPath = tf.keras.utils.get_file("iris_test.csv", "https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv")

#header=0 replaces the header row of the file with our own COLUMN_NAMES
classifierTrain = pd.read_csv(trainPath, names=COLUMN_NAMES, header=0)
classifierEval = pd.read_csv(EvalPath, names=COLUMN_NAMES, header=0)

#Split the labels off the feature frames
classificationTrainY = classifierTrain.pop('Species')
classificationEvalY = classifierEval.pop('Species')

#Preview the training features (previously referenced the undefined name `train`)
classifierTrain.head()

Classification also uses a special input function. Its task is analogous to the one used in Linear Regression: to describe how data is fed into the model.

In [None]:
def ClassificationInputFunction(features, labels, training=True, batchSize=256):
    """Return a batched tf.data.Dataset of (features, labels).

    The features are converted to a dict of columns. When ``training`` is
    true the data is shuffled with a buffer of 1000 and repeated without a
    limit before batching; otherwise it is only batched.
    """
    slices = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    if not training:
        return slices.batch(batchSize)
    return slices.shuffle(1000).repeat().batch(batchSize)

Classification also uses feature columns.

In [None]:
#One numeric feature column per remaining DataFrame column; each name is printed
classificationFC = []
for key in classifierTrain.keys():
    classificationFC += [tf.feature_column.numeric_column(key=key)]
    print(key)

For classification there is a noticeably large number of different algorithms/models that can be used to implement it. Commonly used classifiers are 'LinearClassifier' and 'DNNClassifier', which stands for 'Deep-Neural-Network Classifier'.

As the name implies, linear classifiers depend on linear correlations, whereas DNN classifiers do not.

In [None]:
#Model creation: hidden layers of 30 and 10 units, 3 output classes
classifierModel = tf.estimator.DNNClassifier(
    feature_columns=classificationFC,
    hidden_units=[30, 10],
    n_classes=3,
)

#Model training
#The lambda defers building the dataset until the estimator calls input_fn
classifierModel.train(
    input_fn=lambda: ClassificationInputFunction(classifierTrain, classificationTrainY, training=True),
    steps=5000,
)

In [None]:
#Evaluates the accuracy of the model on the evaluation set.
evaluationResult = classifierModel.evaluate(input_fn=lambda: ClassificationInputFunction(classifierEval,classificationEvalY, training=False))
#The '%' format type multiplies the value by 100 and appends the percent sign,
#so the printed number really is a percentage.
print('Current accuracy was at {accuracy:.2%}'.format(**evaluationResult))

In [None]:
# Input function without labels, used for making predictions
def EvaluationFunction(features, batchSize=256):
    """Return the features (converted to a dict) as a batched tf.data.Dataset."""
    featureSlices = tf.data.Dataset.from_tensor_slices(dict(features))
    return featureSlices.batch(batchSize)

#Predict for example value
#Every feature maps to a list with one entry per sample: from_tensor_slices
#slices along the first dimension and cannot slice bare scalars.
prediction = {'SepalLength': [5.2], 'SepalWidth': [2.8], 'PetalLength': [2.5], 'PetalWidth': [0.3]}
predictions = classifierModel.predict(input_fn=lambda: EvaluationFunction(prediction))
for pred_dict in predictions:
    #Most likely class and the probability the model assigns to it
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]

    print('Prediction is "{}" ({:.1f}%)'.format(
        SPECIES[class_id], 100 * probability))



### Clustering

### Hidden Markov Models