In [None]:
from __future__ import print_function

import shutil
import tempfile

import pandas as pd
import tensorflow as tf
import tensorflow_transform as tft
import tensorflow_transform.beam.impl as tft_beam
from tensorflow_transform.tf_metadata import dataset_metadata, dataset_schema

In [None]:
# Load the pollution sample; the path is relative to the current working directory.
dataset = pd.read_csv("pollution-small.csv")

In [None]:
# Preview the first rows of the loaded frame.
dataset.head()

In [None]:
# Keep every column except "Date".
# NOTE(review): `fratures` looks like a typo for `features`, but later cells
# reference this exact name, so it is left unchanged here.
fratures = dataset.drop(columns=["Date"])

In [None]:
# Preview the frame after the "Date" column has been dropped.
fratures.head()

In [None]:
# Convert the frame into a list holding one dict per row.
dict_features = list(
    fratures.to_dict(orient="index").values()
)

In [None]:
# Show the first two row dicts as a quick sanity check.
dict_features[:2]

In [None]:
# Metadata describing the raw rows: each listed column is declared as a
# float32 FixedLenFeature with shape [].
# NOTE(review): newer tensorflow_transform releases appear to replace
# `dataset_schema.from_feature_spec` with `schema_utils.schema_from_feature_spec`
# -- confirm against the installed version.
data_model = dataset_metadata.DatasetMetadata(
    dataset_schema.from_feature_spec({
        column: tf.compat.v1.FixedLenFeature([], tf.float32)
        for column in ('no2', 'so2', 'pm10', 'soot')
    })
)

In [None]:
# Display the metadata object for inspection.
data_model

In [None]:
def preprocessing_fn(inputs):
    """Describe how each raw pollution column is turned into a model feature.

    Args:
        inputs: mapping indexed by the raw column names 'no2', 'so2',
            'pm10' and 'soot'.

    Returns:
        dict with the keys 'no2_normalized', 'so2_normalized',
        'pm10_normalized' and 'soot_normalized'.
    """
    outputs = {}

    # no2 / so2: subtract the value returned by tft.mean for that column.
    outputs["no2_normalized"] = inputs['no2'] - tft.mean(inputs['no2'])
    outputs["so2_normalized"] = inputs['so2'] - tft.mean(inputs['so2'])

    # pm10 / soot: rescaled through the tft scaling helpers
    # (scale_by_min_max is called with its default output range).
    outputs["pm10_normalized"] = tft.scale_to_0_1(inputs['pm10'])
    outputs["soot_normalized"] = tft.scale_by_min_max(inputs['soot'])

    return outputs

In [None]:
def data_transform():
    """Run preprocessing_fn over dict_features and print raw/transformed pairs.

    Reads the module-level `dict_features`, `data_model` and
    `preprocessing_fn`. The scratch directory handed to `tft_beam.Context`
    is deleted before returning, so repeated calls do not pile up
    temporary directories on disk.

    Returns:
        None. Each row is written to stdout with print().
    """
    scratch_dir = tempfile.mkdtemp()
    try:
        with tft_beam.Context(temp_dir=scratch_dir):
            # The second element of the result (the transform function) is
            # not used in this notebook, so it is discarded.
            transformed_dataset, _ = (
                (dict_features, data_model)
                | tft_beam.AnalyzeAndTransformDataset(preprocessing_fn)
            )
    finally:
        # Always clean up, even if the analysis raised.
        shutil.rmtree(scratch_dir, ignore_errors=True)

    # transformed_dataset is a (data, metadata) pair; only the data is printed.
    transformed_data, _ = transformed_dataset

    for raw_row, transformed_row in zip(dict_features, transformed_data):
        print("Raw: ", raw_row)
        print("Transformed:", transformed_row)

In [None]:
# Run the transform and print each raw row next to its transformed version.
data_transform()