In [6]:
import os 
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [7]:
import tensorflow as tf
import pandas as pd
from tensorflow import keras
from keras.utils import FeatureSpace
from keras.utils.np_utils import to_categorical

In [8]:
tf.__version__

'2.12.0'

In [10]:
raw_data = {
    "float_values": [0.0, 0.1, 0.2, 0.3],
    "string_values": ["zero", "one", "two", "three"],
    "int_values": [0, 1, 2, 3],
}
dataset = tf.data.Dataset.from_tensor_slices(raw_data)

feature_space = FeatureSpace(
    features={
        "float_values": "float_normalized",
        "string_values": "string_categorical",
        "int_values": "integer_categorical",
    },
    # crosses=[("string_values", "int_values")],
    output_mode="concat",
)
# Before you start using the FeatureSpace,
# you must `adapt()` it on some data.
feature_space.adapt(dataset)

# You can call the FeatureSpace on a dict of data (batched or unbatched).
output_vector = feature_space(raw_data)
output_vector

2023-04-21 12:40:07.061389: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype string and shape [4]
	 [[{{node Placeholder/_2}}]]
2023-04-21 12:40:07.086445: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype string and shape [4]
	 [[{{node Placeholder/_2}}]]
2023-04-21 12:40:07.302902: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype string and shape [4]
	 [[{{n

<tf.Tensor: shape=(4, 11), dtype=float32, numpy=
array([[-1.3416407 ,  0.        ,  0.        ,  0.        ,  0.        ,
         1.        ,  0.        ,  1.        ,  0.        ,  0.        ,
         0.        ],
       [-0.44721362,  0.        ,  0.        ,  0.        ,  1.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         1.        ],
       [ 0.44721353,  0.        ,  0.        ,  1.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  1.        ,  0.        ,
         0.        ],
       [ 1.3416407 ,  0.        ,  1.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  1.        ,
         0.        ]], dtype=float32)>

In [11]:
header=['Elevation','Aspect','Slope','Horizontal_Distance_To_Hydrology','Vertical_Distance_To_Hydrology','Horizontal_Distance_To_Roadways'
        ,'Hillshade_9am','Hillshade_Noon','Hillshade_3pm','Horizontal_Distance_To_Fire_Points'
        ,'Wilderness_Area1','Wilderness_Area2','Wilderness_Area3','Wilderness_Area4'
        ,'Soil_Type1', 'Soil_Type2','Soil_Type3','Soil_Type4','Soil_Type5','Soil_Type6','Soil_Type7','Soil_Type8'
        ,'Soil_Type9','Soil_Type10','Soil_Type11','Soil_Type12','Soil_Type13','Soil_Type14','Soil_Type15','Soil_Type16'
        ,'Soil_Type17','Soil_Type18','Soil_Type19','Soil_Type20','Soil_Type21','Soil_Type22','Soil_Type23','Soil_Type24'
        ,'Soil_Type25','Soil_Type26','Soil_Type27','Soil_Type28','Soil_Type29','Soil_Type30','Soil_Type31','Soil_Type32'
        ,'Soil_Type33','Soil_Type34','Soil_Type35','Soil_Type36','Soil_Type37','Soil_Type38','Soil_Type39','Soil_Type40'
        ,'Cover_Type']

dataframe = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.data.gz',compression='gzip',names=header)

In [12]:
dataframe.shape

(581012, 55)

In [13]:
val_dataframe = dataframe.sample(frac=0.2, random_state=1337)
train_dataframe = dataframe.drop(val_dataframe.index)

In [28]:
train_dataframe.shape

(464810, 55)

In [14]:
val_dataframe.to_csv("val_convtype.csv", index=False)
train_dataframe.to_csv("train_convtype.csv", index=False)

In [45]:
train_data_using_csv = tf.data.experimental.make_csv_dataset(
    "train_convtype.csv",
    batch_size=32, label_name="Cover_Type"
).prefetch(2)

In [46]:
val_data_using_csv = tf.data.experimental.make_csv_dataset(
    "val_convtype.csv",
    batch_size=32, label_name="Cover_Type"
).prefetch(2)

In [40]:
print(train_data_using_csv.take(1))

<_TakeDataset element_spec=(OrderedDict([('Elevation', TensorSpec(shape=(32,), dtype=tf.int32, name=None)), ('Aspect', TensorSpec(shape=(32,), dtype=tf.int32, name=None)), ('Slope', TensorSpec(shape=(32,), dtype=tf.int32, name=None)), ('Horizontal_Distance_To_Hydrology', TensorSpec(shape=(32,), dtype=tf.int32, name=None)), ('Vertical_Distance_To_Hydrology', TensorSpec(shape=(32,), dtype=tf.int32, name=None)), ('Horizontal_Distance_To_Roadways', TensorSpec(shape=(32,), dtype=tf.int32, name=None)), ('Hillshade_9am', TensorSpec(shape=(32,), dtype=tf.int32, name=None)), ('Hillshade_Noon', TensorSpec(shape=(32,), dtype=tf.int32, name=None)), ('Hillshade_3pm', TensorSpec(shape=(32,), dtype=tf.int32, name=None)), ('Horizontal_Distance_To_Fire_Points', TensorSpec(shape=(32,), dtype=tf.int32, name=None)), ('Wilderness_Area1', TensorSpec(shape=(32,), dtype=tf.int32, name=None)), ('Wilderness_Area2', TensorSpec(shape=(32,), dtype=tf.int32, name=None)), ('Wilderness_Area3', TensorSpec(shape=(32,),

In [18]:
feature_space = FeatureSpace(
    features={
        # Categorical features encoded as integers
        "Wilderness_Area1":"integer_categorical",
        "Wilderness_Area2":"integer_categorical",
        "Wilderness_Area3":"integer_categorical",
        "Wilderness_Area4":"integer_categorical",
        "Soil_Type1": "integer_categorical",
        "Soil_Type2": "integer_categorical",
        "Soil_Type3": "integer_categorical",
        "Soil_Type4": "integer_categorical",
        "Soil_Type5": "integer_categorical",
        "Soil_Type6": "integer_categorical",
        "Soil_Type7": "integer_categorical",
        "Soil_Type8": "integer_categorical",
        "Soil_Type9": "integer_categorical",
        "Soil_Type10": "integer_categorical",
        "Soil_Type11": "integer_categorical",
        "Soil_Type12": "integer_categorical",
        "Soil_Type13": "integer_categorical",
        "Soil_Type14": "integer_categorical",
        "Soil_Type15": "integer_categorical",
        "Soil_Type16": "integer_categorical",
        "Soil_Type17": "integer_categorical",
        "Soil_Type18": "integer_categorical",
        "Soil_Type19": "integer_categorical",
        "Soil_Type20": "integer_categorical",
        "Soil_Type21": "integer_categorical",
        "Soil_Type22": "integer_categorical",
        "Soil_Type23": "integer_categorical",
        "Soil_Type24": "integer_categorical",
        "Soil_Type25": "integer_categorical",
        "Soil_Type26": "integer_categorical",
        "Soil_Type27": "integer_categorical",
        "Soil_Type28": "integer_categorical",
        "Soil_Type29": "integer_categorical",
        "Soil_Type30": "integer_categorical",
        "Soil_Type31": "integer_categorical",
        "Soil_Type32": "integer_categorical",
        "Soil_Type33": "integer_categorical",
        "Soil_Type34": "integer_categorical",
        "Soil_Type35": "integer_categorical",
        "Soil_Type36": "integer_categorical",
        "Soil_Type37": "integer_categorical",
        "Soil_Type38": "integer_categorical",
        "Soil_Type39": "integer_categorical",
        "Soil_Type40": "integer_categorical",

        # Numerical features to normalize
        "Elevation": "float_normalized",
        "Slope": "float_normalized",
        "Aspect": "float_normalized",
        "Horizontal_Distance_To_Hydrology": "float_normalized",
        "Vertical_Distance_To_Hydrology": "float_normalized",
        "Horizontal_Distance_To_Roadways": "float_normalized",
        "Hillshade_9am": "float_normalized",
        "Hillshade_Noon": "float_normalized",
        "Hillshade_3pm": "float_normalized",
        "Horizontal_Distance_To_Fire_Points": "float_normalized",
    },
    # Our utility will one-hot encode all categorical
    # features and concat all features into a single
    # vector (one vector per sample).
    output_mode="concat",
)

In [None]:
train_data_using_csv_with_no_labels = train_data_using_csv.map(lambda x, y: x)
feature_space.adapt(train_data_using_csv_with_no_labels.take(10000))

In [None]:
counter = 0
for x, y in train_data_using_csv.take(20000):
    print(x)
    print(y)
    counter +=1
    # print(len(x.keys()))
    # print(feature_space(x))
    break

print("Counter", counter)

In [None]:
train_dataframe["Cover_Type"].head(32)

اشکال اینجاست وقتی با همه داده ها میریم جلو تا همین جا پیش میره. اما وقتی یه قسمتش رو برمیدارم اویکه

In [49]:
def log_fun(x, y):
    print("label: ", y.numpy())
    return x, y

In [50]:
train_data_using_csv = train_data_using_csv.map(log_fun).map(lambda x, y: (x, tf.one_hot( 
    tf.cast(y, tf.int32),
    8,
    on_value=1,
    off_value=0))).map(log_fun).take(20)

val_data_using_csv = val_data_using_csv.map(lambda x, y: (x, tf.one_hot( 
    tf.cast(y, tf.int32),
    8,
    on_value=1,
    off_value=0)))

AttributeError: in user code:

    File "/tmp/ipykernel_15037/1621988313.py", line 2, in log_fun  *
        print("label: ", y.numpy())

    AttributeError: 'Tensor' object has no attribute 'numpy'


In [23]:
preprocessed_train_ds = train_data_using_csv.map(
    lambda x, y: (feature_space(x), y)
)
# preprocessed_train_ds = preprocessed_train_ds.prefetch(tf.data.AUTOTUNE)

preprocessed_val_ds = val_data_using_csv.map(
    lambda x, y: (feature_space(x), y)
)
# preprocessed_val_ds = preprocessed_val_ds.prefetch(tf.data.AUTOTUNE)


###Make Model and Fit###
dict_inputs = feature_space.get_inputs()
encoded_features = feature_space.get_encoded_features()

x = keras.layers.Dense(512, activation="relu")(encoded_features)
x = keras.layers.Dense(512, activation="relu")(x)
x = keras.layers.Dense(512, activation="relu")(x)
predictions = keras.layers.Dense(8, activation="softmax")(x)

training_model = keras.Model(inputs=encoded_features, outputs=predictions)
training_model.compile(
    optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
)

inference_model = keras.Model(inputs=dict_inputs, outputs=predictions)


training_model.fit(
    preprocessed_train_ds.take(13000), epochs=20, validation_data= preprocessed_val_ds.take(3000)
)

2023-04-21 14:51:31.707585: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_125' with dtype resource
	 [[{{node Placeholder/_125}}]]
2023-04-21 14:51:31.712550: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_8' with dtype int32 and shape [1]
	 [[{{node Placeholder/_8}}]]


Epoch 1/20

2023-04-21 14:54:54.384230: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_7' with dtype int32 and shape [1]
	 [[{{node Placeholder/_7}}]]
2023-04-21 14:54:54.389748: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_56' with dtype int32 and shape [1]
	 [[{{node Placeholder/_56}}]]


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20

KeyboardInterrupt: 