In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
#!pip install -q -U keras-tuner
import kerastuner as kt
import IPython

ImportError: cannot import name 'preprocessing' from 'tensorflow.keras.layers.experimental' (C:\Users\Neil\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\keras\api\_v1\keras\layers\experimental\__init__.py)

In [None]:
# Report the library versions this run is using (useful when comparing runs).
print(f"NumPy version {np.__version__}")
print(f"Tensorflow version {tf.__version__}")

In [None]:
## Data Reading ##
# Alternative that was tried and left disabled:
#   tf.data.experimental.CsvDataset("data2/train_drug.csv", "float32")

# Base URL under which the three CSV files are served.
path="https://pzfczh27edy8hocjobyt5a-on.drv.tw/webserver/dm/"

# Download order is unchanged: targets, then test features, then the training
# features. Each file becomes its own DataFrame.
results, testSet, train_features = (
    pd.read_csv(path + csv_name)
    for csv_name in (
        "train_targets_scored.csv",
        "test_features.csv",
        "train_features_court.csv",
    )
)

In [None]:
## Parameters ##
# Fraction of the rows held out for validation.
validationProportion = 0.2

# Number of samples (rows of the feature table).
N = train_features.shape[0]
# Number of inputs: every feature column except one (the code that builds
# dataSet later drops the sig_id column).
I = len(train_features.columns) - 1
# Number of observable targets: every target column except one (same reason).
M = len(results.columns) - 1

In [None]:
## HYPER parameters ##
# Number of passes over the training data for each call to fit().
nbEpoch = 30
# Intended learning rate for the Adam optimizer.
# NOTE(review): no cell shown in this notebook reads this value — the baseline
# model is compiled with optimizer='adam' and the tuned model takes its rate
# from the tuner.
learningRate = 1e-2

# Other candidates for tuning:
#   - number of hidden layers
#   - number of neurons
#   - activation functions
#   - epsilon (Adam)

# See also: RandomizedSearchCV

In [None]:
## Normalization ##
# Scale every column from position 4 onward by one global constant: the largest
# absolute value found anywhere in those columns. After this step the scaled
# values lie in [-1, 1]. The first four columns are left untouched.
maxVal = train_features.iloc[:, 4:].abs().max().max()
train_features.iloc[:, 4:] = train_features.iloc[:, 4:].div(maxVal)
train_features

In [None]:
## Attaching the targets to the features (and dropping sig_id) ##
# Rows are matched on the shared `sig_id` key instead of on row position, so
# each sample is paired with its own targets even if the two files are not in
# the same order or do not contain the same rows.
#   - how='left' keeps exactly one output row per row of train_features, in the
#     original order, so N (computed from train_features) stays valid.
#   - validate='one_to_one' raises if a sig_id is duplicated on either side,
#     rather than silently multiplying rows.
dataSet = train_features.merge(results, on='sig_id', how='left', validate='one_to_one')

# The identifier is not a model input; after dropping it the frame holds the
# I feature columns followed by the M target columns.
dataSet = dataSet.drop(columns=['sig_id'])

dataSet

In [None]:
## Shuffling ##
# Put the rows in a random order: draw a random permutation of the current
# index labels and rebuild the frame in that order. No seed is set here, so the
# order differs from run to run.
dataSet = dataSet.reindex(index=np.random.permutation(dataSet.index))
dataSet

In [None]:
## Replacing categories values by numbers ##
# Each listed column is turned into a pandas categorical and then replaced by
# its integer category codes.
features_to_convert = ['cp_type', 'cp_time', 'cp_dose']
for feat in features_to_convert:
    dataSet[feat] = dataSet[feat].astype('category').cat.codes

dataSet

In [None]:
## Splitting into training and validation ##
# The first `trainingSize` rows (already shuffled) are used for training and
# the remaining rows are kept for validation.
trainingSize = int(N * (1 - validationProportion))

trainingSet, validationSet = dataSet.iloc[:trainingSize], dataSet.iloc[trainingSize:]

print("Training size:", trainingSet.shape[0])
print("Validation size:", validationSet.shape[0])

In [None]:
# Separate model inputs from targets: the first I columns are the inputs,
# everything after them is the target block.
XTrain, YTrain = trainingSet.iloc[:, :I], trainingSet.iloc[:, I:]
XTrain

In [None]:
## Batching ##
# This cell is disabled: everything below sits inside a bare triple-quoted
# string, so none of it is executed. Neither df_to_dataset nor trainingds is
# defined for later cells.
# NOTE(review): as written, the helper passes the whole dataframe as both the
# feature dict and the label tensor in from_tensor_slices — confirm which
# columns should be the labels before re-enabling it.
"""
# A utility method to create a tf.data dataset from a Pandas Dataframe
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
    dataframe = dataframe.copy()
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), dataframe))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    ds = ds.batch(batch_size)
    ds = ds.prefetch(batch_size)
    return ds

trainingds = df_to_dataset(trainingSet)
trainingds
"""

In [None]:
## Training ##

# Baseline network with a fixed architecture (no tuning): two hidden layers of
# 512 units, then one output unit per target.
model_non_optimized = tf.keras.models.Sequential()
model_non_optimized.add(tf.keras.layers.Flatten())
model_non_optimized.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model_non_optimized.add(tf.keras.layers.Dense(512, activation = 'softplus'))
# Sigmoid rather than softmax on the output layer: each of the M targets gets
# its own independent probability, so several targets can be predicted for the
# same sample at once.
model_non_optimized.add(tf.keras.layers.Dense(M, activation=tf.nn.sigmoid))

# Binary cross-entropy matches the independent per-target sigmoid outputs.
model_non_optimized.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)



In [None]:
def model_generation(hp):
  """Build and compile one candidate model for the hyperparameter tuner.

  The network is: Flatten -> Dense(relu) -> Dense(softplus) -> Dense(M, sigmoid).

  Tuned hyperparameters (names as registered on `hp`):
    units          width of the first hidden layer, 32..1024 in steps of 32
    units_2        width of the second hidden layer, 32..1024 in steps of 32
    learning_rate  Adam learning rate, one of 1e-2, 1e-3, 1e-4, 1e-6

  Args:
    hp: hyperparameter container supplied by the tuner; only its `Int` and
      `Choice` methods are used.

  Returns:
    The compiled Keras model (binary cross-entropy loss, accuracy metric).
  """
  # Register the two layer widths first, in the same order as before.
  first_width = hp.Int('units', min_value = 32, max_value = 1024, step = 32)
  second_width = hp.Int('units_2', min_value = 32, max_value = 1024, step = 32)

  model = keras.Sequential([
      keras.layers.Flatten(),
      keras.layers.Dense(units = first_width, activation = 'relu'),
      keras.layers.Dense(units = second_width, activation = 'softplus'),
      # One sigmoid unit per target; M is the module-level target count.
      keras.layers.Dense(M, activation = tf.nn.sigmoid),
  ])

  # Learning rate candidates for the Adam optimizer.
  adam_rate = hp.Choice('learning_rate', values = [1e-2, 1e-3, 1e-4, 1e-6])

  model.compile(
      optimizer = keras.optimizers.Adam(learning_rate = adam_rate),
      loss = 'binary_crossentropy',
      metrics = ['accuracy'],
  )
  return model

In [None]:
# Baseline results, before any hyperparameter tuning.
# The held-out validation rows are passed to fit() so that every epoch also
# reports val_loss / val_accuracy. Without them only training metrics are
# shown, and overfitting cannot be seen. The validation frame is split into
# inputs and targets at column I, exactly as XTrain / YTrain are.
model_non_optimized.fit(
    XTrain,
    YTrain,
    epochs=nbEpoch,
    validation_data=(validationSet.iloc[:, :I], validationSet.iloc[:, I:]),
)

In [None]:
# Hyperband search over the models produced by model_generation, training any
# single candidate for at most 10 epochs.
# NOTE(review): the objective is 'accuracy', i.e. the training metric.
# Ranking trials on 'val_accuracy' would additionally require validation data
# to be passed to tuner.search — confirm which is intended.
tuner = kt.Hyperband(
    hypermodel = model_generation,
    objective = 'accuracy',
    max_epochs = 10,
)

In [None]:
# Callback that wipes the cell output each time a training run finishes, so the
# hyperparameter search does not pile up one log per trial.
class ClearTrainingOutput(tf.keras.callbacks.Callback):
  """Keras callback that clears the notebook cell output when training ends."""

  def on_train_end(*_args, **_kwargs):
    """Clear the current cell output; every argument Keras passes is ignored."""
    # wait=True delays the clearing until new output is ready to replace it.
    IPython.display.clear_output(wait = True)

In [None]:
# Held-out rows, split into inputs and targets at column I exactly as the
# training rows are. The validation split was built earlier but never used;
# passing it here makes Keras compute val_loss / val_accuracy after each epoch,
# both during the search and during the final training run.
XVal = validationSet.iloc[:, :I]
YVal = validationSet.iloc[:, I:]

# Run the hyperparameter search; the callback clears the cell output after
# each trial's training run.
tuner.search(
    XTrain,
    YTrain,
    epochs = 10,
    validation_data = (XVal, YVal),
    callbacks = [ClearTrainingOutput()],
)

# Get the optimal hyperparameters
optimized_param = tuner.get_best_hyperparameters(num_trials = 1)[0]
print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {optimized_param.get('units')}, in the second it's {optimized_param.get('units_2')} and the optimal learning rate for the optimizer
is {optimized_param.get('learning_rate')}.
""")

### Rebuild the model with the best hyperparameters and train it from scratch
model_optimized = tuner.hypermodel.build(optimized_param)
model_optimized.fit(XTrain, YTrain, epochs=nbEpoch, validation_data=(XVal, YVal))
