In [1]:
# Install a pinned cuDNN build (8.4.1.50 for CUDA 11.6) to use Colab's GPU for training.
# --allow-change-held-packages lets apt replace libcudnn8 even though it is a held package.
!apt install --allow-change-held-packages libcudnn8=8.4.1.50-1+cuda11.6

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following package was automatically installed and is no longer required:
  libnvidia-common-460
Use 'apt autoremove' to remove it.
The following packages will be REMOVED:
  libcudnn8-dev
The following held packages will be changed:
  libcudnn8
The following packages will be upgraded:
  libcudnn8
1 upgraded, 0 newly installed, 1 to remove and 10 not upgraded.
Need to get 420 MB of archives.
After this operation, 1,622 MB disk space will be freed.
Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  libcudnn8 8.4.1.50-1+cuda11.6 [420 MB]
Fetched 420 MB in 13s (32.9 MB/s)
(Reading database ... 123934 files and directories currently installed.)
Removing libcudnn8-dev (8.1.1.33-1+cuda11.2) ...
update-alternatives: removing manually selected alternative - switching libcudnn to auto mode
(Reading database ... 123911 files and directories currently installed.)
Prepar

In [None]:
#import zipfile
# Unzip the training dataset
#local_zip = 'train.csv.zip'
#zip_ref = zipfile.ZipFile(local_zip, 'r')
#zip_ref.extractall()

In [2]:
# Mount Google Drive at /content/drive; the CSV files read below live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import csv

# One list of raw string values per column; the i-th key receives the i-th
# field of every CSV row, so the key order must match the file's column order.
columns={'carat':[],'cut':[],'color':[],'clarity':[],'depth':[],'table':[],'x':[],'y':[],'z':[],'price':[]}

# newline='' leaves newline handling to the csv module, as its docs recommend
with open('/content/drive/MyDrive/Colab Notebooks/train.csv', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')

    # Skip the header line (tolerating a completely empty file)
    next(reader, None)

    # Append every field of the row to the list of its column
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline)
            continue
        for position, name in enumerate(columns):
            columns[name].append(row[position])

In [4]:
import pandas as pd
import numpy as np

# Build the frame from the column lists and parse the numeric columns to
# float64; cut / color / clarity are not converted.
df = pd.DataFrame(columns).astype(
    dict.fromkeys(["carat", "depth", "table", "x", "y", "z", "price"], np.float64)
)

In [5]:
# Lookup from column name to its position in df
column_indices = dict(zip(df.columns, range(len(df.columns))))

# Ordered 80/20 split without shuffling: leading rows train, trailing rows validate
n = len(df)
train_df = df.iloc[:int(n*0.8)]
val_df = df.iloc[len(train_df):]

num_features = len(df.columns)

df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,x,y,z,price
0,0.544971,Ideal,E,SI1,63.911017,52.575829,5.447452,4.053076,3.701485,3.558
1,2.073682,Premium,J,SI1,67.642925,57.660288,6.266665,7.661507,6.177051,24.738
2,0.606198,Premium,G,VS1,55.61731,60.388787,6.800151,5.096966,3.480326,3.63
3,0.695397,Premium,G,SI2,60.587811,71.325721,5.366931,6.616767,4.454435,4.218
4,0.282651,Ideal,E,VVS2,68.970056,54.372123,4.920104,4.409408,4.361358,1.578


In [6]:
# Numeric-only view of the training split; its columns drive the scaling below
df_num = train_df.select_dtypes(include='number')

In [7]:
# Column-wise extrema come from the training split only and are reused to
# scale the validation split.
train_max = train_df.max(numeric_only=True)
train_min = train_df.min(numeric_only=True)

# Min-max scaling: (value - min) / (max - min), column by column
norm_train = train_df[df_num.columns].sub(train_min).div(train_max - train_min)
norm_val = val_df[df_num.columns].sub(train_min).div(train_max - train_min)

In [8]:
# Keep the price extrema so predictions can be mapped back to the original scale
min_price, max_price = train_min['price'], train_max['price']

In [9]:
# Re-attach the categorical columns (left out of the scaling) by row index
norm_train = norm_train.join(train_df.loc[:, ['cut', 'color', 'clarity']])
norm_val = norm_val.join(val_df.loc[:, ['cut', 'color', 'clarity']])

In [10]:
# Preview the scaled numeric columns together with the re-attached categoricals
norm_train.head()

Unnamed: 0,carat,depth,table,x,y,z,price,cut,color,clarity
0,0.118571,0.583269,0.325378,0.470349,0.30098,0.461505,0.078672,Ideal,E,SI1
1,0.464025,0.673428,0.431509,0.546659,0.667567,0.77282,0.652066,Premium,J,SI1
2,0.132407,0.382902,0.488462,0.596354,0.407031,0.433694,0.080622,Premium,G,VS1
3,0.152564,0.502984,0.716755,0.462849,0.56143,0.556193,0.09654,Premium,G,SI2
4,0.059293,0.705491,0.362873,0.421227,0.33718,0.544488,0.025069,Ideal,E,VVS2


In [11]:
# One-hot encode the string columns and drop the target from the feature matrix
train_features = pd.get_dummies(norm_train).drop('price', axis=1)

# The validation split is encoded on its own, so a category level that does not
# occur in it would produce fewer (or differently ordered) columns than training.
# Align it to the training columns; levels absent from validation become all-zero.
val_features = (
    pd.get_dummies(norm_val)
    .drop('price', axis=1)
    .reindex(columns=train_features.columns, fill_value=0)
)

In [12]:
# Regression target: the min-max scaled price
train_labels = norm_train.loc[:, 'price']
val_labels = norm_val.loc[:, 'price']

In [13]:
# Width of the encoded feature matrix
num_features = len(train_features.columns)
num_features

26

In [14]:
import tensorflow as tf

# Pair every feature row with its label; the labels get an extra trailing axis
train_dataset = tf.data.Dataset.from_tensor_slices(
    (train_features, tf.expand_dims(train_labels, axis=-1))
)
valid_dataset = tf.data.Dataset.from_tensor_slices(
    (val_features, tf.expand_dims(val_labels, axis=-1))
)

In [15]:
# Group the sliced datasets into batches of 32 and let tf.data tune the prefetch buffer
train_dataset = train_dataset.batch(batch_size=32).prefetch(buffer_size=tf.data.AUTOTUNE)
valid_dataset = valid_dataset.batch(batch_size=32).prefetch(buffer_size=tf.data.AUTOTUNE)

In [16]:
def Personal_callback(model_name, metrics, threshold_metric, ep, lr_i=0.001):
    """Assemble the Keras callbacks used for one training run.

    Args:
        model_name: Path fragment inserted after "checkpoint_path/" for the
            checkpoint files of this run.
        metrics: Key of the monitored metric in the epoch logs (e.g. 'mae').
            Its validation counterpart is read as 'val_' + metrics.
        threshold_metric: Training is cancelled once both the training and the
            validation value of the metric are below this number.
        ep: Scales the exponential decay; the scheduled rate after an epoch is
            LR_init * 10 ** (-(epoch + 1) / (ep * 10)).
        lr_i: Starting value of LR_init, the base rate of the decay schedule.

    Returns:
        list: [stop_train, lr_decay, reduce_lr, early_stopping, chk_train, chk_val]
    """

    # --- Stop training once the metric threshold is reached -----------------
    class stop_training(tf.keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs=None):
            logs = logs or {}
            train_value = logs.get(metrics)
            val_value = logs.get('val_'+metrics)
            # A value is missing when the metric was not logged (for example a
            # fit without validation data); comparing None to a number raises.
            if train_value is None or val_value is None:
                return
            if train_value < threshold_metric and val_value < threshold_metric:
                print("\nCancelling training!")
                self.model.stop_training = True
    stop_train = stop_training()

    # --- Exponential learning-rate decay -------------------------------------
    # The base rate is kept in a module-level global so the callback can rebase it.
    global LR_init
    LR_init=lr_i

    class learning_decay(tf.keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs=None):
            lr = self.model.optimizer.lr
            global LR_init
            new_lr = (LR_init) * 10.**(-(epoch+1.)/(ep*10))
            if lr > new_lr:
                # Current rate is above the schedule: pull it down to the schedule
                tf.keras.backend.set_value(self.model.optimizer.lr, new_lr)
            else:
                # Current rate is already at or below the schedule (e.g. it was
                # lowered by another callback): rebase the schedule on it.
                # NOTE(review): this stores the optimizer's lr object itself, not
                # a snapshot of its value - confirm the resulting compounding
                # decay in later epochs is intended.
                LR_init=lr

    lr_decay = learning_decay()

    # --- Reduce the learning rate on a plateau --------------------------------
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor= metrics,   # the training metric, not its val_ counterpart
                                                     factor=0.2,         # new_lr = lr * factor
                                                     patience=2,         # epochs without improvement before the rate is reduced
                                                     verbose=1,          # print out when learning rate goes down
                                                     min_lr=1e-15)       # lower bound on the learning rate

    # --- Early stopping -------------------------------------------------------
    # Stop when the monitored training metric has not improved by at least
    # min_delta for 10 consecutive epochs; the best weights are NOT restored.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor = metrics,
                                                      patience = 10,
                                                      min_delta = 0.00001,
                                                      restore_best_weights = False,
                                                      verbose=1)

    # --- Model checkpoint: best training metric -------------------------------
    chk_train_path = "checkpoint_path/"+ model_name +"/train/chk_train.ckpt"
    chk_train = tf.keras.callbacks.ModelCheckpoint(filepath=chk_train_path,
                                                   monitor=metrics,          # best value of the training metric
                                                   save_weights_only=True,   # set to False to save the entire model
                                                   save_best_only=True,      # keep only the best weights, not one file per epoch
                                                   save_freq="epoch",        # check at the end of every epoch
                                                   verbose=0)                # don't print when a checkpoint is written

    # --- Model checkpoint: best validation metric -----------------------------
    chk_val_path = "checkpoint_path/"+ model_name +"/val/chk_val.ckpt"
    chk_val = tf.keras.callbacks.ModelCheckpoint(filepath=chk_val_path,
                                                 monitor='val_'+metrics,   # best value of the validation metric
                                                 save_weights_only=True,   # set to False to save the entire model
                                                 save_best_only=True,      # keep only the best weights, not one file per epoch
                                                 save_freq="epoch",        # check at the end of every epoch
                                                 verbose=0)                # don't print when a checkpoint is written

    return [stop_train, lr_decay, reduce_lr, early_stopping, chk_train, chk_val]

In [29]:
# Stacked bidirectional-LSTM regressor over the encoded feature vector
model_4 = tf.keras.Sequential([
    # Input width follows the feature matrix instead of a hard-coded 26, so the
    # model keeps matching the data if the one-hot encoding changes width.
    tf.keras.layers.Input(shape=(train_features.shape[1],)),
    # Insert a length-1 axis at position 1 so the LSTMs receive a one-step sequence
    tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=1)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(182, return_sequences=True)),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(150, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True)),
    tf.keras.layers.Dense(units=32, activation="relu"),
    tf.keras.layers.Dense(units=16, activation="relu"),
    tf.keras.layers.Dense(units=1, activation="relu"),
    tf.keras.layers.Reshape([1, -1])
])

# Mean squared error as the loss, mean absolute error as the reported metric
model_4.compile(loss='mse',
                optimizer=tf.optimizers.Adam(),
                metrics=['mae'])

# Snapshot of the freshly initialised weights, taken before any training
InitialW_4 = model_4.get_weights()

In [30]:
# Start from the saved initial weights and a learning rate of 0.001
# (presumably so the cell can be re-run without continuing a previous fit).
model_4.set_weights(InitialW_4)
tf.keras.backend.set_value(model_4.optimizer.lr, 0.001)

history = model_4.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=100,
    callbacks=Personal_callback(
        model_name='model_4/FIT_1',
        metrics='mae',
        threshold_metric=0.02,
        ep=100,
    ),
)

# Weights as they are when fit() returns
FIT_1_4 = model_4.get_weights()

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 20: ReduceLROnPlateau reducing learning rate to 0.00019099852070212365.
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 27: ReduceLROnPlateau reducing learning rate to 2.723077195696533e-05.
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 34: ReduceLROnPlateau reducing learning rate to 3.30437978846021e-06.
Epoch 35/100
Epoch 36/100
Epoch 36: ReduceLROnPlateau reducing learning rate to 5.61202978133224e-07.
Epoch 37/100
Epoch 38/100
Epoch 38: ReduceLROnPlateau reducing learning rate to 9.44386897572258e-08.
Epoch 39/100
Epoch 40/100
Epoch 40: ReduceLROnPlateau reducing learning rate to 1.5746351778034295e-08.
Epoch 41/100
Epoch 42/100
Ep

## Generate assignment

In [33]:
import csv

# One list of raw string values per column; the i-th key receives the i-th
# field of every CSV row. The test file is read without a 'price' column.
columns={'carat':[],'cut':[],'color':[],'clarity':[],'depth':[],'table':[],'x':[],'y':[],'z':[]}

# newline='' leaves newline handling to the csv module, as its docs recommend
with open('/content/drive/MyDrive/Colab Notebooks/test.csv', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')

    # Skip the header line (tolerating a completely empty file)
    next(reader, None)

    # Append every field of the row to the list of its column
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline)
            continue
        for position, name in enumerate(columns):
            columns[name].append(row[position])

In [34]:
import pandas as pd
import numpy as np

# Test-set frame: the measurement columns are parsed to float64,
# cut / color / clarity are not converted.
subm = pd.DataFrame(columns).astype(
    dict.fromkeys(["carat", "depth", "table", "x", "y", "z"], np.float64)
)

In [None]:
# Summary statistics of the numeric test-set columns
subm.describe()

Unnamed: 0,carat,depth,table,x,y,z
count,3000.0,3000.0,3000.0,3000.0,3000.0,3000.0
mean,0.793841,61.853412,57.456326,5.747509,5.717945,3.519011
std,0.48663,5.10127,5.486476,1.466976,1.532772,1.241848
min,0.003191,42.036789,37.724893,1.680825,0.926215,0.0558
25%,0.404519,58.487198,53.794018,4.692362,4.63142,2.668319
50%,0.684529,61.868152,57.420026,5.66919,5.644216,3.497775
75%,1.086494,65.305668,61.024028,6.727281,6.767933,4.360686
max,3.952778,81.060053,81.169067,10.445116,10.434292,7.666545


In [35]:
# Numeric-only view of the training split (same definition as earlier in the notebook)
df_num = train_df.select_dtypes(include='number')

In [36]:
# Training-split extrema of the numeric columns with the target removed.
# This rebinds train_min / train_max.
train_min = train_df.drop(columns='price').min(numeric_only=True)
train_max = train_df.drop(columns='price').max(numeric_only=True)

In [37]:
# Numeric feature columns, chosen by name instead of assuming 'price' is last
numeric_feature_cols = df_num.columns.drop('price')

# Scale with the training extrema restricted to those columns, so the result is
# the same whether or not train_min / train_max still contain a 'price' entry.
norm_subm = (
    (subm[numeric_feature_cols] - train_min[numeric_feature_cols])
    / (train_max[numeric_feature_cols] - train_min[numeric_feature_cols])
)

In [38]:
# Re-attach the categorical columns of the test set by row index
norm_subm = norm_subm.join(subm.loc[:, ['cut', 'color', 'clarity']])

In [39]:
# One-hot encode the test set, then align it to the training feature columns:
# a level missing from the test set becomes an all-zero column, a level unseen
# in training is dropped, and the column order matches what the model was fit on.
subm_features = pd.get_dummies(norm_subm).reindex(
    columns=train_features.columns, fill_value=0
)

In [40]:
import tensorflow as tf

# Features only (no labels) for inference
subm_dataset = tf.data.Dataset.from_tensor_slices(subm_features)
# Batch like the training data instead of one row per step. No drop_remainder,
# so every row gets a prediction even when the row count is not a multiple of 32.
subm_dataset = subm_dataset.batch(32).prefetch(tf.data.AUTOTUNE)

In [41]:
# Fresh copy of the architecture (clone_model does not copy weights or compile state)
model_load = tf.keras.models.clone_model(model_4)

model_load.compile(loss='mse',
                   optimizer=tf.optimizers.Adam(),
                   metrics=['mae'])

# Restore the weights saved by the checkpoint that tracked the training metric
model_load.load_weights("checkpoint_path/model_4/FIT_1/train/chk_train.ckpt")

# valid_dataset is already batched, so batch_size is not passed
# (Keras documents that it must not be specified for dataset inputs).
load_evaluate=model_load.evaluate(valid_dataset)



In [51]:
# Fresh copy of the architecture (clone_model does not copy weights or compile state)
model_load = tf.keras.models.clone_model(model_4)

model_load.compile(loss='mse',
                   optimizer=tf.optimizers.Adam(),
                   metrics=['mae'])

# Restore the weights saved by the checkpoint that tracked the training metric
model_load.load_weights("checkpoint_path/model_4/FIT_1/train/chk_train.ckpt")

# train_dataset is already batched, so batch_size is not passed
# (Keras documents that it must not be specified for dataset inputs).
load_evaluate=model_load.evaluate(train_dataset)



In [42]:
# Scaled price predictions for every test-set row
x = model_load.predict(x=subm_dataset)



In [43]:
# Flatten the predictions to one value per row. Unlike np.squeeze, reshape(-1)
# still returns a 1-D array when there is a single row, so len() keeps working.
x = np.reshape(x, -1)

In [44]:
# Undo the min-max scaling of the target to get prices on the original scale
subm_prices = min_price + (max_price - min_price) * x

In [45]:
# Consecutive ids 0..N-1, one per predicted price
id_subm = np.arange(len(subm_prices))

In [46]:
# Columns of the submission table
dict_subm = dict(id=id_subm, price=subm_prices)

In [47]:
# Submission table with 'id' and 'price' columns
df_subm = pd.DataFrame(dict_subm)

In [48]:
# Use 'id' as the index so it is written as the first CSV column
df_subm = df_subm.set_index(keys='id')

In [50]:
# The '.zip' suffix makes pandas write a zip archive. Name the member inside it
# explicitly so the archive contains 'subm.csv' rather than a name derived
# from the archive path.
df_subm.to_csv('/content/drive/MyDrive/Colab Notebooks/subm.zip',
               compression={'method': 'zip', 'archive_name': 'subm.csv'})