# Miniproject Task1

In [5]:
# %pip install FLAML
%pip install scikit-learn
import flaml

Collecting scikit-learn
  Using cached scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl.metadata (31 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Using cached scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl (11.2 MB)
Using cached joblib-1.4.2-py3-none-any.whl (301 kB)
Using cached threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.4.2 scikit-learn-1.6.1 threadpoolctl-3.5.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [14]:
import numpy as np
from numpy import mean
from numpy import std

import pandas as pd

In [15]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import Conv1D
from keras.layers import Conv2D
from keras.layers import MaxPooling1D
from keras.callbacks import EarlyStopping

In [16]:
from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout, Conv2D
from keras.callbacks import EarlyStopping
import keras_tuner as kt  # Import Keras Tuner
import tensorflow as tf


In [17]:
# Creating Training and testing data

# read data from a file and normalize it
def read_and_norm(type_,type):
    """Load a whitespace-separated numeric matrix and min-max scale it.

    The file ``dataset/<type_>/<type>.txt`` is read line by line; each line
    becomes one row of floats.  Scaling uses the global minimum and maximum
    of the whole matrix (not per-column statistics), so the result lies in
    ``[0, 1]``.

    Parameters
    ----------
    type_ : str
        Sub-directory of ``dataset`` to read from.
    type : str
        Base name of the ``.txt`` file inside that directory.  The name
        shadows the ``type`` builtin; it is kept so keyword callers do not
        break.

    Returns
    -------
    numpy.ndarray
        The scaled matrix.  When every entry of the input is identical the
        value range is zero, and an all-zero matrix of the same shape is
        returned instead of the NaNs a 0/0 division would produce.
    """
    with open('dataset/{}/{}.txt'.format(type_, type), 'rb') as f:
        # float() accepts the bytes tokens produced by splitting a binary line.
        matrix = np.array([[float(x) for x in line.split()] for line in f])

    min_m = matrix.min()
    max_m = matrix.max()
    value_range = max_m - min_m

    if value_range == 0:
        # Constant input: there is nothing to scale, and dividing would
        # yield NaN everywhere.
        return np.zeros_like(matrix)

    return (matrix - min_m) / value_range

In [18]:
def _get_data(hr_file_path,type_):
    """Assemble the feature tensor and label column for one dataset split.

    The ``shape``, ``el`` and ``dist`` matrices and the labels are read from
    ``dataset/<type_>/``; the ``hr`` matrix is read from ``hr_file_path``.
    Lines of the hr file that split into exactly 9 fields are skipped, and
    the rows at the same line indices are removed from the labels and from
    the three other matrices.

    Parameters
    ----------
    hr_file_path : str
        Path of the whitespace-separated hr file.
    type_ : str
        Name of the split directory under ``dataset``.

    Returns
    -------
    tuple
        ``(data_X, classification)``.  ``data_X`` stacks, per sample, the
        rows taken from shape, dist, el and hr (in that order) and carries
        a trailing axis of length 1; ``classification`` is a single-column
        array.  Because the rows are paired with ``zip``, the sample count
        is that of the shortest of the four inputs.
    """
    # Normalised feature matrices, loaded in the order shape, el, dist.
    shape, el, dist = (read_and_norm(type_, name) for name in ('shape', 'el', 'dist'))

    # Labels as a column vector.
    classification = np.loadtxt('dataset/{}/classification.txt'.format(type_)).reshape(-1, 1)

    kept_rows = []
    dropped_indices = []
    with open(hr_file_path, "r") as handle:
        for line_no, line in enumerate(handle):
            fields = line.split()
            if len(fields) == 9:
                dropped_indices.append(line_no)
                continue
            kept_rows.append(fields)
    hr = np.array([[float(token) for token in row] for row in kept_rows])

    # Remove, from every other array, the rows whose hr line was skipped.
    classification, shape, el, dist = (
        np.delete(arr, dropped_indices, 0)
        for arr in (classification, shape, el, dist)
    )

    data_X = np.array(list(zip(shape, dist, el, hr)))
    dims = data_X.shape
    data_X = data_X.reshape(dims[0], dims[1], dims[2], 1)

    return data_X, classification

In [6]:
def tune_model(trainX, trainy, hp):
    """Build and compile the tunable CNN for one set of hyperparameters.

    The network is: Conv2D with a (4, 1) kernel -> Dropout -> Conv2D with a
    (1, 3) kernel -> Dropout -> Flatten -> two Dense/Dropout pairs -> a
    sigmoid output layer with one unit per label column.

    Parameters
    ----------
    trainX : array-like
        Training inputs; only ``shape[1]`` and ``shape[2]`` are read, to
        size the input layer.
    trainy : array-like
        Training labels; only ``shape[1]`` is read, to size the output layer.
    hp : hyperparameter container
        Object exposing ``Int``, ``Float`` and ``Choice``, used to register
        and sample the filter counts, dropout rates, dense widths and the
        learning rate.

    Returns
    -------
    The model, compiled with binary cross-entropy loss, the Adam optimizer
    and an accuracy metric.
    """
    def dropout_rate(tag):
        # Every dropout layer shares the same search range.
        return hp.Float(tag, min_value=0.1, max_value=0.5, step=0.1)

    def dense_units(tag):
        # Both hidden dense layers share the same search range.
        return hp.Int(tag, min_value=16, max_value=128, step=16)

    net = Sequential()

    # First convolution: spans 4 positions along the first input axis.
    first_filters = hp.Int('filters_1', min_value=8, max_value=64, step=8)
    input_dims = (trainX.shape[1], trainX.shape[2], 1)
    net.add(Conv2D(filters=first_filters, kernel_size=(4, 1), activation='relu', input_shape=input_dims))
    net.add(Dropout(dropout_rate('dropout_1')))

    # Second convolution: spans 3 positions along the second input axis.
    second_filters = hp.Int('filters_2', min_value=4, max_value=32, step=4)
    net.add(Conv2D(filters=second_filters, kernel_size=(1, 3), activation='relu'))
    net.add(Dropout(dropout_rate('dropout_2')))

    net.add(Flatten())

    # Two fully connected blocks, each followed by dropout.
    for units_tag, drop_tag in (('dense_1', 'dropout_3'), ('dense_2', 'dropout_4')):
        net.add(Dense(units=dense_units(units_tag), activation='relu'))
        net.add(Dropout(dropout_rate(drop_tag)))

    net.add(Dense(trainy.shape[1], activation='sigmoid'))

    chosen_lr = hp.Choice('learning_rate', values=[0.001, 0.0005, 0.0001])
    net.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=chosen_lr),
        metrics=['accuracy'],
    )

    return net

def best_model(trainX, trainy, testX, testy,save_model = 1, model_path = 'models'):
    """Tune the CNN with Hyperband, retrain the best configuration and score it.

    Steps:

    1. Run a Hyperband search over the ``tune_model`` search space,
       maximising ``val_accuracy`` on a 20 % validation split.  Tuner state
       lives in ``tuner_results/cnn_autotune``.
    2. Rebuild a fresh model from the best hyperparameters and fit it for up
       to 300 epochs (batch size 1) with early stopping on ``val_accuracy``.
    3. Evaluate that model on the test data.
    4. Optionally save it as ``<model_path>/my_model.keras``.

    Parameters
    ----------
    trainX, trainy : array-like
        Training inputs and labels.
    testX, testy : array-like
        Held-out inputs and labels used only for the final evaluation.
    save_model : bool or int, default 1
        When truthy the retrained model is written to disk.
    model_path : str, default 'models'
        Directory that receives ``my_model.keras``; created if missing.

    Returns
    -------
    tuple
        ``(accuracy, history, model_saved_path)``.  ``model_saved_path`` is
        ``None`` when ``save_model`` is falsy.
    """
    import os  # function-scope import so this definition is self-contained

    tuner = kt.Hyperband(
        lambda hp: tune_model(trainX, trainy, hp),
        objective='val_accuracy',
        max_epochs=50,
        factor=3,
        directory='tuner_results',
        project_name='cnn_autotune'
    )

    early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)

    tuner.search(trainX, trainy, epochs=50, validation_split=0.2, callbacks=[early_stopping])

    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

    # Deliberately not called `best_model`: that would shadow this function
    # inside its own body.
    final_model = tuner.hypermodel.build(best_hps)
    history = final_model.fit(trainX, trainy, epochs=300, batch_size=1, verbose=1, validation_split=0.2, callbacks=[early_stopping])

    # evaluate model
    _, accuracy = final_model.evaluate(testX, testy, batch_size=1, verbose=1)

    # Always bound, so the return below is valid when saving is disabled.
    model_saved_path = None
    if save_model:
        if model_path:
            # Make sure the target directory exists before writing into it.
            os.makedirs(model_path, exist_ok=True)
        model_saved_path = '{}/my_model.keras'.format(model_path)
        final_model.save(model_saved_path)

    return accuracy, history, model_saved_path

    

In [7]:

# fit and evaluate a model
def evaluate_model_2dconv(trainX, trainy, testX, testy,save_model = 1, model_path = 'models'):
    """Fit a fixed-architecture 2-D CNN and report its test accuracy.

    Architecture: Conv2D (9 filters, 4x1 kernel) -> Dropout 0.25 ->
    Conv2D (3 filters, 1x3 kernel) -> Dropout 0.25 -> Flatten ->
    Dense 30 -> Dropout 0.2 -> Dense 30 -> Dropout 0.2 -> sigmoid output
    with one unit per label column.  The model is trained for 3 epochs with
    batch size 1 and a 20 % validation split.

    Parameters
    ----------
    trainX, trainy : array-like
        Training inputs and labels; ``trainX.shape[1:]`` sizes the input
        layer and ``trainy.shape[1]`` sizes the output layer.
    testX, testy : array-like
        Held-out inputs and labels used only for the final evaluation.
    save_model : bool or int, default 1
        When truthy the trained model is written to disk.
    model_path : str, default 'models'
        Directory that receives ``my_model.keras``; created if missing.

    Returns
    -------
    tuple
        ``(accuracy, history, model_saved_path)``.  ``model_saved_path`` is
        ``None`` when ``save_model`` is falsy.
    """
    import os  # function-scope import so this definition is self-contained

    verbose, epochs, batch_size = 1, 3, 1
    n_outputs = trainy.shape[1]

    model = Sequential()
    model.add(Conv2D(filters=9, kernel_size=(4, 1), input_shape=trainX.shape[1:], activation='relu'))
    model.add(Dropout(0.25))
    model.add(Conv2D(filters=3, kernel_size=(1, 3), activation='relu'))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(30, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(30, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(n_outputs, activation='sigmoid'))
    model.summary()
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    es = EarlyStopping(monitor='val_accuracy',
                       mode='max',
                       patience=50,
                       restore_best_weights=True)

    history = model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size,
                        verbose=verbose, validation_split=.2, callbacks=[es])

    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=1)

    # Always bound, so the return below is valid when saving is disabled.
    model_saved_path = None
    if save_model:
        if model_path:
            # Make sure the target directory exists before writing into it.
            os.makedirs(model_path, exist_ok=True)
        model_saved_path = '{}/my_model.keras'.format(model_path)
        model.save(model_saved_path)

    return accuracy, history, model_saved_path



# summarize scores
def summarize_results(scores):
    """Print the raw scores, then return ``(mean, standard deviation)``."""
    print(scores)
    return mean(scores), std(scores)


In [8]:
def train_model(train_X, train_Y,test_X, test_Y, repeats=4):
    """Run the tune-train-evaluate pipeline several times and average the accuracy.

    Each repetition calls ``best_model`` and records its test accuracy as a
    percentage.  ``tune_model`` is not called here: it only builds a network
    for a given hyperparameter set and does not return a score.

    Parameters
    ----------
    train_X, train_Y : array-like
        Training inputs and labels.
    test_X, test_Y : array-like
        Held-out inputs and labels.
    repeats : int, default 4
        Number of repetitions.

    Returns
    -------
    tuple
        ``(mean_accuracy_percent, model_saved_path)``.  ``best_model`` is
        called with its default ``model_path``, so every repetition writes
        to the same file and the path refers to the model from the last one.
        The path is ``None`` if ``repeats`` is zero.
    """
    scores = []
    model_saved_path = None

    # repeat experiment
    for _ in range(repeats):
        score, history, model_saved_path = best_model(train_X, train_Y, test_X, test_Y)
        scores.append(score * 100.0)

    # Named `mean_score` so the numpy `mean` imported by the notebook is not
    # shadowed inside this function.
    mean_score, std_dev = summarize_results(scores)

    return mean_score, model_saved_path

In [9]:
def run_experiment(hr_file_path, test_hr_file_path=None):
    """Load both splits, tune and train the CNN, and report the result.

    Parameters
    ----------
    hr_file_path : str
        Path of the hr file used for the training split.
    test_hr_file_path : str, optional
        Path of the hr file used for the test split.  When omitted,
        ``hr_file_path`` is reused for the test split as well.
        NOTE(review): reusing the training hr file pairs training hr rows
        with test-split features; pass the test split's own hr file if one
        exists.

    Returns
    -------
    tuple
        ``(accuracy, model_path)`` from ``best_model`` when the accuracy is
        greater than zero, otherwise ``(0, None)``.
    """
    if test_hr_file_path is None:
        # Backward-compatible default: same hr file for both splits.
        test_hr_file_path = hr_file_path

    train_X, train_Y = _get_data(hr_file_path, "train")
    test_X, test_Y = _get_data(test_hr_file_path, "test")

    print(train_X.shape, train_Y.shape)
    print(test_X.shape, test_Y.shape)

    accuracy, _history, saved_path = best_model(train_X, train_Y, test_X, test_Y)

    # A run that scores nothing is reported as "no model".
    if accuracy > 0:
        return accuracy, saved_path
    return 0, None



# Run the whole pipeline (load data, tune, retrain, evaluate) and show the
# test accuracy it returns.
# NOTE(review): run_experiment forwards this training hr file to _get_data for
# both the "train" and the "test" split - confirm that is intended.
best_model_accuracy,best_model_path = run_experiment("dataset/train/hr.txt")
print(best_model_accuracy)

(2990, 4, 10, 1) (2990, 1)
(1282, 4, 10, 1) (1282, 1)
Reloading Tuner from tuner_results/cnn_autotune/tuner0.json
Epoch 1/300


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2392/2392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 553us/step - accuracy: 0.6618 - loss: 0.6120 - val_accuracy: 0.7809 - val_loss: 0.4678
Epoch 2/300
[1m2392/2392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 525us/step - accuracy: 0.7811 - loss: 0.4781 - val_accuracy: 0.7391 - val_loss: 0.4804
Epoch 3/300
[1m2392/2392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 520us/step - accuracy: 0.7887 - loss: 0.4695 - val_accuracy: 0.7559 - val_loss: 0.4849
Epoch 4/300
[1m2392/2392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 522us/step - accuracy: 0.7617 - loss: 0.4930 - val_accuracy: 0.7776 - val_loss: 0.4901
Epoch 5/300
[1m2392/2392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 523us/step - accuracy: 0.8040 - loss: 0.4618 - val_accuracy: 0.7943 - val_loss: 0.4461
Epoch 6/300
[1m2392/2392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 570us/step - accuracy: 0.7928 - loss: 0.4641 - val_accuracy: 0.7793 - val_loss: 0.4417
Epoch 7/30

## Trying to train using AutoML

In [20]:
# Load the training split; hr values come from the training hr file.
type_ = "train"
hr_file_path = "dataset/train/hr.txt"
train_X, train_Y = _get_data(hr_file_path,type_)

# Load the test split.
# NOTE(review): hr_file_path still points at dataset/train/hr.txt here, so the
# test features are paired with training hr rows - confirm this is intended.
type_ = "test"
test_X, test_Y = _get_data(hr_file_path,type_)

# Sanity check: sample counts and tensor layout of both splits.
print(train_X.shape, train_Y.shape)
print(test_X.shape, test_Y.shape)

(2990, 4, 10, 1) (2990, 1)
(1282, 4, 10, 1) (1282, 1)


In [4]:
%pip show flaml
import flaml

Name: FLAML
Version: 2.3.4
Summary: A fast library for automated machine learning and tuning
Home-page: https://github.com/microsoft/FLAML
Author: Microsoft Corporation
Author-email: hpo@microsoft.com
License: 
Location: /Users/pulkit/Desktop/mini_project/miniproject/lib/python3.12/site-packages
Requires: NumPy
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [8]:
%pip install FLAML==2.1.2


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [10]:
from flaml.automl import AutoML

ImportError: cannot import name 'AutoML' from 'flaml.automl' (/Users/pulkit/Desktop/mini_project/miniproject/lib/python3.12/site-packages/flaml/automl/__init__.py)