In [11]:
import os
import sys

# TF_CPP_MIN_LOG_LEVEL has to be in the environment BEFORE TensorFlow is
# imported; setting it afterwards is too late to silence the messages the
# native library emits while it is being loaded.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

#### Load HDF5 utils

In [2]:
def print_name(name):
    """Write ``name`` to standard output followed by a newline.

    Returns ``None``. Presumably intended as a visitor callback for
    browsing the HDF5 file (e.g. ``data.visit(print_name)``); no call site
    is visible in this notebook, so confirm before relying on that.
    """
    print(name, end='\n')

In [3]:
def get_dataset_paths(folder, data):
    """Return full paths for every second member found under ``data[folder]``.

    Parameters
    ----------
    folder : str
        Key of the group to walk; it is also used as the path prefix.
    data : mapping
        Object such that ``data[folder].visit(callback)`` calls ``callback``
        once per member name (an open ``h5py.File`` in this notebook).

    Returns
    -------
    list of str
        ``folder + '/' + name`` for the names at positions 1, 3, 5, ... of
        the visit order.

    Notes
    -----
    The ``[1::2]`` selection presumably relies on the file alternating
    "temperature group, dataset inside it" -- TODO confirm against the
    actual file layout, since any extra member would shift the selection.
    """
    member_names = []
    # list.append returns None, which lets visit() continue through every member.
    data[folder].visit(member_names.append)

    return [folder + '/' + member for member in member_names[1::2]]

In [4]:
def get_train_test_valid_index(max_size, train_size, valid_size, seed=None):
    """Randomly partition ``range(max_size)`` into train, test and validation indices.

    A single random permutation of the indices is cut into three
    consecutive pieces, so the subsets are disjoint and together cover
    every index exactly once.

    Parameters
    ----------
    max_size : int
        Number of available samples; indices run from 0 to ``max_size - 1``.
    train_size : int
        Number of indices assigned to the training subset.
    valid_size : int
        Number of indices assigned to the validation subset.
    seed : int, optional
        When given, a private ``np.random.RandomState(seed)`` drives the
        shuffle, making the split reproducible. When ``None`` (default) the
        global NumPy generator is used, so ``np.random.seed`` still applies.

    Returns
    -------
    train_idx, test_idx, valid_idx : numpy.ndarray
        Integer index arrays, in that order (note: test comes before
        validation). ``test_idx`` holds the
        ``max_size - train_size - valid_size`` remaining indices in
        ascending order; it keeps an integer dtype even when it is empty.

    Raises
    ------
    ValueError
        If any size is negative or ``train_size + valid_size > max_size``.
    """
    max_size = int(max_size)
    train_size = int(train_size)
    valid_size = int(valid_size)

    if max_size < 0 or train_size < 0 or valid_size < 0:
        raise ValueError('max_size, train_size and valid_size must be non-negative')

    used = train_size + valid_size
    if used > max_size:
        raise ValueError(
            'train_size + valid_size ({0}) exceeds max_size ({1})'.format(used, max_size))

    rng = np.random if seed is None else np.random.RandomState(seed)
    shuffled = rng.permutation(max_size)

    train_idx = shuffled[:train_size]
    valid_idx = shuffled[train_size:used]
    # Everything not drawn for train/validation is the test subset.
    test_idx = np.sort(shuffled[used:])

    return train_idx, test_idx, valid_idx

In [5]:
# Open the Ising configuration/energy file read-only
# (mode 'r' = read only, 'r+' = read and write).
data = h5py.File(
    '../Data/ising_conf-ener.h5',
    mode='r',
)

In [6]:
# Names of the attributes stored on the 'L=60' group; they are almost the
# same for all folders.
data_attrs = list(data['L=60'].attrs)

In [15]:
# Main folder name (L=60,90,120). It must be defined before anything is read
# from the file: the previous version looked up data[main_folder] first and
# therefore raised NameError on a freshly started kernel.
main_folder = 'L=60'

# Lattice size
L = data[main_folder].attrs['Lattice Size']

# The folder name and the lattice size stored inside it have to agree.
assert main_folder == 'L=' + str(L), \
    'main_folder {0!r} does not match stored lattice size {1}'.format(main_folder, L)

# Critical temperature
tc = data[main_folder].attrs['Critical Temperature']

# Critical temperature conf-ener folder
tc_path = '{0}/T={1:6.4f}/conf-ener'.format(main_folder, tc)

# Lattice size of the main folder under a descriptive name (same value as L)
lattice_size = L

# All temperatures in the main folder, with the critical temperature removed
temperatures = data[main_folder].attrs['Temperatures']
temperatures = temperatures[temperatures != tc]

# Paths to the datasets in main folder, again without the critical temperature
dataset_paths = get_dataset_paths(main_folder, data)
dataset_paths.remove(tc_path)

# Monte Carlo steps
mc_steps = np.int32(data[main_folder].attrs['Monte Carlo Steps'])

# Dataset total size
dataset_size = temperatures.size * mc_steps

# Train percent
train_perc = 0.95

# Validation percent
valid_perc = 0.02

# Test percent
test_perc = 0.03

#### Building the dataset

In [9]:
# Fixed seed so the random split is identical on every Restart & Run All.
SPLIT_SEED = 42
np.random.seed(SPLIT_SEED)

# Number of samples per temperature assigned to each subset.
train_size = np.ceil(mc_steps * train_perc).astype(np.int32)
valid_size = np.ceil(mc_steps * valid_perc).astype(np.int32)
test_size = mc_steps - (valid_size + train_size)

# Both sizes above are rounded up, so for a small mc_steps their sum can
# exceed mc_steps; fail with a clear message instead of a shape error below.
if test_size < 0:
    raise ValueError(
        'train_size + valid_size ({0}) exceeds mc_steps ({1})'.format(
            train_size + valid_size, mc_steps))

# One column of sample indices per dataset path.
n_paths = len(dataset_paths)
train_idx = np.zeros((train_size, n_paths), dtype=np.int32)
valid_idx = np.zeros((valid_size, n_paths), dtype=np.int32)
test_idx = np.zeros((test_size, n_paths), dtype=np.int32)

# Draw an independent random split for every dataset path.
for column in range(n_paths):
    train_idx[:, column], test_idx[:, column], valid_idx[:, column] = \
        get_train_test_valid_index(mc_steps, train_size, valid_size)

In [166]:
# Only the first 10% of each dataset's training indices are loaded for now.
data_slice = np.floor(train_size * 0.1).astype(np.int32)

# One row per loaded configuration, L * L values each.
train_batch = np.zeros((data_slice * temperatures.size, L * L), dtype=np.int16)

# Binary label per row: 1 above the critical temperature, 0 otherwise.
label_batch = np.zeros(data_slice * temperatures.size, dtype=np.int16)

# temperatures[idx] supplies the label for dataset_paths[idx], so the two
# must at least have the same length.
# NOTE(review): this also assumes both are in the same order -- confirm.
assert len(dataset_paths) == temperatures.size, \
    'dataset_paths and temperatures differ in length'

for idx, dataset_path in enumerate(dataset_paths):

    # Rows of the output arrays that belong to this dataset.
    rows = slice(idx * data_slice, (idx + 1) * data_slice)

    train_set = data[dataset_path]['Configuration']

    train_batch[rows, :] = train_set[0, train_idx[:data_slice, idx]]

    # Compare against the critical temperature read from the file rather
    # than a hard-coded approximation of it.
    if temperatures[idx] > tc:
        label_batch[rows] = 1
    

0.040000000000000036 13
0.14000000000000012 14
0.2400000000000002 15
0.3400000000000003 16
0.4400000000000004 17
0.5400000000000005 18
0.6400000000000006 19
0.7400000000000007 20
0.8400000000000007 21
0.9400000000000008 22
1.040000000000001 23
1.140000000000001 24
1.240000000000001 25
1.3400000000000012 26
1.4400000000000013 27
1.5400000000000014 28
1.6400000000000015 29


https://www.pyimagesearch.com/2018/12/24/how-to-use-keras-fit-and-fit_generator-a-hands-on-tutorial/
https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly

In [None]:
def get_train_batch(data, train_idx):
    """Unfinished placeholder for a training-batch loader.

    At present it only allocates a zero-dimensional array and discards it;
    neither ``data`` nor ``train_idx`` is used and the function returns
    ``None``.
    """
    placeholder = np.zeros(shape=())

In [39]:
# Binary classifier on flattened L * L configurations:
# one hidden ReLU layer, dropout, and a single output unit.
model = keras.Sequential([
    keras.layers.Dense(100, input_shape=(L * L,), activation='relu'),
    keras.layers.Dropout(0.5),
    # sigmoid, not softmax: softmax normalises across the units of the layer,
    # so with a single unit it always outputs 1.0 and the model cannot learn.
    keras.layers.Dense(1, activation='sigmoid')
])

In [41]:
# Training configuration: binary cross-entropy loss minimised with Adam,
# reporting accuracy as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [40]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 100)               360100    
_________________________________________________________________
dropout_3 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 101       
Total params: 360,201
Trainable params: 360,201
Non-trainable params: 0
_________________________________________________________________


In [None]:
# The previous version of this cell was an unfinished fit_generator template:
# empty `generator=` / `validation_data=` arguments (a SyntaxError) and an
# undefined batch_size. Until a data generator exists, train on the arrays
# already held in memory (train_batch / label_batch).
# TODO: switch to a generator (see the links above) to stream the full dataset.
batch_size = 32

# Rows of train_batch are grouped dataset by dataset, and validation_split
# takes the LAST fraction of the arrays, so shuffle first to get a
# validation subset that mixes all temperatures.
shuffle_order = np.random.permutation(train_batch.shape[0])
features = train_batch[shuffle_order].astype(np.float32)
targets = label_batch[shuffle_order].astype(np.float32)

# 20% validation mirrors the 3800:950 train/validation ratio of the template.
history = model.fit(features, targets,
                    batch_size=batch_size,
                    epochs=10,
                    verbose=1,
                    validation_split=0.2)