In [1]:
import numpy as np
import pandas as pd


from scipy.stats import bernoulli
from sklearn.preprocessing import StandardScaler

from keras.layers import Dense
from keras.models import Sequential
from keras.losses import categorical_crossentropy

from tunnel_generator import *

ModuleNotFoundError: No module named 'tunnel_generator'

$$\Delta \Psi = \left(V(x) - E\right)\Psi \qquad \text{(units with } \hbar^2/2m = 1\text{)}$$

![](notebook-imgs/image1.png)
![](notebook-imgs/image2.png)
![](notebook-imgs/image3.png)

In [166]:
def balanced_samples(d, partitions=4, **kwargs):
    """
    Down-sample ``d`` so that every bin of its last column contributes the
    same number of rows.

    The last column is rounded to one decimal and the interval [0, 1] is cut
    into ``partitions`` equal-width bins. Bins are half-open ``[lo, hi)``
    except the last one, which is closed ``[lo, 1]``, so a row belongs to at
    most one bin and can never be drawn twice. Rows whose rounded value lies
    outside [0, 1] belong to no bin and are dropped.

    The per-bin sample size ``s`` is the population of the least populated
    bin, capped at ``int(len(d) / partitions)``.

    Parameters
    ----------
    d : pandas.DataFrame
        Input rows; the last column holds the value to balance on.
    partitions : int
        Number of equal-width bins.
    kwargs: verbose=True
        When true, print a short summary of the balancing.

    Returns
    -------
    pandas.DataFrame
        ``partitions * s`` rows: ``s`` rows drawn at random from each bin,
        concatenated bin after bin (original index labels are kept).
    """
    verbose = kwargs.get('verbose', True)

    # Shuffle and select the number of parts it
    # will be split into
    #
    d = d.sample(frac=1)
    p = 1/partitions

    # Round once (with Python's round, as before) instead of re-rounding the
    # whole column for every bin in both passes.
    rounded = d.iloc[:, -1].apply(lambda x: round(x, 1))

    # Consecutive bins share one edge value, so there is neither a gap nor an
    # overlap between them; only the last bin includes its upper edge.
    edges = [k*p for k in range(partitions)] + [1.0]
    masks = []
    for k in range(partitions):
        lo, hi = edges[k], edges[k + 1]
        below_hi = (rounded <= hi) if k == partitions - 1 else (rounded < hi)
        masks.append((rounded >= lo) & below_hi)

    # Define the number of samples per partition:
    # equi-sampling will force us to select the
    # occurrences of the less abundant
    #
    s = int(d.shape[0]/partitions)
    for mask in masks:
        s = min(s, int(mask.sum()))
    if verbose:
        print(f'\nBalancing samples:\n{s} samples per bin with {partitions} bins '\
              f'will transform the {d.shape[0]}-points dataset\ninto a '\
              f'{partitions*s}-points dataset!')

    # retrieve 's' items per class
    #
    data = [d[mask].sample(frac=1).iloc[:s, :] for mask in masks]
    if verbose:
        print('\nDataset balanced successfully!\n')

    # `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
    return pd.concat(data, axis=0)

In [170]:
def vainilla_main(required_length = 15E3, vainilla = True):
    """
    Build a balanced dataset with at least ``required_length`` rows.

    Each iteration calls ``generator`` with positional arguments that grow
    with ``q`` (``q`` starts at 0 and increases by 5 per iteration), balances
    the result with ``balanced_samples`` using 5 bins, and keeps that frame.
    The loop stops as soon as the frame has ``required_length`` rows or after
    10 iterations, whichever comes first. Progress and the final
    success/failure status are printed.

    Parameters
    ----------
    required_length : number
        Minimum number of rows wanted in the returned frame.
    vainilla : bool
        Selects the ``generator`` arguments: ``(50+q, 50+q, 50+q)`` when
        true, ``(3+q/5, 3+q, 3+q)`` otherwise.

    Returns
    -------
    pandas.DataFrame
        The frame produced by the last iteration. It may be shorter than
        ``required_length`` if the iteration limit was reached, and it is
        empty if no iteration ran.
    """
    max_iterations = 10
    df = pd.DataFrame()
    count = 0
    q = 0
    while ((len(df)<required_length) and (count<max_iterations)):
        print(f'\nIteration {count}:\nRequired Length: {required_length}\n'\
             f'Current Length: {len(df)}\n')

        # NOTE(review): the original statement here was syntactically broken
        # (the two branches were interleaved and the parentheses unbalanced).
        # It is reconstructed as "both branches balance the generator output
        # with 5 bins" -- confirm that the non-vainilla branch is really
        # meant to go through balanced_samples as well.
        if vainilla:
            generator_args = (50+q, 50+q, 50+q)
        else:
            generator_args = (3+q/5, 3+q, 3+q)
        df = balanced_samples(generator(*generator_args, verbose=False),
                              5, verbose=False)

        count += 1
        q += 5

    if len(df)>=required_length: print('\nSUCCESS! the required-length-condition WAS SATISFIED')
    else: print('\nFAILURE: the required-length-condition WAS NOT SATISFIED')
    print(f'\n\nREQUIRED: {required_length}\nACTUAL: {len(df)}')
    return df

In [None]:
# Build the dataset with the function's defaults
# (required_length=15E3, vainilla=True); progress is printed while it runs.
df = vainilla_main()

In [165]:
# Preview the first rows, then write the frame to CSV without its index.
# NOTE(review): this relies on `df` from the cell above and presumably needs
# the `databases/` directory to exist already -- confirm before a fresh run.
print(df.head())
df.to_csv('databases/tunnel-effect-database-vainilla.csv', index=False)

               L          V    E         proba
47616   0.505145  32.862626  2.9  5.171113e-04
143699  1.616245  48.991919  0.2  8.817656e-16
192233  2.121291  32.862626  4.1  1.855367e-14
249920  2.727345  11.692929  8.6  6.630583e-06
240379  2.626336  17.237374  7.9  6.094185e-10
