In [21]:
import MonteCarloCV

ModuleNotFoundError: No module named 'MonteCarloCV'

In [1]:
import numpy as np
import pandas as pd
from scipy.io import loadmat
import matplotlib.pyplot as plt

data = loadmat('dispersionpoints.mat')
print(data.keys())

dict_keys(['__header__', '__version__', '__globals__', 'ESP', 'lamda1', 'x1'])


## Monte Carlo CV

In [None]:
!pip install pytorch
!pip install tensorflow --use-pep517
!pip install keras --use-pep517

In [22]:
from typing import List, Generator

import numpy as np

from sklearn.model_selection._split import _BaseKFold
from sklearn.utils.validation import indexable, _num_samples


class MonteCarloCV(_BaseKFold):

    def __init__(self,
                 n_splits: int,
                 train_size: float,
                 test_size: float,
                 gap: int = 0):
        """
        Monte Carlo Cross-Validation

        Holdout applied in multiple testing periods
        Testing origin (time-step where testing begins) is randomly chosen according to a monte carlo simulation

        :param n_splits: (int) Number of monte carlo repetitions in the procedure
        :param train_size: (float) Train size, in terms of ratio of the total length of the series
        :param test_size: (float) Test size, in terms of ratio of the total length of the series
        :param gap: (int) Number of samples to exclude from the end of each train set before the test set.
        """

        self.n_splits = n_splits
        self.n_samples = -1
        self.gap = gap
        self.train_size = train_size
        self.test_size = test_size
        self.train_n_samples = 0
        self.test_n_samples = 0

        self.mc_origins = []

    def split(self, X, y=None, groups=None) -> Generator:
        """Generate indices to split data into training and test set.
        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.
        y : array-like of shape (n_samples,)
            Always ignored, exists for compatibility.
        groups : array-like of shape (n_samples,)
            Always ignored, exists for compatibility.
        Yields
        ------
        train : ndarray
            The training set indices for that split.
        test : ndarray
            The testing set indices for that split.
        """

        X, y, groups = indexable(X, y, groups)
        self.n_samples = _num_samples(X)

        self.train_n_samples = int(self.n_samples * self.train_size) - 1
        self.test_n_samples = int(self.n_samples * self.test_size) - 1

        # Make sure we have enough samples for the given split parameters
        if self.n_splits > self.n_samples:
            raise ValueError(
                f'Cannot have number of folds={self.n_splits} greater'
                f' than the number of samples={self.n_samples}.'
            )
        if self.train_n_samples - self.gap <= 0:
            raise ValueError(
                f'The gap={self.gap} is too big for number of training samples'
                f'={self.train_n_samples} with testing samples={self.test_n_samples} and gap={self.gap}.'
            )

        indices = np.arange(self.n_samples)

        selection_range = np.arange(self.train_n_samples + 1, self.n_samples - self.test_n_samples - 1)

        self.mc_origins = \
            np.random.choice(a=selection_range,
                             size=self.n_splits,
                             replace=True)

        for origin in self.mc_origins:
            if self.gap > 0:
                train_end = origin - self.gap + 1
            else:
                train_end = origin - self.gap
            train_start = origin - self.train_n_samples - 1

            test_end = origin + self.test_n_samples

            yield (
                indices[train_start:train_end],
                indices[origin:test_end],
            )

    def get_origins(self) -> List[int]:
        return self.mc_origins

ModuleNotFoundError: No module named 'sklearn'

In [13]:
data_lambda = data['lamda1']

#First and last run preview
display(data_lambda[:,:,0])
display(data_lambda[:,:,99])

#tensor flatten and stack
lambda1 = data_lambda.transpose(1,2,0).reshape(-1,data_lambda.shape[1])
display(lambda1)
display(lambda1.shape)

#base on
#https://stackoverflow.com/questions/35992458/convert-reshape-3d-matrix-to-a-2d-matrix


array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 6.68424850e-04, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 1.33633598e-03, 0.00000000e+00],
       ...,
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        8.26254997e-01, 1.00671452e+00, 1.02473435e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        8.26877362e-01, 1.00802210e+00, 1.02609304e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        8.27499826e-01, 1.00933069e+00, 1.02745084e+00]])

array([[0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
        0.00000000e+000, 5.06806211e-253, 0.00000000e+000],
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
        0.00000000e+000, 1.01322291e-252, 0.00000000e+000],
       ...,
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
        4.06197666e-001, 4.24970087e-001, 2.54093240e-001],
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
        4.06926305e-001, 4.25343292e-001, 2.54330546e-001],
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
        4.07653070e-001, 4.25716238e-001, 2.54567746e-001]])

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.21115606, 0.21146157, 0.21176981, ..., 0.21832627, 0.21879533,
        0.21924649],
       [0.21968168, 0.22010254, 0.22051054, ..., 0.23807601, 0.23832682,
        0.23857723],
       [0.23882726, 0.23907691, 0.2393262 , ..., 0.25409324, 0.25433055,
        0.25456775]])

(149900, 66)

In [19]:
#Espessura
ESP = pd.DataFrame(data['ESP'].reshape(-1,), columns = ['Espessura'])
print(ESP.shape)
display(ESP)

#Comprimento de onda normalizado
lambda1 = pd.DataFrame(lambda1, columns = range(1,67))
display(lambda1)

#frequências normalizadas
x1 = data['x1']
print(x1.shape)
x1flat = x1.flatten()
x1_df = pd.DataFrame(x1flat, columns = ['Freq'])
x1_df

(100, 1)


Unnamed: 0,Espessura
0,0.0300
1,0.0298
2,0.0296
3,0.0294
4,0.0292
...,...
95,0.0110
96,0.0108
97,0.0106
98,0.0104


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,57,58,59,60,61,62,63,64,65,66
0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149895,5.051909e-07,5.059182e-07,5.066453e-07,5.073725e-07,5.080996e-07,5.088268e-07,5.095539e-07,5.102811e-07,5.110082e-07,5.117354e-07,...,0.190614,0.190875,0.191135,0.191396,0.191656,0.191917,0.192177,0.192438,0.192698,0.192959
149896,1.932198e-01,1.934806e-01,1.937414e-01,1.940024e-01,1.942634e-01,1.945245e-01,1.947857e-01,1.950470e-01,1.953084e-01,1.955699e-01,...,0.208218,0.208504,0.208792,0.209081,0.209371,0.209664,0.209958,0.210254,0.210553,0.210853
149897,2.111561e-01,2.114616e-01,2.117698e-01,2.120810e-01,2.123953e-01,2.127130e-01,2.130344e-01,2.133596e-01,2.136891e-01,2.140232e-01,...,0.000366,0.000367,0.000367,0.216221,0.216788,0.217325,0.217837,0.218326,0.218795,0.219246
149898,2.196817e-01,2.201025e-01,2.205105e-01,2.209069e-01,2.212928e-01,2.216692e-01,2.220369e-01,2.223968e-01,2.227494e-01,2.230953e-01,...,0.236308,0.236562,0.236816,0.237069,0.237321,0.237573,0.237825,0.238076,0.238327,0.238577


(1499, 100)


Unnamed: 0,Freq
0,0.00000
1,0.00000
2,0.00000
3,0.00000
4,0.00000
...,...
149895,1.64780
149896,1.61784
149897,1.58788
149898,1.55792


In [56]:
display(ESP.describe())
lambda1.insert(0, "x1",x1_df)
lambda1.describe()

Unnamed: 0,0
count,100.0
mean,0.0201
std,0.005802
min,0.0102
25%,0.01515
50%,0.0201
75%,0.02505
max,0.03


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,56,57,58,59,60,61,62,63,64,65
count,149900.0,149900.0,149900.0,149900.0,149900.0,149900.0,149900.0,149900.0,149900.0,149900.0,...,149900.0,149900.0,149900.0,149900.0,149900.0,149900.0,149900.0,149900.0,149900.0,149900.0
mean,0.018834,0.018936,0.018726,0.018738,0.018546,0.01862,0.01866,0.018717,0.019008,0.018894,...,0.018314,0.018543,0.018506,0.018532,0.018388,0.018575,0.018622,0.018485,0.018443,0.01861
std,0.11357,0.113991,0.112959,0.113266,0.112436,0.112884,0.112768,0.113146,0.114332,0.113628,...,0.111107,0.112441,0.112418,0.112228,0.111567,0.112434,0.112759,0.112288,0.112005,0.112803
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.566566,1.515966,1.51697,1.517973,1.518975,1.519978,1.520981,1.521984,1.522987,1.523989,...,1.556736,1.557719,1.558702,1.559685,1.560668,1.561651,1.562634,1.563617,1.5646,1.565583


Unnamed: 0,0
count,149900.0
mean,1.50549
std,1.002949
min,0.0
25%,0.68544
50%,1.37178
75%,2.17442
max,4.494


## CNN