## A simple tutorial for basic uses of classes
basics of object-oriented programming

***Learning by doing!***

In [1]:
import numpy as np

### 1. Classes as containers (with `Enum`)

You can also use a class purely as a container of named constants (with `Enum`); in this case there is no `self`, because you are not going to create multiple instances: the members are defined once, on the class itself

In [2]:
from enum import Enum
from typing import Type, Optional

`Regularization` acts as a "default dictionary", let's say: a dictionary which can include two keys, `force_field_reg` and `forward_model_reg`, to specify the regularizations of the force-field correction and of the forward model, respectively;

- the first key is either a string (among `plain l2`, `constraint 1`, `constraint 2`, `KL divergence`) or a user-defined function which takes as input `pars_ff` and returns the regularization term to be multiplied by the hyperparameter `beta`;
- the second key is a user-defined function which takes as input `pars_fm` and `forward_coeffs_0` (current and refined forward-model coefficients) and returns the regularization term to be multiplied by the hyperparameter `gamma`.

#### 1. define `Force_field_reg` and `Forward_model_reg` with `Enum`

In [3]:
class _CallableValue:
    """Wrap a function so that `Enum` stores it as a member value.

    A bare function (or lambda) assigned in an `Enum` body is a descriptor, so
    `Enum` treats it as a method and does NOT create a member for it.
    Instances of this class define no `__get__`, hence they are stored as
    ordinary member values.
    """
    def __init__(self, func):
        self.func = func

    def __call__(self, *args, **kwargs):
        return self.func(*args, **kwargs)


class Force_field_reg(Enum):
    """Allowed regularizations of the force-field correction.

    The value of each member is either the string naming a built-in
    regularization or, for `CUSTOM`, a callable computing the regularization term.
    """
    PLAIN_L2 = 'plain l2'
    CONSTRAINT_1 = 'constraint 1'
    CONSTRAINT_2 = 'constraint 2'
    KL_DIVERGENCE = 'KL divergence'
    # wrapped, otherwise the lambda would become a method rather than a member
    CUSTOM = _CallableValue(lambda x : np.linalg.norm(x)**2)  # example of custom function

    def __call__(self, *args, **kwargs):
        """Call the member's value, so that `Force_field_reg.CUSTOM(x)` works.

        Raises TypeError for the members whose value is a string.
        """
        return self.value(*args, **kwargs)

In [4]:
Force_field_reg.PLAIN_L2 = 7

# correctly, this fails: an existing Enum member cannot be reassigned (AttributeError, see the output below)

AttributeError: Cannot reassign members.

In [4]:
ff_reg = Force_field_reg.PLAIN_L2

# `.value` is the string stored in the selected member
if ff_reg.value == 'plain l2':
    print('yes')

yes


In [5]:
class _CallableValue:
    """Wrap a function so that `Enum` stores it as a member value.

    A bare function (or lambda) assigned in an `Enum` body is a descriptor, so
    `Enum` treats it as a method and does NOT create a member for it.
    Instances of this class define no `__get__`, hence they are stored as
    ordinary member values.
    """
    def __init__(self, func):
        self.func = func

    def __call__(self, *args, **kwargs):
        return self.func(*args, **kwargs)


class Forward_model_reg(Enum):
    """Allowed regularizations of the forward model.

    The value of each member is either the string naming a built-in
    regularization or, for `CUSTOM`, a callable computing the regularization term.
    """
    PLAIN_L2 = 'plain l2'
    # wrapped, otherwise the lambda would become a method rather than a member
    CUSTOM = _CallableValue(lambda x : np.linalg.norm(x)**2)

    def __call__(self, *args, **kwargs):
        """Call the member's value, so that `Forward_model_reg.CUSTOM(x)` works.

        Raises TypeError for the members whose value is a string.
        """
        return self.value(*args, **kwargs)

# class Regularization:
#     force_field_reg = Force_field_reg
#     forward_model_reg = Forward_model_reg
    
#     assert type(force_field_reg)

#### 2. define `Regularization`

you can also define the `Regularization` class with `Type`

optimal way to define it:

In [6]:
class Regularization:
    """Container for the two regularization choices, both optional.

    Parameters
    ----------
    force_field_reg : Force_field_reg or None
        Regularization of the force-field correction.
    forward_model_reg : Forward_model_reg or None
        Regularization of the forward model.

    Raises
    ------
    TypeError
        If an argument is neither None nor a member of the corresponding Enum.
    """
    def __init__(self, force_field_reg : Optional[Force_field_reg] = None,
        forward_model_reg: Optional[Forward_model_reg] = None):

        # None means "not specified" and is always accepted
        if force_field_reg is not None and not isinstance(force_field_reg, Force_field_reg):
            raise TypeError("force_field_reg must be an instance of Force_field_reg")
        self.force_field_reg = force_field_reg

        if forward_model_reg is not None and not isinstance(forward_model_reg, Forward_model_reg):
            raise TypeError("forward_model_reg must be an instance of Forward_model_reg")
        self.forward_model_reg = forward_model_reg


#### 3. usage

In [8]:
# only the force-field regularization is given; the forward-model one keeps its default
reg = Regularization(force_field_reg=Force_field_reg.KL_DIVERGENCE)

# one value per line: the string of the chosen member, then the unspecified forward-model choice
print(reg.force_field_reg.value, reg.forward_model_reg, sep='\n')

KL divergence
None


In [13]:
# this time only the forward-model regularization is selected
reg = Regularization(
    forward_model_reg=Forward_model_reg.PLAIN_L2,
)

print(reg.forward_model_reg.value)

plain l2


In [14]:
# the first positional argument is `force_field_reg`
reg1 = Regularization(Force_field_reg.KL_DIVERGENCE)
reg2 = Regularization(None)  # now allowed

# printing a member (rather than its `.value`) shows `ClassName.MEMBER_NAME`
print(reg1.force_field_reg, reg2.force_field_reg, sep='\n')

Force_field_reg.KL_DIVERGENCE
None


In [15]:
# explicitly passing None is the same as leaving the argument out
reg = Regularization(force_field_reg=None)

print(reg.force_field_reg)

None


In [17]:
def my_fun(x):
    """Example of a user-defined regularization (squared L2 norm)."""
    return np.linalg.norm(x)**2


try:
    # Rebinding an Enum member raises AttributeError. This is done BEFORE touching `reg`:
    # in a chained assignment (`a = b = value`) the targets are assigned from left to right,
    # so `reg.force_field_reg` would be overwritten before the error is raised.
    Force_field_reg.PLAIN_L2 = my_fun
    reg.force_field_reg = Force_field_reg.PLAIN_L2  # not reached
except AttributeError:
    print('correctly, it returns error')

correctly, it returns error


In [21]:
# `reg` still holds the value it was created with: the failed assignment in the previous cell did not modify it
print(reg.force_field_reg)

None


### 2. Classes as a collection of files
(or: *from a directory to a class instance*)

suppose you are running some simulations (MCMC, for instance) which end up with multiple folders, one for each simulation; each folder contains several files, like properties of the MCMC run, the specific trajectory, the energy time series, other time series...

for each folder, you want to define an object (instance of class `MyData`), with all the information stored in the corresponding folder

then, you can build a dictionary (or a list) with all these objects

see also: `Jeffreys_prior/main_notebook_3.ipynb`

In [None]:
class MyData_parvalues:
    """
    One row of a pandas dataframe exposed as a class instance, without copying the data;
    in this way, you can add further properties to each class instance, beyond those in the dataframe.

    Two ways of exposing a column (for example `stride`) as an attribute:

    - `self.stride = row['stride']`: the value is copied onto the instance, so a later
      change made through the instance is not reflected in the original dataframe
      (and vice versa). This is what the following loop would do for every column:

          for s in list(df.columns):
              setattr(self, s, df[s].loc[label])

    - keep only a reference to the dataframe (`self.df = df`) and read the value on access
      through a property; the attribute-like syntax is preserved (`obj.stride` rather than
      `obj.stride()`) and the value always reflects the current content of the dataframe:

          @property
          def stride(self):
              return self.df.loc[self.label, 'stride']

    `__init__` follows the second way, building one such property for each column of `df`.

    Parameters
    ----------
    i :
        Stored as `self.i`.
    label :
        Index label of the row of `df` described by this instance.
    df :
        The full dataframe; only a reference is stored.

    Notes
    -----
    The properties are installed on the class (`self.__class__`), not on the instance,
    and they are read-only. A column named like an instance attribute (`i`, `label`, `df`)
    would therefore prevent that attribute from being assigned in later instantiations.
    """
    def __init__(self, i, label, df):
        self.i = i
        self.label = label
        self.df = df  # this is just a reference (to the full dataframe), so no further memory is required

        def make_getter(col):
            # Read from `self.df` (not from the `df` argument of this call): the property lives
            # on the class and is shared by all instances, each one with its own dataframe.
            return property(lambda self: self.df.loc[self.label, col])

        for col in df.columns:
            setattr(self.__class__, col, make_getter(col))

        # Once these read-only properties are installed, assigning instance attributes with
        # the same names, as in `setattr(self, col, df.loc[label, col])`, raises AttributeError.

    def get_attributes(self):
        """Return the names listed by `dir(self)` which do not start with a double underscore."""
        return [s for s in dir(self) if not s.startswith('__')]


class MyData(MyData_parvalues):
    """
    A `MyData_parvalues` instance extended with the arrays stored in its results folder.

    The folder is `path + 'Result_' + label`; from there, `trajectory.npy`, `quantities.npy`
    and `energy.npy` are opened with `mmap_mode='r'` (read-only memory maps).

    Parameters
    ----------
    mydata_parvalues : MyData_parvalues
        Instance whose `i`, `label` and `df` are used to initialize the parent class.
    path : str
        Prefix of the results folder; it is concatenated as it is, without adding any separator.
    """
    def __init__(self, mydata_parvalues : MyData_parvalues, path):

        # Only `i`, `label` and `df` are forwarded: passing `**mydata_parvalues.__dict__` instead
        # would fail as soon as the instance holds attributes that are not arguments of the
        # parent `__init__`.
        super().__init__(mydata_parvalues.i, mydata_parvalues.label, mydata_parvalues.df)

        folder = path + 'Result_' + self.label

        self.traj = np.load(folder + '/trajectory.npy', mmap_mode='r')
        self.n_steps_true = self.traj.shape[0]

        quantities = np.load(folder + '/quantities.npy', mmap_mode='r')

        # slices of `quantities`: the first 28 columns, then column 28
        self.obs = quantities[:, :28]
        self.dkl = quantities[:, 28]
        if quantities.shape[1] == 30:
            self.loss = quantities[:, 29]  # without non-informative prior

        self.energy = np.load(folder + '/energy.npy', mmap_mode='r')

        # add average values and stds computed through `my_mean_and_std`


A simpler way, though perhaps less effective / professional (some attributes may be copies rather than references):

In [None]:
class MyData_parvalues:
    """
    One row of a pandas dataframe turned into a class instance (simple version).

    The entries of the row `df.loc[label]` are stored as instance attributes, one per
    column, so further attributes can be added to each instance beyond those in the dataframe.

    Compare with the property-based alternative, which keeps only a reference to the
    dataframe and reads the value on access:

        @property
        def stride(self):
            return self.df.loc[self.label, 'stride']

    Here, instead, `setattr(self, col_name, value)` stores the value itself: modifying the
    attribute does not modify the dataframe.

    Parameters
    ----------
    i :
        Stored as `self.i`.
    label :
        Index label of the row of `df` described by this instance.
    df :
        The full dataframe; a reference is stored as `self.df`. It must include a
        `stride` column, which is converted to `int`.
    """
    def __init__(self, i, label, df):
        self.i, self.label, self.df = i, label, df  # `df` is just a reference to the full dataframe

        # one instance attribute per column of the selected row
        for col_name, value in self.df.loc[self.label].items():
            setattr(self, col_name, value)

        self.stride = int(self.stride)

    def get_attributes(self):
        """Return the names listed by `dir(self)` which do not start with a double underscore."""
        return [name for name in dir(self) if not name.startswith('__')]


class MyData:
    """
    The attributes of a `MyData_parvalues` instance plus the arrays stored in its results folder.

    No inheritance here: every instance attribute of `mydata_parvalues` is set on the new
    object. The folder is `path + 'Result_' + label`; from there, `trajectory.npy`,
    `quantities.npy` and `energy.npy` are opened with `mmap_mode='r'` (read-only memory maps).

    Parameters
    ----------
    mydata_parvalues : MyData_parvalues
        Instance whose attributes (`vars(mydata_parvalues)`) are set on this object.
    path : str
        Prefix of the results folder; it is concatenated as it is, without adding any separator.
    """
    def __init__(self, mydata_parvalues : MyData_parvalues, path):

        for name, value in vars(mydata_parvalues).items():
            setattr(self, name, value)

        folder = path + 'Result_' + self.label

        self.traj = np.load(folder + '/trajectory.npy', mmap_mode='r')
        self.n_steps_true = self.traj.shape[0]

        quantities = np.load(folder + '/quantities.npy', mmap_mode='r')

        # slices of `quantities`: the first 28 columns, then column 28
        self.obs = quantities[:, :28]
        self.dkl = quantities[:, 28]
        if quantities.shape[1] == 30:
            self.loss = quantities[:, 29]  # without non-informative prior

        self.energy = np.load(folder + '/energy.npy', mmap_mode='r')

        # add average values and stds computed through `my_mean_and_std`

### 3. Merge multiple instances
of the same class

In [11]:
class MyQuantities:
    """Two arrays, `x` and `y`, which can be merged across several instances."""

    def __init__(self, x: np.ndarray, y: np.ndarray):
        # `np.array` makes sure that lists (or other sequences) are stored as numpy arrays
        self.x = np.array(x)
        self.y = np.array(y)

    @classmethod
    def merge(cls, instances):
        """Return a new instance with each attribute concatenated over `instances`.

        The attribute names are those of the first instance; the arrays are joined
        along axis 0, in the order of `instances`.
        """
        joined = {
            name: np.concatenate([getattr(item, name) for item in instances], axis=0)
            for name in vars(instances[0])
        }
        return cls(**joined)

In [12]:
# three instances, with a different number of entries each
a = MyQuantities(x=[1, 2], y=[10, 20])
b = MyQuantities(x=[3], y=[30])
c = MyQuantities(x=[4, 5], y=[40, 50])

merged = MyQuantities.merge([a, b, c])

# expected: [1 2 3 4 5] and [10 20 30 40 50]
print(merged.x, merged.y, sep='\n')

vars(merged)

[1 2 3 4 5]
[10 20 30 40 50]


{'x': array([1, 2, 3, 4, 5]), 'y': array([10, 20, 30, 40, 50])}

How can we inherit from the `Result` class?

You can do it like this:

In [None]:
import sys
sys.path.insert(0, '../')

from Core_tools.coretools import Result

In [13]:
class MyQuantities:
    """Two arrays, `x` and `y`, which can be merged across several instances."""

    def __init__(self, x: np.ndarray, y: np.ndarray):
        # `np.array` makes sure that lists (or other sequences) are stored as numpy arrays
        self.x = np.array(x)
        self.y = np.array(y)

    @classmethod
    def merge(cls, instances):
        """Concatenate each attribute over `instances` and wrap the result.

        The attribute names are those of the first instance; the arrays are joined
        along axis 0. The merged instance of `cls` is then passed to `MyQuantitiesConcat`,
        which is what this method returns.
        """
        joined = {
            name: np.concatenate([getattr(item, name) for item in instances], axis=0)
            for name in vars(instances[0])
        }
        return MyQuantitiesConcat(cls(**joined))

class MyQuantitiesConcat(Result):
    """A `Result` initialized with the attributes of a `MyQuantities` instance."""

    def __init__(self, my_quantities_concat : MyQuantities):
        # every instance attribute is forwarded to `Result.__init__` as a keyword argument
        super().__init__(**vars(my_quantities_concat))

NameError: name 'Result' is not defined