In [199]:
import numpy as np
from random import choice,sample,randint
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm
from os import remove
import cProfile
from scipy.stats.stats import pearsonr,spearmanr


In [268]:
class GA:
    def __init__(self,generation,population,expressions,scaler,thresh,cutoff,coef,mutation = 0):
        self.generation = generation
        self.population = population
        self.expressions = expressions
        self.history = []    
        self.scaler = scaler
        self.thresh = thresh
        self.cutoff = cutoff
        self.coef = coef
        self.coefSurvive = 0
        self.mutation = int(mutation*population)
    
    def fit_transform(self,X,Y):
        originalX = X.reshape(X.shape[1],X.shape[0],1)
        self.xshape = X.shape
        #print(X.shape)
        tsProb = self._get_ts_prob(self.xshape[1])
        #print(tsProb.shape)
        prevProb = []
        print("Mutate Pop = {}".format(self.mutation))
        
        for i in tqdm(range(self.generation),desc = "Generation"):
            print("Fitting Generation {}".format(i+1))
            self.historyTemp = []
            memArray = np.memmap(dtype = 'float32',filename = "tempArray.array",shape = (self.population+
                                                                                         originalX.shape[0],X.shape[0],
                                                                                         1),mode = "w+")
            memArray[:originalX.shape[0],:,:] = originalX[:]
            surviveCount = originalX.shape[0]
            print("Survive Count = {}".format(surviveCount))
            prob = np.append(tsProb,prevProb)
            prob = self._normalize_prob(prob)
            #print("PREV prob = {} , type = {}".format(prob,prob.dtype))

            
            prevProb = []
            self.coefSurvive = 0
            c = 0
            snr = 0
            muCount = 0
            for j in tqdm(range(self.population+self.mutation),desc = "Population"):
                child,score,survive,ops = self._gen_child(X,Y,prob,i)
                if(j>=self.population):
                    muCount+=1
                if(survive or j>=self.population):
                    self.historyTemp.append(ops)
                    #print("SNR = {}".format(score))
                    snr += score
                    child = child.reshape(len(child),1)
                    child = self.scaler.fit_transform(child)
                    memArray[surviveCount] = child
                    surviveCount+=1
                    c = c+1
                    prevProb.append(score)
            if (muCount==0 or c==0):
                print("No surviver return previous generation {}".format(i))
                return X
            
            X = memArray[:surviveCount,:,:]
            print("pre {}".format(X.shape))
            X = X.reshape((X.shape[0],X.shape[1])).transpose()
            remove("tempArray.array")
            print("Coef survive = {}".format(self.coefSurvive))
            print("XSHAPE = {}".format(X.shape))
            print("SURVIVED = {} ,, Mutate Pop = {}".format(c-muCount,muCount))
            self.history.append(self.historyTemp)
        return X[:,self.xshape[1]:]
    
    def transform(self,X,gen = -1):
        ori = X.shape
        originalX = X.transpose()
        originalX = originalX.reshape(originalX.shape[0],originalX.shape[1],1)
        
        tempHist = self.history
        if gen >0:
            tempHist = self.history[:gen:]
            
        for i,generation in tqdm(enumerate(tempHist),desc="Generation"):
            print("Transforming Generation {}".format(i+1))
            
            memArray = np.memmap(dtype = "float32"
                                 ,filename = "tempArray2.array"
                                 ,shape = (self.population+originalX.shape[0],X.shape[0],1)
                                 ,mode = "w+")
            
            memArray[:originalX.shape[0],:,:] = originalX[:]
            jdx = originalX.shape[0]
            #tempChild = np.zeros((X.shape[0],1))
            for j,population in tqdm(enumerate(generation),desc = "Population"):
                expression,idx,n = population
                operand = self._gen_operand(X,idx)
              
                child = expression.compute(operand,n)
                child = child.reshape(len(child),1)

                child = self.scaler.fit_transform(child)
                
                memArray[jdx] = child
                #tempChild = np.hstack((tempChild,child))
                jdx = jdx+1
            X = memArray[:jdx,:,:]
            #print(X.shape)
            X = X.reshape((X.shape[0],X.shape[1])).transpose()
            #print(X.shape)
            remove("tempArray2.array")
        print(originalX.shape)
        return X[:,ori[1]:]
    
    
    def _get_ts_prob(self,size):
        M = self.xshape[1]
        priorprob = 1.-np.exp(0.05*np.arange(M))+np.exp(0.05*M)
        priorprob = priorprob/np.sum(priorprob)
        return priorprob
    
    def get_history(self):
        return self.history
    
    def _normalize_prob(self,prob):
        prob_factor = 1 / sum(prob)
        res =  prob*prob_factor
        #print("Normalize prob :{}".format(res.dtype))
        return res
        
    
    def _gen_child(self,X,Y,prob,i,mutate = False):
        expression = self._gen_function()
        #n_var = expression.gen_var(X)
        n_var = expression.n_var
        if (n_var == -1):
            n_var = self._gen_n(X)
        idx = self._gen_idx(X,n_var,prob)
        n = expression.gen_n()
        operand = self._gen_operand(X,idx)
        result = expression.compute(operand,n)
        ##############
        survivedCoef = self._coef_thresh(result,X)
        survived,score = self.thresh.survived(result,Y,self.cutoff[i])
        if(survivedCoef):
            self.coefSurvive +=1

        
        if ((survived and survivedCoef) or mutate):
            return result,score,True,(expression,idx,n)
        else:
            #print(score)
            return result,score,False,(expression,idx,n)
    
    def _corr2_coeff(self,A,B):
        # Rowwise mean of input arrays & subtract from input arrays themeselves
        A_mA = A - A.mean(1)[:,None]
        B_mB = B - B.mean(1)[:,None]

        # Sum of squares across rows
        ssA = (A_mA**2).sum(1)
        ssB = (B_mB**2).sum(1)

        # Finally get corr coeff
        return np.nan_to_num(np.dot(A_mA,B_mB.T)/np.sqrt(np.dot(ssA[:,None],ssB[None])))
    
    
    def _coef_thresh(self,result,X):
        
        result = result.reshape(1,result.shape[0])
        parents = X.transpose()
        cor = self._corr2_coeff(parents,result)
        cor = cor.reshape(cor.shape[0])
        if (np.all(np.absolute(cor)<self.coef)):
            return True
        #print("MAX = {},,MIN = {}".format(cor.max(),cor.min()))
        return False
    
    def _gen_n(self,X):
        n = np.random.randint(1,X.shape[1])
        #print(n)
        return n

    def _gen_function(self):
        return choice(self.expressions)        
    
    def _gen_idx(self,X,n_var,prob):
        if(prob.dtype =='O'):
            print(prob)
            print(prob.dtype)
            print(X.dtype)
        return np.random.choice(a = range(0,X.shape[1]),size = n_var,p = prob)
        #return np.random.choice(a = range(0,X.shape[1]),size = n_var)
        
    def _gen_operand(self,X,idx):
        #print(X.shape)
        #print(X[:,idx].shape)
        return X[:,idx]

        
        

In [269]:
## return numbers only
class ABS:
    n_var = 1
    def gen_n():
        return None
    def compute(x,n):
        return np.absolute(x,axis = 1)

class Sin:
    n_var = 1
    def gen_n():
        return None
    def compute(x,n):
        result = np.sin(x)
        return np.nan_to_num(result)
    

class Log:
    n_var = 1
    def gen_n():
        return None
    def compute(x,n):
        result = np.log(x[:,0])
        return np.nan_to_num(result)

class Add:
    n_var = 2
    def gen_n():
        return None
    def compute(x,n):
        return np.add(x[:,0],x[:,1])

class Subtract:
    n_var = 2
    def gen_n():
        return None
    def compute(x,n):
        return np.subtract(x[:,0],x[:,1])
    
class Multiply:
    n_var = 2
    def gen_n():
        return None
    def compute(x,n):
        return np.multiply(x[:,0],x[:,1])

class Divide:
    n_var = 2
    def gen_n():
        return None
    def compute(x,n):
        result = np.divide(x[:,0],x[:,1])
        return np.nan_to_num(result)

class Median:
    n_var = 10
    def gen_n():
        return None
    def compute(x,n):
        result = np.median(x,axis = 1)
        return result
    
    
class Mean:
    n_var = 10
    def gen_n():
        return None
    def compute(x,n):
        return np.mean(x,axis = 1)
    
class Mode:
    n_var = 10
    def gen_n():
        return None
    def compute(x,n):
        return np.max(x,axis = 1)
    
class Min:
    n_var = 10
    def gen_n():
        return None
    def compute(x,n):
        return np.min(x,axis = 1)
    
class Total:
    n_var = 10
    def gen_n():
        return None
    def compute(x,n):
        return np.sum(x,axis = 1)
    

class Power:
    n_var = 1
    def gen_n():
        n = choice([1,2,3,4])
        return n
        
    def compute(x,n):
        return np.nan_to_num(x**n)

class Exp:
    n_var = 1
    def gen_n():
        n = choice([1,2,3,4])
        return n
    
    def compute(x,n):
        return np.nan_to_num(np.exp(n*x))
    
class Tanh:
    n_var = 1
    def gen_n():
        return None
    
    def compute(x,n):
        return np.nan_to_num(np.tanh(x))

    
class Cos:
    n_var = 1
    
    def gen_n():
        return randint(0,90)
    
    def compute(x,n):
        return np.nan_to_num(np.cos(x+n))

class Sigmoid:
    n_var = 1
    def gen_n():
        return None
    
    def compute(x,n):
        res = 1/(1+np.exp(-x))
        return res
    



    


In [270]:
class DummpyScale():
    def fit_transform(self,x):
        return x
    
class RandomPass():
    def survived(self,x,y,t):
        sc = randint(0,1)
        return bool(sc),sc

class SNR():
        
    def survived(self, X, y,thresh):
        if len(X.shape) == 1:
            fdr = [self.__binfdr(X,y)]
        else:
            (N,M) = X.shape
            fdr = np.array([])
            for j in range(M):
                feat = X[:,j]
                
                score = self.__binfdr(feat,y)
                fdr = np.append(fdr,score)
        #print("fdr = {}:::type = {}".format(fdr,type(fdr)))
        #print("thresh = {}:::type = {}".format(thresh,type(thresh)))
        return thresh<fdr[0],fdr[0]
    
    def __binfdr(self, feat, y):
        (pluses,minuses) = np.unique(y)
        idx = np.where(y == pluses)
        x = feat[idx]
        muPlus = np.mean(x)
        varPlus = np.var(x)
        idx = np.where(y == minuses)
        x = feat[idx]
        muMinus = np.mean(x)
        varMinus = np.var(x)

        score = ((muPlus - muMinus)**2)/(varPlus + varMinus)
        return score
    

In [271]:
trainX = np.random.randint(0,101,(2000,1000))
trainY = np.random.randint(0,2,(1000))

In [272]:
#    def __init__(self,generation,population,expressions,scaler,thresh,cutoff,coef):


scaler = MinMaxScaler(feature_range=(0.1,1.9))
methods = [Add,Subtract,Multiply,Divide,Log,Mean,Power,Cos,Sin,Tanh,Exp,Sigmoid,Mode,Min,Total,Median]
thresh = SNR()
g = GA(2,1000,methods,scaler,thresh,[0.01,0.01],0.999,0.1)

In [273]:
result = g.fit_transform(trainX,trainY)

Generation:   0%|          | 0/2 [00:00<?, ?it/s]
  ret = umr_sum(arr, axis, dtype, out, keepdims)
  arrmean = umr_sum(arr, axis, dtype, keepdims=True)

Population:   0%|          | 5/1100 [00:00<00:25, 42.64it/s][A

Mutate Pop = 100
Fitting Generation 1
Survive Count = 1000



Population:   1%|          | 10/1100 [00:00<00:25, 42.37it/s][A
Population:   1%|▏         | 15/1100 [00:00<00:25, 42.76it/s][A
Population:   2%|▏         | 20/1100 [00:00<00:24, 43.22it/s][A
Population:   2%|▏         | 25/1100 [00:00<00:25, 42.94it/s][A
Population:   3%|▎         | 29/1100 [00:00<00:25, 42.14it/s][A
Population:   3%|▎         | 34/1100 [00:00<00:25, 42.20it/s][A
Population:   4%|▎         | 39/1100 [00:00<00:24, 42.56it/s][A
Population:   4%|▍         | 44/1100 [00:01<00:24, 42.81it/s][A
Population:   4%|▍         | 49/1100 [00:01<00:24, 42.97it/s][A
Population:   5%|▍         | 54/1100 [00:01<00:24, 43.00it/s][A
Population:   5%|▌         | 59/1100 [00:01<00:24, 43.22it/s][A

Population:   6%|▋         | 69/1100 [00:01<00:23, 43.52it/s][A
Population:   7%|▋         | 74/1100 [00:01<00:23, 43.48it/s][A
Population:   7%|▋         | 79/1100 [00:01<00:23, 43.55it/s][A
Population:   8%|▊         | 84/1100 [00:01<00:23, 43.50it/s][A
Population:   8%|▊     

Population:  50%|█████     | 554/1100 [00:12<00:12, 43.88it/s][A
Population:  51%|█████     | 559/1100 [00:12<00:12, 43.89it/s][A
Population:  51%|█████▏    | 564/1100 [00:12<00:12, 43.88it/s][A
Population:  52%|█████▏    | 569/1100 [00:12<00:12, 43.88it/s][A
Population:  52%|█████▏    | 574/1100 [00:13<00:11, 43.87it/s][A
Population:  53%|█████▎    | 579/1100 [00:13<00:11, 43.88it/s][A
Population:  53%|█████▎    | 584/1100 [00:13<00:11, 43.88it/s][A
Population:  54%|█████▎    | 589/1100 [00:13<00:11, 43.89it/s][A
Population:  54%|█████▍    | 594/1100 [00:13<00:11, 43.90it/s][A
Population:  54%|█████▍    | 599/1100 [00:13<00:11, 43.92it/s][A
Population:  55%|█████▍    | 604/1100 [00:13<00:11, 43.92it/s][A
Population:  55%|█████▌    | 609/1100 [00:13<00:11, 43.92it/s][A
Population:  56%|█████▌    | 614/1100 [00:13<00:11, 43.91it/s][A
Population:  56%|█████▋    | 619/1100 [00:14<00:10, 43.91it/s][A
Population:  57%|█████▋    | 624/1100 [00:14<00:10, 43.90it/s][A
Population

pre (1129, 2000, 1)
Coef survive = 1079
XSHAPE = (2000, 1129)
SURVIVED = 29 ,, Mutate Pop = 100
Fitting Generation 2
Survive Count = 1000



Population:   1%|▏         | 14/1100 [00:00<00:16, 67.67it/s][A

Population:   3%|▎         | 28/1100 [00:00<00:16, 66.69it/s][A
Population:   3%|▎         | 35/1100 [00:00<00:16, 66.38it/s][A
Population:   4%|▍         | 43/1100 [00:00<00:15, 67.71it/s][A
Population:   5%|▍         | 51/1100 [00:00<00:15, 68.52it/s][A
Population:   5%|▌         | 59/1100 [00:00<00:15, 69.08it/s][A
Population:   6%|▌         | 67/1100 [00:00<00:14, 69.42it/s][A
Population:   7%|▋         | 75/1100 [00:01<00:14, 69.67it/s][A
Population:   8%|▊         | 83/1100 [00:01<00:14, 69.85it/s][A
Population:   8%|▊         | 91/1100 [00:01<00:14, 69.90it/s][A
Population:   9%|▉         | 99/1100 [00:01<00:14, 70.15it/s][A
Population:  10%|▉         | 107/1100 [00:01<00:14, 70.10it/s][A
Population:  10%|█         | 115/1100 [00:01<00:14, 70.32it/s][A
Population:  11%|█         | 123/1100 [00:01<00:13, 70.48it/s][A
Population:  12%|█▏        | 131/1100 [00:01<00:13, 70.51it/s][A
Population:  13%|█▎

Population:  86%|████████▋ | 950/1100 [00:13<00:02, 69.05it/s][A
Population:  87%|████████▋ | 957/1100 [00:13<00:02, 68.97it/s][A
Population:  88%|████████▊ | 964/1100 [00:13<00:01, 68.92it/s][A
Population:  88%|████████▊ | 971/1100 [00:14<00:01, 68.84it/s][A
Population:  89%|████████▉ | 978/1100 [00:14<00:01, 68.75it/s][A
Population:  90%|████████▉ | 985/1100 [00:14<00:01, 68.70it/s][A
Population:  90%|█████████ | 992/1100 [00:14<00:01, 68.68it/s][A
Population:  91%|█████████ | 999/1100 [00:14<00:01, 68.66it/s][A
Population:  91%|█████████▏| 1006/1100 [00:14<00:01, 68.67it/s][A
Population:  92%|█████████▏| 1013/1100 [00:14<00:01, 68.65it/s][A
Population:  93%|█████████▎| 1020/1100 [00:14<00:01, 68.62it/s][A
Population:  93%|█████████▎| 1027/1100 [00:14<00:01, 68.57it/s][A
Population:  94%|█████████▍| 1034/1100 [00:15<00:00, 68.56it/s][A
Population:  95%|█████████▍| 1041/1100 [00:15<00:00, 68.55it/s][A
Population:  95%|█████████▌| 1048/1100 [00:15<00:00, 68.55it/s][A
Pop

pre (1205, 2000, 1)
Coef survive = 669
XSHAPE = (2000, 1205)
SURVIVED = 105 ,, Mutate Pop = 100





In [274]:
hist = g.get_history()
len(hist),len(hist[1])

(2, 205)

In [275]:
result2 = g.transform(trainX)

Generation: 0it [00:00, ?it/s]
  return umr_sum(a, axis, dtype, out, keepdims)

Generation: 1it [00:00,  7.06it/s]t/s][A


Transforming Generation 1
Transforming Generation 2



Generation: 2it [00:00,  6.26it/s]t/s][A

(1000, 2000, 1)





In [276]:
result.shape,result2.shape

((2000, 205), (2000, 205))

In [277]:
result

memmap([[0.11076325, 0.11076325, 0.11076325, ..., 1.9       , 0.64      ,
         1.9       ],
        [0.42082262, 0.42082262, 0.42082262, ..., 1.9       , 0.766     ,
         1.9       ],
        [0.5103923 , 0.5103923 , 0.5103923 , ..., 1.9       , 0.28      ,
         1.8998364 ],
        ...,
        [1.8996475 , 1.8996475 , 1.8996475 , ..., 1.9       , 1.486     ,
         1.9       ],
        [0.10021156, 0.10021156, 0.10021156, ..., 1.9       , 1.0899999 ,
         1.9       ],
        [1.9000001 , 1.9000001 , 1.9000001 , ..., 1.9       , 0.694     ,
         1.9       ]], dtype=float32)

In [184]:
result2

memmap([[0.97344315, 0.73954594, 1.0078804 , ..., 0.0999999 , 0.4780945 ,
         0.98739123],
        [1.180703  , 0.5523815 , 0.6134376 , ..., 0.0999999 , 0.97112095,
         1.0372684 ],
        [1.398929  , 0.282445  , 0.5607973 , ..., 0.10000014, 0.9033917 ,
         1.0678296 ],
        ...,
        [1.0597231 , 0.17913797, 0.10227752, ..., 0.0999999 , 1.1323093 ,
         1.632956  ],
        [0.6658411 , 0.4143459 , 0.11735737, ..., 0.0999999 , 1.200048  ,
         1.3406823 ],
        [1.1907865 , 0.9140285 , 0.95050573, ..., 0.0999999 , 0.8139951 ,
         0.8710251 ]], dtype=float32)

In [16]:
result.shape,result2.shape

((1000, 516), (1000, 516))

In [17]:
result

memmap([[0.641076  , 0.1       , 1.7380182 , ..., 0.8456841 , 1.7267678 ,
         0.30976182],
        [0.2859949 , 0.1       , 0.84086883, ..., 0.57633585, 1.7267678 ,
         0.44392622],
        [0.5119556 , 0.1       , 0.84086883, ..., 0.45570642, 0.75005496,
         0.14213815],
        ...,
        [0.38283518, 0.1       , 1.9       , ..., 0.721227  , 0.75005496,
         0.30438185],
        [0.4473954 , 0.1       , 0.84086883, ..., 0.6223329 , 1.7281086 ,
         0.511089  ],
        [0.2537148 , 0.1       , 0.84086883, ..., 0.44601327, 1.7281086 ,
         0.3941002 ]], dtype=float32)

In [18]:
result2

memmap([[0.641076  , 0.1       , 1.7380182 , ..., 0.8456841 , 1.7267678 ,
         0.30976182],
        [0.2859949 , 0.1       , 0.84086883, ..., 0.57633585, 1.7267678 ,
         0.44392622],
        [0.5119556 , 0.1       , 0.84086883, ..., 0.45570642, 0.75005496,
         0.14213815],
        ...,
        [0.38283518, 0.1       , 1.9       , ..., 0.721227  , 0.75005496,
         0.30438185],
        [0.4473954 , 0.1       , 0.84086883, ..., 0.6223329 , 1.7281086 ,
         0.511089  ],
        [0.2537148 , 0.1       , 0.84086883, ..., 0.44601327, 1.7281086 ,
         0.3941002 ]], dtype=float32)

In [18]:
hist = g.get_history()
len(hist),len(hist[2])

IndexError: list index out of range

In [1]:
cProfile.run("g.fit_transform(fitX,fitY)",'myFunction.profile')

import pstats
stats = pstats.Stats("myFunction.profile")
stats.strip_dirs().sort_stats('time').print_stats()

NameError: name 'cProfile' is not defined

In [36]:
M = 1000
priorprob = 1.-np.exp(0.05*np.arange(M))+np.exp(0.05*M)
priorprob = priorprob/np.sum(priorprob)

In [20]:
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'matplotlib'

In [223]:
a = np.array([1,2,3,4,5,6])


In [225]:
np.exp(a)

array([  2.71828183,   7.3890561 ,  20.08553692,  54.59815003,
       148.4131591 , 403.42879349])

In [29]:
np.array([Add]).dtype

dtype('O')

In [None]:
## return numbers only
class Log:
    def gen_n(x):
        return None
    def gen_var(x):
        return 1
    def compute(x,n):
        result = np.log(x[:,0])
        return np.nan_to_num(result)

class Add:
    def gen_n(x):
        return None
    def compute(x,n):
        return np.add(x[:,0],x[:,1])
    def gen_var(x):
        return 2

class Subtract:
    def gen_n(x):
        return None
    def compute(x,n):
        return np.subtract(x[:,0],x[:,1])
    def gen_var(x):
        return 2
    
class Multiply:
    def gen_n(x):
        return None
    def compute(x,n):
        return np.multiply(x[:,0],x[:,1])
    def gen_var(x):
        return 2

class Divide:
    def gen_n(x):
        return None
    def compute(x,n):
        result = np.divide(x[:,0],x[:,1])
        return np.nan_to_num(result)
    def gen_var(x):
        return 2

class Mean:
    def gen_n(x):
        return None
    def compute(x,n):
        return np.mean(x,axis = 1)
    def gen_var(x):
        return np.random.randint(1,10)

class Power:
    def gen_var(x):
        return 1
    def gen_n(x):
        n = choice([1,2,3,4])
        return n
    def compute(x,n):
        return np.nan_to_num(x**n)

class Exp:
    def gen_var(x):
        return 1
    def gen_n(x):
        return None
    def compute(x,n):
        return np.nan_to_num(np.exp(x))
    
class Tanh:
    def gen_var(x):
        return 1
    def gen_n(x):
        return None
    def compute(x,n):
        return np.nan_to_num(np.tanh(x))

    
class Cos:
    def gen_var(x):
        return 1
    def gen_n(x):
        return randint(0,90)
    
    def compute(x,n):
        return np.nan_to_num(np.cos(x+n))

class Sigmoid:
    def gen_var(x):
        return 1
    def gen_n(x):
        return None
    def compute(x,n):
        res = 1/(1+np.exp(-x))
        return res
    



    
