# Multi-density clustering for evolving data streams
- [Grid](#Grid)
- [Offline Phase](#Offline-Phase)
- [Online Phase](#Online-Phase)
- [Model starts working here](#Working)

In [1]:
# Notebook metadata: author names and version string.
__author__='Black D Chase,MR-TLL'
__version__='0.0.1'  

In [None]:
#Imports
import torch
from math import ceil,log2
import log

In [None]:
# Globals - hyperparameters
alpha = 
lamda = 
N = 
gridGranuality = 25
dimension = 
wCriteria = alpha/(N(1-2**(-lamda)))

In [None]:
# Dataset input
# NOTE(review): `read` is not defined or imported in the visible cells -- confirm where it comes from.
data = read()
# Bare expression: presumably left as the last line so the notebook displays `data`.
data

# Grid
 - Grid can be:
    - Sporadic Grid
    - Normal Grid
    - Core Mini Cluster

In [2]:
class Grid:
    """One grid cell of the stream clusterer.

    A grid is in one of three states, stored in ``status`` as an index into
    ``gType``: 0 = sporadic, 1 = normal, 2 = core mini cluster (CMC).

    The hyperparameters ``alpha``, ``N``, ``lamda`` and ``wCriteria`` are read
    from module globals at call time.

    Points kept in ``storage`` are indexable coordinate sequences that carry
    their arrival time in a ``t`` attribute.
    NOTE(review): if a point exposes a ``coords`` attribute it is used as the
    coordinate sequence instead -- confirm against the real point class.
    """

    # Human-readable state names; the index matches ``status``.
    gType=["Sporadic","Normal","CMC"]

    def __init__(self,location,):
        self.n = 0        # counter incremented by updateGS
        self.t = 0        # time passed to the last updateGS call
        self.w = 0        # weight passed to the last updateGS call
        self.mcd = None   # max distance from center to a stored point (set by calMCD)
        self.c = None     # decay-weighted center (set by calCenter)
        self.r = None     # decay-weighted mean distance to center (set by calRadius)
        self.status = 0
        self.location = location
        # location is hashed, so it has to be hashable (e.g. a tuple)
        self.storage = []

    def __hash__(self):
        return hash(self.location)

    # Might not need this:
    # def __eq__(self,other):
    #     return self.location==other.location

    @staticmethod
    def _coords(point):
        """Return the coordinate sequence of ``point``."""
        return getattr(point, "coords", point)

    @staticmethod
    def _decay(elapsed):
        """Return the fading weight 2^(-lamda * elapsed)."""
        return 2 ** (-lamda * elapsed)

    def updateGS(self,t,w):
        """Record one more update: bump ``n`` and store time ``t`` and weight ``w``."""
        self.n += 1
        self.t = t
        self.w = w

    def calMCD(self,time=None):
        """Return the largest distance from the grid center to any stored point.

        The center is recomputed first, at ``time`` (or at ``self.t`` when no
        time is given). The result is also stored in ``self.mcd``.
        """
        self.calCenter(self.t if time is None else time)
        mcd = 0
        for point in self.storage:
            assert len(self.c) == len(self._coords(point))
            mcd = max(mcd, self.getDistance(point))
        self.mcd = mcd
        return mcd

    def calCenter(self,time):
        """Return the decay-weighted center of the stored points at ``time``.

        Each point contributes ``2^(-lamda * (time - point.t))`` times its
        coordinates; the sum is divided by the grid weight ``self.w``. The
        result is also stored in ``self.c``.

        Raises AssertionError if ``self.w`` is zero and IndexError if no
        points are stored.
        """
        assert(self.w!=0) ## weight must be non-zero for cmc center calc.

        length = len(self._coords(self.storage[0]))
        totals = [0.0] * length
        for point in self.storage:
            weight = self._decay(time - point.t)
            coords = self._coords(point)
            for idx in range(length):
                totals[idx] += weight * coords[idx]

        # Divide by the weight itself; the weight is a number, not a sequence.
        self.c = [total / self.w for total in totals]
        return self.c

    def calRadius(self,time):
        """Return the decay-weighted mean distance of stored points to the center.

        Uses the current ``self.c``, computing it first if it is still unset.
        The result is also stored in ``self.r``.

        Raises AssertionError if ``self.w`` is zero.
        """
        assert(self.w!=0) ## weight must be non-zero for cmc radius calc.

        if self.c is None:
            self.calCenter(time)

        total = 0.0
        for point in self.storage:
            total += self._decay(time - point.t) * self.getDistance(point, self.c)

        self.r = total / self.w
        return self.r

    def addPoint(self,point,t=None):
        """Store ``point``; if the grid is a CMC, refresh its CMC statistics.

        ``t`` is the time used for the refresh. When omitted, the point's own
        ``t`` attribute is used, falling back to ``self.t``.
        """
        self.storage.append(point)
        if self.status == 2:
            if t is None:
                t = getattr(point, "t", self.t)
            self.__makeitCMC(t)
            log.debug(f"Status Updated {self.status}")

    def getDistance(self,point,center=None):
        """Return the Euclidean distance from ``point`` to ``center``.

        ``center`` defaults to the grid center ``self.c``.
        """
        if center is None:
            center = self.c
        coords = self._coords(point)
        squared = sum((x - c) ** 2 for x, c in zip(coords, center))
        return squared ** 0.5

    def __flush(self):
        """
        Sporadic
        Will be deleted to make room
        """
        self.status = 0

    def __makeitGrid(self):
        """
        Normal
        Will now be a normal grid
        """
        self.status = 1

    def __makeitCMC(self,t):
        """
        Core Mini Cluster
        Will now be considered as a mini cluster
        """
        # calMCD recomputes the center at time t before measuring distances.
        self.calMCD(t)
        self.calRadius(t)
        self.status = 2

    def __getOWT(self,t):
        """Return the OWT threshold at current time ``t``.

        The numerator uses the time elapsed since the last update,
        ``t - self.t + 1``.
        NOTE(review): the notebook formula writes the denominator with t_p,
        while this code uses the current time ``t`` -- confirm which is intended.
        """
        elapsed = t - self.t + 1
        OWT = alpha*(1-2**(-lamda*elapsed))/(N*(1-2**(-lamda*t)))
        return OWT

    def updateStatus(self,t):
        """Re-classify the grid at time ``t`` and return the new ``status``."""
        if self.n>1 and self.w>wCriteria:
            self.__makeitCMC(t)
        elif self.w<self.__getOWT(t) or self.n<1:
            self.__flush()
        else:
            self.__makeitGrid()
        return self.status

## Grid Conditions
- Grid weight:
   $$
      W_g(t_c) = \sum_{x \in g} 2^{-\lambda (t_c - t_x)}
   $$
- Grid weight Update:
   $$
      W_g(t_p,t_c) = 2^{-\lambda (t_c - t_p)} \, W_g(t_p) + 1
   $$
- Maximum possible theoretical weight of all data points:
   $$
   w_{max} = \frac{1}{1-2^{-\lambda}}
   $$
- Time Quantum for Grid and CMC updates 
   $$
   t_{pt} = \frac{\log_{2}{\big(\frac{\alpha}{\alpha - N(1-2^{- \lambda})}\big)}}{\lambda}
   $$
- Grid->CMC condition:
    $$
      n_g > 1 \text{ and } w_g \ge \frac{\alpha}{N(1-2^{-\lambda})}
    $$

- OWT parameter for grid and CMC updates over time:
    If $W_g < OWT$, remove the grid from the grid list.
    $$
      OWT(t_c,t_p) = \frac{\alpha(1-2^{-\lambda(t_c - t_p +1)})}{N(1-2^{-\lambda*t_p})}
    $$
    
## CMC Conditions
- Parameters of a CMC when a grid is promoted to CMC, where $f(\Delta t) = 2^{-\lambda \Delta t}$ is the decay function:
  $$
  W_{cmc} = W_{g}
  $$
  $$
  C_{cmc} = \frac{\sum_{1}^{n} f(t_c - t_i)(p_i)}{w_{cmc}}
  $$
  $$
  r_{cmc} = \frac{\sum_{i=1}^{n} f(t_c - t_i)\,distance(p_i,c_{cmc})}{w_{cmc}}
  $$

- If $W_{cmc} < \frac{\alpha}{N(1-2^{-\lambda})}$,
   then remove the CMC from the CMC list.

# Clusterer

In [3]:
class Cluster:
    """Shared state for the clustering phases: a grid registry and a grid counter."""

    def __init__(self):
        # Container holding all grid objects (a dict, despite the name).
        self.gridList = dict()
        # Number of grids made so far.
        self.ng = 0

# Online Phase

In [4]:
class Online(Cluster):
    """Online phase: reads points from a stream and maintains the grids.

    Reads the hyperparameters ``alpha``, ``N`` and ``lamda`` from module globals.
    """

    def __init__(self):
        # Zero-argument super(): the previous call named Offline, which is not
        # this class.
        super().__init__()
        self.calPT()

    def calPT(self):
        """Compute the pruning period and store it, rounded up, in ``self.t_pt``."""
        t_pt = log2(alpha/(alpha -N*(1-2**(-lamda))))/lamda
        self.t_pt = ceil(t_pt)
        log.info(f"Pruning time = {self.t_pt}")

    def streamData(self,data):
        """Consume ``data`` until ``data.readPointFromStream()`` returns None.

        Every read advances the time step ``t`` by one. Each point of a batch
        is added to the grid returned by ``findGrid`` and that grid is
        re-classified. Every ``t_pt`` steps all registered grids are
        re-classified, which removes the sporadic ones.
        """
        t=0
        # Time passed
        while True:
            t+=1
            newPoints = data.readPointFromStream()
            if newPoints is None:
                break
            for point in newPoints:
                # grid could be sporadic, normal or cmc; the point is added either way
                grid = self.findGrid(point)
                grid.addPoint(point)
                self.updateGrid(grid,t)

            if t%self.t_pt==0:
                # Pruning, after every t_pt timestep.
                # Iterate over a snapshot: updateGrid may remove entries.
                for grid in list(self.gridList.values()):
                    self.updateGrid(grid,t)

    def updateGrid(self,grid,t):
        """Re-classify ``grid`` at time ``t`` and drop it if it became sporadic."""
        status = grid.updateStatus(t)
        log.debug(f"{grid.location} status = {status}")
        if status==0:
            # NOTE(review): the key type of gridList is not visible here; try the
            # grid object first, then its location -- confirm against findGrid/makeGrid.
            key = grid if grid in self.gridList else grid.location
            sporadic = self.gridList.pop(key, None)
            if sporadic is not None:
                log.debug(f"Popped {sporadic.location}")

    def findGrid(self,point):
        """Return the grid that ``point`` belongs to (not implemented yet)."""
        raise NotImplementedError("Online.findGrid is not implemented yet")

    def makeGrid(self,point):
        """Create the grid for ``point`` (not implemented yet)."""
        raise NotImplementedError("Online.makeGrid is not implemented yet")

# Offline Phase

In [6]:
class Offline(Cluster):
    """Offline phase; for now it only runs the Cluster initialiser."""

    def __init__(self):
        super().__init__()

# Working

In [None]:
# Initialising grids
# The draft below is wrapped in a string literal, so it is not executed.
"""
def initializeGridsStructure(data):
    global N,gridGranuality,dimension
#"""
# I don't think we need to start
#"""   