# Multi-density clustering for evolving data streams
- [Grid](#Grid)
- [Offline Phase](#Offline-Phase)
- [Online Phase](#Online-Phase)
- [Model starts working here](#Working)

In [1]:
# Notebook metadata: authorship and version string.
__author__ = "Black D Chase,MR-TLL"
__version__ = "0.0.1"

In [None]:
#Imports
import torch
from math import ceil,log2
import log

In [None]:
# Globals - hyperparameters
# NOTE(review): the assignments with no right-hand side are unfilled
# placeholders; this cell cannot run until values are supplied.
alpha =   # numerator of the weight thresholds (wCriteria, OWT) and of the pruning-time formula
lamda =   # decay exponent: every formula uses it as 2**(-lamda * ...)
N =   # factor in the denominators of the weight thresholds
gridGranuality = 25  # presumably the grid resolution -- TODO confirm units/meaning
dimension =   # presumably the number of features per data point -- TODO confirm
# Grid -> CMC weight threshold: alpha / (N * (1 - 2^-lamda)).
# N is a number, so it has to be multiplied; the previous `N(...)` form
# tried to call it and would raise TypeError.
wCriteria = alpha / (N * (1 - 2 ** (-lamda)))

In [None]:
# Dataset input
# NOTE(review): `read` is neither defined nor imported in the visible
# source -- confirm where it is supposed to come from.
data = read()
data  # bare expression; presumably there to display the value as notebook cell output

# Grid
 - Grid can be:
    - Sporadic Grid
    - Normal Grid
    - Core Mini Cluster

In [2]:
class Grid:
    """One cell of the clustering grid plus its summary statistics.

    ``status`` indexes ``gType``: 0 = sporadic, 1 = normal, 2 = core mini
    cluster (CMC).  Grids hash and compare by ``location`` so they can be
    used as dictionary keys or set members.

    ``updateStatus`` reads the module-level hyperparameters ``alpha``,
    ``N``, ``lamda`` and ``wCriteria``; reading module globals needs no
    ``global`` declaration.
    """

    gType = ["Sporadic", "Normal", "CMC"]

    def __init__(self, location,):
        """Create an empty sporadic grid.

        Args:
            location: hashable identifier of the cell (the original note
                says it should be a tuple).
        """
        self.n = 0        # number of updateGS calls since the last flush
        self.t = 0        # time passed to the most recent updateGS call
        self.w = 0        # weight passed to the most recent updateGS call
        self.mcd = None   # set by calMCD
        self.c = None     # set by calCenter
        self.status = 0   # index into gType
        self.location = location
        self.storage = []  # raw points added through addPoint

    def __hash__(self):
        return hash(self.location)

    def __eq__(self, other):
        # Defer to Python's fallback for foreign types instead of raising
        # AttributeError on ``other.location``.
        if not isinstance(other, Grid):
            return NotImplemented
        return self.location == other.location

    def updateGS(self, t, w):
        """Record one more update: bump the counter, store time and weight."""
        self.n += 1
        self.t = t
        self.w = w

    def calMCD(self):
        """Compute, store and return the maximum centre distance.

        This is the largest Euclidean distance between the mean of the
        stored points and any stored point.  Returns 0 when the grid holds
        no points.

        Raises:
            ValueError: if a stored point has a different number of
                dimensions than the centre.
        """
        center = self.calCenter()
        farthest = 0
        if center is not None:
            for point in self.storage:
                if len(point) != len(center):
                    raise ValueError(
                        "point dimensionality does not match the grid centre"
                    )
                # Pair each centre coordinate with the matching point
                # coordinate, and actually keep the square root.
                dist = sum(
                    (c_k - x_k) ** 2 for c_k, x_k in zip(center, point)
                ) ** 0.5
                farthest = max(farthest, dist)
        self.mcd = farthest
        return farthest

    def calCenter(self):
        """Compute, store in ``self.c`` and return the mean of the stored points.

        Returns None (and sets ``self.c`` to None) when the grid is empty.
        """
        if not self.storage:
            self.c = None
            return None
        count = len(self.storage)
        # zip(*storage) walks the points dimension by dimension.
        self.c = [sum(column) / count for column in zip(*self.storage)]
        return self.c

    def addPoint(self, point):
        """Append a raw point to the grid's storage."""
        self.storage.append(point)

    def getDistance(self, point):
        """Placeholder: not implemented yet, returns None."""
        pass

    def __flush(self):
        """Reset the grid to an empty sporadic grid."""
        self.storage = []
        self.n = 0
        self.t = 0
        self.w = 0
        self.mcd = None
        self.c = None
        self.status = 0  # Sporadic

    def __makeitGrid(self):
        """Mark the grid as a normal grid."""
        self.status = 1

    def __makeitCMC(self):
        """Promote the grid to a core mini cluster and refresh its summary."""
        # calMCD recomputes the centre first, so both self.c and self.mcd
        # are up to date afterwards.
        self.calMCD()
        self.status = 2

    def __getOWT(self, t):
        """Return the weight threshold below which the grid is discarded.

        Args:
            t: current time step.
        """
        # Elapsed time is (current - last update + 1), as in the documented
        # OWT formula; the reversed order made the threshold non-positive.
        elapsed = t - self.t + 1
        # NOTE(review): the markdown formula uses the previous update time
        # in the denominator, the code has always used the current time --
        # kept as in the code, confirm which one is intended.
        return alpha * (1 - 2 ** (-lamda * elapsed)) / (N * (1 - 2 ** (-lamda * t)))

    def updateStatus(self, t):
        """Re-classify the grid at time ``t`` and return the new status.

        * more than one update and weight at or above ``wCriteria`` -> CMC (2)
        * no updates, or weight below the OWT threshold -> flushed,
          sporadic (0)
        * otherwise -> normal grid (1)
        """
        # ``>=`` follows the documented Grid->CMC condition.
        if self.n > 1 and self.w >= wCriteria:
            self.__makeitCMC()
        # Cheap emptiness test first, so an empty grid never needs the
        # threshold computation.
        elif self.n < 1 or self.w < self.__getOWT(t):
            self.__flush()
        else:
            self.__makeitGrid()
        return self.status

## Grid Conditions
- Grid weight:
   $$
      W_g(t_c) = \sum_{x \in g} 2^{-\lambda * (t_c - t_x )}
   $$
- Grid weight Update:
   $$
      W_g(t_p,t_c) = 2^{-\lambda * (t_c - t_p )}* W_g(t_p) + 1
   $$
- Maximum possible theoretical weight of all data points:
   $$
   w_{max} = \frac{1}{1-2^{-\lambda}}
   $$
- Time Quantum for Grid and CMC updates 
   $$
   t_{pt} = \frac{log_{2}{\big(\frac{\alpha}{\alpha - N(1-2^{- \lambda})}\big)}}{\lambda}
   $$
- Grid->CMC condition:
    $$
      n_g > 1 \text{ and } w_g \ge \frac{\alpha}{N(1-2^{-\lambda})}
    $$

- OWT parameter for GRID and CMC Updates with time  ::
    If $W_g < OWT$ --> remove grid from grid list.
    $$
      OWT(t_c,t_p) = \frac{\alpha(1-2^{-\lambda(t_c - t_p +1)})}{N(1-2^{-\lambda*t_p})}
    $$
    
## CMC Conditions
- Parameters Of CMC when a grid is promoted to CMC:
  $$
  W_{cmc} = W_{g}
  $$
  $$
  C_{cmc} = \frac{\sum_{1}^{n} f(t_c - t_i)(p_i)}{w_{cmc}}
  $$
  $$
  r_{cmc} = \frac{\sum_{1}^{n} f(t_c - t_i)(distance(p_{ij},c_{cmc}))}{w_{cmc}}
  $$

- If $$ W_{cmc} <  \frac{\alpha}{N(1-2^{-\lambda})} $$
   Then remove cmc from cmc list

# Clusterer

In [3]:
class Cluster:
    """Base class for the clustering phases; it holds no state of its own."""

    # Names the module-level hyperparameters; nothing is assigned here.
    global alpha, N, lamda, wCriteria

    def __init__(self):
        """Create the clusterer; no attributes are set."""

# Online Phase

In [4]:
class Online(Cluster):
    """Online phase: routes stream points to grids and prunes sporadic ones."""

    def __init__(self):
        # Zero-argument super() resolves to Cluster.  The previous
        # ``super(Offline, self)`` named a class Online does not derive
        # from and would raise TypeError.
        super().__init__()
        self.gridList = {}  # maps a grid location to its Grid object
        self.ng = 0  # number of grids created so far
        self.calPT()

    def calPT(self):
        """Compute the pruning period and store it, rounded up, in ``self.t_pt``."""
        t_pt = log2(alpha / (alpha - N * (1 - 2 ** (-lamda)))) / lamda
        self.t_pt = ceil(t_pt)
        log.info(f"Pruning time = {self.t_pt}")

    def findGrid(self, point):
        """Return the grid that owns ``point``, creating it on first use.

        NOTE(review): ``streamData`` called this method but it was never
        defined.  The cell index below assumes ``gridGranuality`` is the
        number of cells per unit length on every axis -- confirm against
        the intended partitioning before relying on it.
        """
        # ``// 1`` floors, so negative coordinates fall into the cell below.
        location = tuple(int(coord * gridGranuality // 1) for coord in point)
        grid = self.gridList.get(location)
        if grid is None:
            grid = Grid(location)
            self.gridList[location] = grid
            self.ng += 1
        return grid

    def streamData(self, data):
        """Consume ``data`` batch by batch until the stream is exhausted.

        Args:
            data: object whose ``readPointFromStream()`` returns an iterable
                of points, or None when the stream has ended.
        """
        t = 0  # time passed, one step per batch
        while True:
            t += 1
            newPoints = data.readPointFromStream()
            if newPoints is None:
                break
            for point in newPoints:
                # The grid may be sporadic, normal or CMC; the point is
                # added in every case.
                grid = self.findGrid(point)
                grid.addPoint(point)
                # Decay the previous weight and count the new point, per the
                # grid weight update rule.  Without this the grid's counters
                # stay at zero and updateStatus flushes it straight away.
                weight = 2 ** (-lamda * (t - grid.t)) * grid.w + 1
                grid.updateGS(t, weight)
                grid.updateStatus(t)

            if t % self.t_pt == 0:
                # Pruning, after every t_pt time steps.  Iterate over a
                # snapshot of the keys because entries are removed inside
                # the loop.
                for loc in list(self.gridList):
                    if self.gridList[loc].updateStatus(t) == 0:
                        del self.gridList[loc]

# Offline Phase

In [6]:
class Offline(Cluster):
    """Offline phase of the clusterer; adds nothing to Cluster so far."""

    def __init__(self):
        """Run the base-class initialisation only."""
        super().__init__()

# Working

In [None]:
# Initialising grids
# NOTE: the triple-quoted block below is a disabled stub kept as a string
# literal; it defines nothing at runtime.
"""
def initializeGridsStructure(data):
    global N,gridGranuality,dimension
#"""
# I don't think we need to start 
#"""   