In [13]:
# import necessary libraries
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import normalize
from scipy.sparse.linalg import eigsh
from scipy.signal import find_peaks
from scipy.spatial import Delaunay
from scipy.spatial import distance
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import multiprocessing as mp
from sklearn import metrics
import networkx as nx
import numpy as np
import random
import csv
import pandas as pd

In [14]:
# Run the companion notebook so that its definitions are available to the cells below.
# The classes in this file call uniform_cluster, gaussian_cluster and plot_points, none of
# which is defined here -- presumably they come from this notebook (TODO confirm).
%run ./functions_for_GrapHiC.ipynb

In [15]:
class Point():
    """A 2-D point holding a ground-truth position, a collected position and a list of labels.

    The collected position equals the ground-truth position until ``Scramble`` is called.
    """

    def __init__(self,x,y):
        # Ground-truth (GT) coordinates.
        self.x, self.y = x, y
        # Collected coordinates: identical to the GT ones as long as no perturbation is applied.
        self.x_collected, self.y_collected = x, y
        self.label = []

    def add_label(self, label_to_add):
        """Append ``label_to_add`` to this point's label list."""
        self.label.append(label_to_add)

    def Scramble(self, delta):
        """Set the collected position to the GT position plus zero-mean Gaussian noise.

        The noise covariance is diagonal with ``delta**2`` on both axes.
        """
        noise_cov = [[delta**2, 0], [0, delta**2]]
        jitter = np.random.multivariate_normal([0, 0], noise_cov)
        perturbed = np.array([self.x, self.y]) + jitter
        self.x_collected, self.y_collected = perturbed[0], perturbed[1]

    def GetCoord(self):
        """Return the collected coordinates as a length-2 array."""
        return np.array([self.x_collected, self.y_collected])

    def GetCoord_GT(self):
        """Return the ground-truth coordinates as a length-2 array."""
        return np.array([self.x, self.y])

    def Isin(self, cluster):
        """Return True when the GT position is strictly closer to ``cluster.center`` than ``cluster.radius``."""
        offset = cluster.center - self.GetCoord_GT()
        return bool(np.linalg.norm(offset) < cluster.radius)
        

In [16]:
class Cluster():
    """A cluster described by a center, a radius and a label, holding the points created for it."""

    def __init__(self, center, radius, label):
        self.center = center
        self.radius = radius
        self.points = []
        self.label  = label

    def Fill(self, NumberOfPoints, cluster_shape, seed = 20):
        """Create ``NumberOfPoints`` points for this cluster and append them to ``self.points``.

        Parameters
        ----------
        NumberOfPoints : int
            Forwarded to the sampling helper.
        cluster_shape : {'uniform', 'gaussian'}
            'uniform' calls ``uniform_cluster(radius, NumberOfPoints, center)``;
            'gaussian' calls ``gaussian_cluster(radius / 2, NumberOfPoints, center)``.
        seed : int or None, optional
            Value given to ``np.random.seed`` before sampling. The default (20) keeps the
            historical behaviour, which resets NumPy's global generator on every call.
            Pass ``None`` to leave the global random state untouched.
            NOTE(review): with a fixed seed, successive calls presumably produce the same
            draws if the helpers rely on ``np.random`` -- confirm this is intended.

        Raises
        ------
        ValueError
            If ``cluster_shape`` is not one of the two supported values.
        """
        # Validate first so that an invalid request has no side effect (no reseeding, no points).
        if cluster_shape not in ('uniform', 'gaussian'):
            raise ValueError('cluster_shape must be "uniform" or "gaussian", got %r' % (cluster_shape,))

        if seed is not None:
            np.random.seed(seed)

        if cluster_shape == 'uniform':
            temp = uniform_cluster(self.radius, NumberOfPoints, self.center)
        else:
            temp = gaussian_cluster((self.radius / 2), NumberOfPoints, self.center)

        # Each sampled item is read as (x, y) and tagged with this cluster's label.
        for t in temp:
            new_point = Point(x = t[0], y = t[1])
            new_point.add_label(self.label)
            self.points.append(new_point)

In [20]:
class DataGenerator():
    """Generate a synthetic 2-D point cloud made of clusters plus isolated (noise) points.

    Construction runs the whole pipeline: centers are placed, one ``Cluster`` is created and
    filled per center, every cluster point is perturbed with ``Point.Scramble(delta)``, and
    finally ``IsolatedLoc`` isolated points are drawn outside every cluster.

    Parameters
    ----------
    NumberOfClusters : int
        Number of clusters requested. Fewer may be created when non-overlapping centers
        cannot be found within the attempt budget; 0 (or less) creates no cluster.
    LocPerCluster : sequence
        ``LocPerCluster[i]`` is the number of points passed to ``Cluster.Fill`` for cluster i.
    r : number
        Cluster radius, expressed in the units of the converted frame (``x_lim * 1000``).
    IsolatedLoc : int
        Number of isolated points (label 0) to add.
    NumberOfScales : int, optional
        Stored only; a single label column (``labels_1``) is produced for now.
    delta : number, optional
        Forwarded to ``Point.Scramble`` for every cluster point.
    x_lim, y_lim : pair of numbers, optional
        Frame limits, multiplied by 1000 (micrometer to nanometer).
    cluster_shape : {'uniform', 'gaussian'}, optional
        Forwarded to ``Cluster.Fill``.
    """

    def __init__(self,
                 NumberOfClusters, LocPerCluster, r, IsolatedLoc,
                 NumberOfScales = 1,
                 delta = 1,
                 x_lim = [-5, 5], y_lim = [-5, 5],
                 cluster_shape = 'uniform'):

        # frame shape
        self.xmin, self.xmax = 1000 * x_lim[0], 1000 * x_lim[1] # from micrometer to nanometer
        self.ymin, self.ymax = 1000 * y_lim[0], 1000 * y_lim[1]

        # parameters
        self.NumberOfClusters = NumberOfClusters
        self.numbers          = LocPerCluster
        self.NumberOfScales   = NumberOfScales   # kept for the planned multi-scale labels
        self.r                = r
        self.IsolatedLoc      = IsolatedLoc
        self.cluster_shape    = cluster_shape
        self.delta            = delta

        # Containers filled by the generation steps below.
        self.Clusters = []
        self.IsolatedPoints = []

        # data generation steps
        self.CreateClusters()
        self.AddIsolatedLoc()

    def PlaceCenters(self):
        """Draw cluster centers uniformly in the frame, keeping any two at least ``2 * r`` apart.

        Centers are drawn at least ``r`` away from the frame border. After the first center,
        at most ``10 * NumberOfClusters`` candidates are tried; if that budget runs out, a
        message is printed and ``self.centers`` holds only the centers found so far.
        ``self.centers`` is an array of shape (n, 2); it is empty when no cluster is requested.
        """
        if self.NumberOfClusters <= 0:
            self.centers = np.empty((0, 2))
            return

        low  = [self.xmin + self.r, self.ymin + self.r]
        high = [self.xmax - self.r, self.ymax - self.r]
        min_separation = 2 * self.r
        max_attempts   = self.NumberOfClusters * 10   # 10 is arbitrarily chosen

        accepted = [np.random.uniform(low=low, high=high, size=(1, 2))]
        attempts = 0
        while (len(accepted) < self.NumberOfClusters) and (attempts < max_attempts):
            attempts += 1
            candidate = np.random.uniform(low=low, high=high, size=(1, 2))
            # Reject the candidate as soon as its disk would overlap an accepted one.
            overlaps = any(np.linalg.norm(c - candidate) < min_separation for c in accepted)
            if not overlaps:
                accepted.append(candidate)

        if len(accepted) < self.NumberOfClusters:
            print('cannot place any more clusters')

        self.centers = np.vstack(accepted)

    def InitializeClusters(self):
        """Create one ``Cluster`` per center. Labels start at 1; label 0 is reserved for noise."""
        for i, center in enumerate(self.centers):
            self.Clusters.append(Cluster(center, self.r, label = i+1))

    def FillClusters(self):
        """Call ``Fill`` on every cluster with its entry of ``self.numbers``.

        An unsupported ``cluster_shape`` prints a message and leaves the clusters empty.
        """
        if self.cluster_shape not in ('uniform', 'gaussian'):
            print('cluster_shape must be "uniform" or "gaussian"')
            return
        for i, cluster in enumerate(self.Clusters):
            cluster.Fill(self.numbers[i], self.cluster_shape)

    def Scramble(self):
        """Perturb every cluster point with ``Point.Scramble(self.delta)``."""
        for cluster in self.Clusters:
            for point in cluster.points:
                point.Scramble(self.delta)

    def CreateClusters(self):
        """Run the cluster pipeline: place centers, create clusters, fill them, perturb the points."""
        self.PlaceCenters()
        self.InitializeClusters()
        self.FillClusters()
        self.Scramble()

    def AddIsolatedLoc(self):
        """Add ``IsolatedLoc`` points (label 0) drawn uniformly in the frame, outside every cluster.

        Candidates for which ``Point.Isin`` is true for some cluster are discarded. The number
        of candidates is capped (1000 per requested point) so the loop cannot run forever when
        the clusters leave almost no free area; a message is printed if the cap is reached.
        """
        max_attempts = 1000 * self.IsolatedLoc
        attempts = 0
        n = 0  # n counts the number of isolated locations that are added to the data
        while (n < self.IsolatedLoc) and (attempts < max_attempts):
            attempts += 1
            draw = np.random.uniform(low=[self.xmin, self.ymin], high=[self.xmax, self.ymax], size=(1,2))
            new_noise = Point(draw[0][0], draw[0][1]) # wrapped in a Point to use its methods
            new_noise.add_label(0)  # 0 label is for noise

            if not any(new_noise.Isin(cluster) for cluster in self.Clusters):
                self.IsolatedPoints.append(new_noise)
                n += 1

        if n < self.IsolatedLoc:
            print('cannot place any more isolated points')

    def GetPointsCoord(self):
        """Return the collected coordinates of all cluster points, then all isolated points.

        The result has one row per point and two columns; its shape is (0, 2) when there is
        no point. Row order matches ``GetPointsLabels``.
        """
        coords = [p.GetCoord() for cluster in self.Clusters for p in cluster.points]
        coords.extend(isolated.GetCoord() for isolated in self.IsolatedPoints)
        if not coords:
            return np.empty((0, 2))
        # Stack once instead of growing the array point by point.
        return np.vstack(coords)

    def GetPointsLabels(self):
        """Return the first label of every cluster point, then of every isolated point, as a list."""
        labels = [p.label[0] for cluster in self.Clusters for p in cluster.points]
        labels.extend(isolated.label[0] for isolated in self.IsolatedPoints)
        return labels

    def plot_points(self):
        """Plot the points twice with the module-level ``plot_points`` helper.

        The first call passes the ground-truth labels, the second passes a label of 0 for
        every point.
        """
        P1              = self.GetPointsCoord()
        labels          = self.GetPointsLabels()
        plot_points(P1, labels, 'Input Data: GT', 1)
        plot_points(P1, len(labels) * [0], 'Input Data', 1)

    def save_to_csv(self, path, filename):
        """Write one row per point to ``path/filename.csv`` with columns x, y and labels_1.

        Give ``filename`` without the ``.csv`` extension and ``path`` without a trailing ``/``.
        TODO: add one more label column per additional scale (labels_1, ..., labels_s).
        """
        df_xy    = pd.DataFrame(self.GetPointsCoord(), columns = ['x','y'])
        df_l     = pd.DataFrame(self.GetPointsLabels(), columns= ['labels_1'])
        df_final = pd.concat([df_xy, df_l], axis = 1)

        df_final.to_csv(path + '/' + filename + '.csv', index = False)

# NOTE(review): `datagen` is not assigned anywhere in the visible part of this file; these
# cells presumably rely on a DataGenerator instance created in a cell that is not shown --
# confirm it exists before running on a fresh kernel. The paths below are absolute and
# machine-specific.
# Export the generated points to a CSV file.
datagen.save_to_csv('/Users/Eliana/Documents/PDM', 'essai_datagen2')

# Read the exported file back and display it.
test = pd.read_csv('/Users/Eliana/Documents/PDM/essai_datagen2.csv')
test

# Print each cluster's center, then the number of centers that were placed.
for cluster in datagen.Clusters: 
    print(cluster.center)
print(len(datagen.centers))

# For every cluster point, print its ground-truth coordinates, its first label and its
# collected coordinates.
for cluster in datagen.Clusters: 
    for p in cluster.points:
        print(p.x, p.y)
        print('label:', p.label[0])
        print(p.x_collected, p.y_collected)

# NOTE(review): scratch experiment. This statement uses `self`, `mean` and `cov_scramble`;
# `self` is not defined at module level and the other two are only assigned further down,
# so it cannot run as written when the file is executed top to bottom.
# It adds a multivariate-normal draw to each element of `self.cluster_points`.
[x,y] = np.array([np.sum([p, np.random.multivariate_normal(mean, cov_scramble)],
                                        axis = 0) for p in self.cluster_points])

# Example point used by the perturbation test below.
[x,y] = [1,2]

# Perturb (x, y) with zero-mean Gaussian noise whose covariance is delta**2 on the diagonal.
delta = 0.1
mean = [0,0]
cov_scramble = [[delta**2, 0], [0, delta**2]]
[x, y] = np.sum([np.array([x,y]), np.random.multivariate_normal(mean, cov_scramble)],
                                        axis = 0)