In [22]:
%matplotlib inline
%config InlineBackend.figure_formats = ['svg']

import random
import csv
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm # Colormaps
import matplotlib.gridspec as gridspec
from mpl_toolkits.axes_grid1 import make_axes_locatable
import seaborn as sns
import pandas as pd

# Plot styling for every figure produced in this notebook.
sns.set_style('darkgrid')
# Seed both generators for reproducible runs: DataSets draws all of its random
# values from the stdlib `random` module, which np.random.seed does not affect.
np.random.seed(42)
random.seed(42)



In [44]:
class DataSets:
    """
    numClasses -> the number of classes (Gaussian distributions) to generate
    numpoints -> the number of points to generate
    minRange -> the lower limit of the range the classes and the points should be in
    maxRange -> the upper limit of the range the classes and the points should be in
    """
    def __init__(myobject, numClasses, numpoints, minRange, maxRange):
        myobject.numClasses = numClasses
        myobject.numpoints = numpoints
        myobject.minRange = minRange
        myobject.maxRange = maxRange
        
    def multidimential_gauss(self, x, d, mean, covariance):
        """pdf of the multivariate normal distribution."""
        x_m = x - mean
        return (1. / (np.sqrt((2 * np.pi)**d * np.linalg.det(covariance))) * 
                np.exp(-(np.linalg.solve(covariance, x_m).T.dot(x_m)) / 2))
    
    def generate_surface(self, mean, covariance, d):
        """Helper function to generate density surface."""
        nb_of_x = 100 # grid size
        x1s = np.linspace(-100, 100, num=nb_of_x)
        x2s = np.linspace(-100, 100, num=nb_of_x)
        x1, x2 = np.meshgrid(x1s, x2s) # Generate grid
        pdf = np.zeros((nb_of_x, nb_of_x))
        # Fill the cost matrix for each combination of weights
        for i in range(nb_of_x):
            for j in range(nb_of_x):
                pdf[i,j] = self.multidimential_gauss(
                    np.matrix([[x1[i,j]], [x2[i,j]]]), 
                    d, mean, covariance)
        return x1, x2, pdf 
    
    def creating_distributions(self, mean1, mean2, covar, d, count, output_dir=None):
        """Draw one Gaussian distribution as a filled contour plot and save it.

        mean1, mean2 -- the two components of the distribution's mean
        covar        -- value placed on the diagonal of the 2x2 covariance
                        matrix (off-diagonal entries are 0)
        d            -- dimensionality forwarded to generate_surface
        count        -- label used in the plot title and the file name
        output_dir   -- directory the figure is saved into; when omitted the
                        original hard-coded location is used, so existing
                        callers behave as before

        The figure is written to ``<output_dir>/<count>_Gauss_Class``.

        TODO- Change the graph size
        """
        if output_dir is None:
            # Historical default; only valid on the original author's machine.
            output_dir = "/Users/navsa/OneDrive/Desktop/CSCI198Project/TimeSpan-to-classifyClass/Classes"

        bidimentional_mean = np.matrix([[mean1], [mean2]])  # Mean
        bidimentional_covariance = np.matrix([
            [covar, 0.],
            [0., covar]])  # Covariance
        x1, x2, p = self.generate_surface(bidimentional_mean, bidimentional_covariance, d)

        fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(8,4))
        con = axs.contourf(x1, x2, p, 100, cmap=cm.YlGnBu)
        axs.set_xlabel('$x_1$', fontsize=10)
        axs.set_ylabel('$x_2$', fontsize=10)
        # NOTE(review): these limits are wider than the +/-100 grid that
        # generate_surface evaluates by default.
        axs.axis([-200.5, 200.5, -200.5, 200.5])
        axs.set_aspect('equal')
        axs.set_title('Gaussian Distribution %s'%count, fontsize=13)

        # Add colorbar and title
        fig.subplots_adjust(right = 0.8)
        cbar_ax = fig.add_axes([0.85, 0.15, 0.02, 0.7])
        cbar = fig.colorbar(con, cax=cbar_ax)
        cbar.ax.set_ylabel('$p(x_1, x_2)$', fontsize=10)
        plt.savefig("%s/%s_Gauss_Class" % (output_dir, count))
        #plt.show()
        #return fig

    def setting_mean_covariance(self):
        """generates multiple gaussian distributions/classes"""
        d=2
        with open ('GaussClasses.csv', mode='w') as Random_Gauss_Distributions:
            fieldnames = ['Mean1','Mean2', 'Covariance']
            writer=csv.DictWriter(Random_Gauss_Distributions,fieldnames=fieldnames)
            writer.writeheader()

            for i in range (self.numClasses):
                temp1 = random.gauss(self.minRange, self.maxRange) #First mean
                str_temp1=str(temp1)

                temp2 = random.gauss(self.minRange, self.maxRange) #second mean
                str_temp2=str(temp2)

                temp3 = random.gauss(self.minRange, self.maxRange) #covariance
                str_temp3=str(temp2)

                writer.writerow({'Mean1':str_temp1,'Mean2':str_temp2,'Covariance':str_temp3})
                
    def read_distributions(self):
        """
        Reads all the generted Gaussian distributions and 
        calls the creating_distributions function tto make illustrations for each distribution 
        """
        d=2
        with open ('GaussClasses.csv', mode='r') as Random_Gauss_Distributions:
            csv_reader=csv.DictReader(Random_Gauss_Distributions)

            linecount=0

            for row in csv_reader:

                if linecount == 0:
                    linecount+=1
                #elif linecount == 1:
                   # print(row['Mean1'])
                    #linecount+=1
                else:
                    #print(row['Mean1'])
                    #print(linecount)

                    self.creating_distributions(float(row['Mean1']), float(row['Mean2']),float(row['Covariance']), 
                                                d,linecount)
                    #pp.savefig(figure)
                    linecount+=1

                    
#     def return_center(m1,m2,c):
#         return [m1,m2]
    
    #distance of each class with the other
    def euclidean_distance(self, a, b):
        dist = np.linalg.norm(a - b)
        return dist
    
        #Creating a csv file
    def datasetOfPoints(self):
        """Creates random popints that ned to be classified into classes"""
        with open('PointsToBeClassified.csv', mode='w') as dist_write:
            feildnames = ['X','Y']
            writer = csv.DictWriter(dist_write, fieldnames = feildnames)
            writer.writeheader()
            for i in range (self.numpoints):
                writer.writerow({'X':random.uniform(self.minRange, self.maxRange), 
                                 'Y':random.uniform(self.minRange, self.maxRange)})

    def datasetOfPointsUsedToTrain(self):
        with open('PointsUsedToTrained.csv', mode='w') as dist_write:
            feildnames = ['X','Y']
            writer = csv.DictWriter(dist_write, fieldnames = feildnames)
            writer.writeheader()
            with open ('GaussClasses.csv', mode='r') as Random_Gauss_Distributions:
                csv_reader=csv.DictReader(Random_Gauss_Distributions)

                linecount=0

                for row in csv_reader:

                    if linecount == 0:
                        linecount+=1
                    #elif linecount == 1:
                       # print(row['Mean1'])
                        #linecount+=1
                    else:
                        #print(row['Mean1'])
                        #print(linecount)
                        writer.writerow({'X':random.gauss(float(row['Mean1']), float(row['Covariance'])), 
                                         'Y':random.gauss(float(row['Mean2']), float(row['Covariance']))})

                        linecount+=1
            