# Gaussian Distribution Function

### By Rodrigo Guarneros

This class is built following an Object-Oriented Programming approach.

It is a class that exposes the following attributes and methods:

- Attributes: mean, stdev and data
- Methods: read_data_file, calculate_mean, calculate_stdev, plot_histogram, probability density calculation (pdf) and plot_histogram_pdf

In [1]:
#Dependencies
import math
import matplotlib.pyplot as plt

In [6]:
class Gaussian():
    """
    Gaussian distribution class for calculating and visualizing a Gaussian
    distribution.

    Attributes:
        mean (float): the mean value of the distribution
        stdev (float): the standard deviation of the distribution
        data (list): the numbers loaded from a data file (empty until
            read_data_file is called or the attribute is assigned directly)
    """

    def __init__(self, mu=0, sigma=1):
        self.mean = mu
        self.stdev = sigma
        self.data = []

    def calculate_mean(self):
        """
        Calculate the mean of the data set and store it in self.mean.

        Args:
            none

        Returns:
            float: mean of the data set

        Raises:
            ZeroDivisionError: if self.data is empty
        """
        # 1.0 * keeps the division in floating point regardless of the
        # element type held in self.data.
        self.mean = 1.0 * sum(self.data) / len(self.data)
        return self.mean

    def calculate_stdev(self, sample=True):
        """
        Calculate the standard deviation of the data set and store it in
        self.stdev.

        The mean is recomputed from self.data first (which also refreshes
        self.mean), so the deviations are never taken about a stale mean.

        Args:
            sample (bool): whether the data represents a sample (divide by
                n - 1) or a population (divide by n)

        Returns:
            float: standard deviation of the data set

        Raises:
            ZeroDivisionError: if self.data is empty, or holds a single
                value while sample is True
        """
        n = len(self.data) - 1 if sample else len(self.data)
        mean = self.calculate_mean()

        squared_deviations = sum((d - mean) ** 2 for d in self.data)

        self.stdev = math.sqrt(squared_deviations / n)
        return self.stdev

    def read_data_file(self, file_name, sample=True):
        """
        Read integers from a text file (one number per line), store them in
        self.data and update self.mean and self.stdev accordingly.

        Blank lines are skipped.

        Args:
            file_name (str): path of the file to read
            sample (bool): whether the data represents a sample or population

        Returns:
            none

        Raises:
            ValueError: if a non-blank line cannot be parsed as an integer
        """
        # The with-statement closes the file; no explicit close() is needed.
        with open(file_name) as file:
            data_list = [int(line) for line in file if line.strip()]

        self.data = data_list
        self.mean = self.calculate_mean()
        self.stdev = self.calculate_stdev(sample)

    def plot_histogram(self):
        """
        Output a histogram of the instance variable data using the
        matplotlib pyplot library.

        Args:
            none

        Returns:
            none
        """
        plt.hist(self.data)
        plt.title('Histogram of Data')
        plt.xlabel('data')
        plt.ylabel('count')

    def pdf(self, x):
        """
        Probability density function calculator for the gaussian distribution.

        Args:
            x (float): point at which to evaluate the density

        Returns:
            float: probability density function output
        """
        # f(x) = 1 / (sigma * sqrt(2*pi)) * exp(-0.5 * ((x - mu) / sigma)^2)
        z = (x - self.mean) / self.stdev
        return (1.0 / (self.stdev * math.sqrt(2 * math.pi))) * math.exp(-0.5 * z ** 2)

    def plot_histogram_pdf(self, n_space=50):
        """
        Plot the normalized histogram of the data and a plot of the
        probability density function along the same range.

        Args:
            n_space (int): number of points at which the pdf is evaluated

        Returns:
            list: x values for the pdf plot
            list: y values for the pdf plot
        """
        min_range = min(self.data)
        max_range = max(self.data)

        # calculates the interval between x values
        interval = 1.0 * (max_range - min_range) / n_space

        x = [min_range + interval * i for i in range(n_space)]
        y = [self.pdf(value) for value in x]

        # make the plots
        fig, axes = plt.subplots(2, sharex=True)
        fig.subplots_adjust(hspace=.5)
        axes[0].hist(self.data, density=True)
        axes[0].set_title("Normed Histogram of Data")
        axes[0].set_ylabel('Density')

        axes[1].plot(x, y)
        axes[1].set_title('Normal Distribution for \n Sample MEan and Sample Standard Deviation')
        axes[1].set_ylabel('Density')

        return x, y
        