In [148]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.special import kl_div
from scipy.special import rel_entr
from scipy.stats import norm, expon, gamma,beta
import warnings
warnings.filterwarnings("ignore")

In [149]:
def KLD(data,distributions):
  """Fit each candidate distribution to `data` and report the one with the
  lowest total relative entropy against a uniform empirical weighting.

  For every candidate the function fits parameters with `distribution.fit`,
  evaluates the fitted PDF at the sample points, and sums
  `rel_entr(1/n, pdf(x_i))` over all points. One line is printed per
  candidate, followed by a summary line for the winner.

  Note the argument order: `data` comes first here, whereas `AIC` and `BIC`
  in this notebook take `distributions` first.

  NOTE(review): the second argument to `rel_entr` is a density evaluated at
  the sample points, not a probability mass, so the total is not a true KL
  divergence and can be negative. Use it only to rank the candidates.

  Parameters
  ----------
  data : array-like
      One-dimensional sample the candidates are fitted to.
  distributions : iterable
      Candidate distribution objects exposing `fit`, `pdf` and `name`
      (for example `scipy.stats.norm`).

  Returns
  -------
  None
      Results are printed only.
  """
  # Accept lists/Series as well as ndarrays; a no-op for ndarrays.
  data = np.asarray(data)
  # The sample size does not depend on the candidate, so compute it once.
  n = data.size

  # Initialise the running winner so the summary below can never hit an
  # unbound name when no candidate beats the initial +inf score.
  best_distribution = None
  best_params = None
  best_total_divergence = np.inf

  for distribution in distributions:
    # Fit distribution to data
    params = distribution.fit(data)

    # Evaluate the PDF of the fitted distribution at every sample point
    pdf = distribution.pdf(data, *params)

    # Element-wise relative entropy between uniform empirical weights (1/n)
    # and the fitted density values
    kl_divergence = rel_entr(np.full(n, 1/n), pdf)

    # Sum the element-wise values to get the total divergence
    total_divergence = np.sum(kl_divergence)

    # A NaN total never compares as smaller, so such candidates are skipped.
    if total_divergence < best_total_divergence:
      best_distribution = distribution
      best_params = params
      best_total_divergence = total_divergence

    print(f'Total KL Divergence for {distribution.name} with parameters {params}: {total_divergence}')

  if best_distribution is None:
    # Empty candidate list, or every candidate scored NaN/inf.
    print('No best distribution found: no candidate produced a comparable total divergence')
    return

  print(f'Best distribution: {best_distribution.name}, Total divergence = {best_total_divergence}, Parameters = {best_params}')


In [150]:
def AIC(distributions,data):
  """Fit each candidate distribution to `data` and report the lowest AIC.

  For every candidate the score is `-2 * log_likelihood + 2 * k`, where the
  log-likelihood is the sum of `logpdf` over the sample and `k` is the
  length of the tuple returned by `distribution.fit`. One line is printed
  per candidate, followed by a summary line for the winner.

  Note the argument order: `distributions` comes first here, whereas `KLD`
  in this notebook takes `data` first.

  Parameters
  ----------
  distributions : iterable
      Candidate distribution objects exposing `fit`, `logpdf` and `name`
      (for example `scipy.stats.norm`).
  data : array-like
      One-dimensional sample the candidates are fitted to.

  Returns
  -------
  None
      Results are printed only.
  """
  best_distribution = None
  best_params = None
  best_aic = np.inf

  for distribution in distributions:
    # Fit distribution to data
    params = distribution.fit(data)

    # Log-likelihood of the sample under the fitted distribution
    log_likelihood = distribution.logpdf(data, *params).sum()

    # Every fitted value counts as a free parameter
    num_params = len(params)

    # Calculate AIC
    aic = -2 * log_likelihood + 2 * num_params

    print(f'{distribution.name}: AIC = {aic}, Parameters = {params}')

    # Keep the candidate with the lowest AIC; a NaN score never compares
    # as smaller and is therefore skipped.
    if aic < best_aic:
      best_distribution = distribution
      best_params = params
      best_aic = aic

  if best_distribution is None:
    # Empty candidate list, or every candidate scored NaN/inf; without this
    # guard the summary line would fail on `None.name`.
    print('No best distribution found: no candidate produced a comparable AIC')
    return

  print(f'Best distribution: {best_distribution.name}, AIC = {best_aic}, Parameters = {best_params}')


In [151]:
def BIC(distributions,data):
  """Fit each candidate distribution to `data` and report the lowest BIC.

  For every candidate the score is `-2 * log_likelihood + k * log(n)`, where
  the log-likelihood is the sum of `logpdf` over the sample, `k` is the
  length of the tuple returned by `distribution.fit`, and `n` is
  `len(data)`. One line is printed per candidate, followed by a summary
  line for the winner.

  Note the argument order: `distributions` comes first here, whereas `KLD`
  in this notebook takes `data` first.

  Parameters
  ----------
  distributions : iterable
      Candidate distribution objects exposing `fit`, `logpdf` and `name`
      (for example `scipy.stats.norm`).
  data : array-like
      One-dimensional sample the candidates are fitted to.

  Returns
  -------
  None
      Results are printed only.
  """
  best_distribution = None
  best_params = None
  best_bic = np.inf

  # The sample size is the same for every candidate, so compute it once.
  num_samples = len(data)

  for distribution in distributions:
    # Fit distribution to data
    params = distribution.fit(data)

    # Log-likelihood of the sample under the fitted distribution
    log_likelihood = distribution.logpdf(data, *params).sum()

    # Every fitted value counts as a free parameter
    num_params = len(params)

    # Calculate BIC
    bic = -2 * log_likelihood + num_params * np.log(num_samples)

    print(f'{distribution.name}: BIC = {bic}, Parameters = {params}')

    # Keep the candidate with the lowest BIC; a NaN score never compares
    # as smaller and is therefore skipped.
    if bic < best_bic:
      best_distribution = distribution
      best_params = params
      best_bic = bic

  if best_distribution is None:
    # Empty candidate list, or every candidate scored NaN/inf; without this
    # guard the summary line would fail on `None.name`.
    print('No best distribution found: no candidate produced a comparable BIC')
    return

  print(f'Best distribution: {best_distribution.name}, BIC = {best_bic}, Parameters = {best_params}')

In [152]:
# Parameters shared by the four synthetic samples below.
shape=2
size=1000
scale=5

# Seeded local generator so that Restart & Run All reproduces the same
# samples (and therefore the same rankings) on every run.
seed=0
rng=np.random.default_rng(seed)

# One sample per family under test. For the beta draw, `scale` is passed as
# the second shape parameter; the other draws use 2*scale as their scale.
data0=rng.beta(shape,scale,size)
data1=rng.exponential(2*scale,size)
data2=rng.gamma(shape, 2*scale, size)
data3=rng.normal(0, 2*scale, size)

# NOTE: despite its name, `filenames` holds the sample arrays themselves, not
# file names. The name is kept in case other cells reference it.
filenames = [data0, data1, data2, data3]
distributions=[norm,expon,gamma,beta]

# Rank every candidate family against every sample with all three criteria.
for data in filenames:
    print("--------------------------------------------")
    #Using KL Divergence
    KLD(data,distributions)
    #Using AIC
    AIC(distributions,data)
    #Using BIC
    BIC(distributions,data)

--------------------------------------------
Total KL Divergence for norm with parameters (0.2826264503959966, 0.16080795797220984): -7.316361179362392
Total KL Divergence for expon with parameters (0.006594143239729134, 0.2760323071562675): -7.194992644183504
Total KL Divergence for gamma with parameters (3.6711466211879236, -0.039694705206928096, 0.0877985591763701): -7.3684315793572495
Total KL Divergence for beta with parameters (1.8149851100186734, 4.381063535691971, 0.0029151431267607562, 0.9537718663032309): -7.386933936125613
Best distribution: beta, Total divergence = -7.386933936125613, Parameters = (1.8149851100186734, 4.381063535691971, 0.0029151431267607562, 0.9537718663032309)
norm: AIC = -813.2118007605102, Parameters = (0.2826264503959966, 0.16080795797220984)
expon: AIC = -570.4747304027325, Parameters = (0.006594143239729134, 0.2760323071562675)
gamma: AIC = -915.3526007502244, Parameters = (3.6711466211879236, -0.039694705206928096, 0.0877985591763701)
beta: AIC = -9