In [6]:
import numpy as np
import pandas as pd
from scipy import stats

def fit_best_distribution(data, distributions=None):
    """Pick the candidate distribution with the smallest K-S statistic.

    Each candidate is fitted to ``data`` with its SciPy ``fit`` method and
    then scored with a one-sample Kolmogorov-Smirnov test against the
    fitted parameters. The candidate with the lowest statistic wins.

    Note: because the parameters are estimated from the same sample that
    is tested, the K-S p-value is not meaningful here; only the statistic
    is used, purely as a relative goodness-of-fit score.

    Args:
        data: Array-like of numeric observations. It is flattened to 1-D
            and NaN / infinite entries are discarded before fitting.
        distributions: Optional iterable of ``scipy.stats`` continuous
            distribution names to try. Defaults to
            ``('gamma', 'pearson3', 'lognorm', 'gumbel_r', 'norm')``.

    Returns:
        A tuple ``(best_distribution, best_params, best_ks_statistic)``:
        the winning distribution name, the parameter tuple returned by its
        ``fit`` method, and its K-S statistic. If no candidate yields a
        comparable statistic the result is ``(None, None, np.inf)``.

    Raises:
        ValueError: If ``data`` has no finite values left to fit.
    """
    if distributions is None:
        distributions = ('gamma', 'pearson3', 'lognorm', 'gumbel_r', 'norm')

    # SciPy's ``fit`` rejects non-finite input, so missing readings (NaN)
    # from the source file must be removed before any candidate is tried.
    sample = np.asarray(data, dtype=float).ravel()
    sample = sample[np.isfinite(sample)]
    if sample.size == 0:
        raise ValueError("data contains no finite values to fit")

    best_distribution = None
    best_params = None
    best_ks_statistic = np.inf

    for distribution in distributions:
        # Maximum-likelihood fit of this candidate to the cleaned sample.
        params = getattr(stats, distribution).fit(sample)

        # Only the statistic is used for ranking; the p-value is discarded.
        ks_statistic, _ = stats.kstest(sample, distribution, args=params)

        # Strict '<' keeps the earliest candidate on ties and never
        # selects a NaN statistic.
        if ks_statistic < best_ks_statistic:
            best_distribution = distribution
            best_params = params
            best_ks_statistic = ks_statistic

    return best_distribution, best_params, best_ks_statistic

def main(csv_path=r'D:\Personal projects\Dummy Data\data.csv'):
    """Fit candidate distributions to the first column of a CSV and report.

    Args:
        csv_path: Path of the CSV file to read. Only its first column is
            loaded. Defaults to the original hard-coded location so that
            calling ``main()`` with no arguments behaves as before.

    Prints the name of the best-fitting distribution, its fitted
    parameters, and its K-S statistic as returned by
    ``fit_best_distribution``.
    """
    # Load only the first column and hand it over as a flat 1-D array.
    data = pd.read_csv(csv_path, usecols=[0]).iloc[:, 0].to_numpy()

    # Fit best distribution
    best_distribution, best_params, best_ks_statistic = fit_best_distribution(data)

    # Print results
    print("Best Fitted Distribution:", best_distribution)
    print("Parameters of Best Fitted Distribution:", best_params)
    print("K-S Test Statistic Value:", best_ks_statistic)


if __name__ == "__main__":
    main()


Best Fitted Distribution: norm
Parameters of Best Fitted Distribution: (4.706160931908705, 9.98051984013918)
K-S Test Statistic Value: 0.3186294907336016
