In [4]:
import pandas as pd
import numpy as np
from scipy.stats import kstest
import scipy.stats as stats


# Yearly ICICI price exports, one CSV per financial year (oldest first)
csv_files = [
    '/home/ayus/Downloads/rtut/2018-2019_ICICI.csv',
    '/home/ayus/Downloads/rtut/2019-2020_ICICI.csv',
    '/home/ayus/Downloads/rtut/2020-2021_ICICI.csv',
    '/home/ayus/Downloads/rtut/2021-2022_ICICI.csv',
    '/home/ayus/Downloads/rtut/2022-2023_ICICI.csv'
]

# Load every yearly file, then stack them into one frame with a fresh 0..n-1 index
yearly_frames = [pd.read_csv(csv_path) for csv_path in csv_files]
df_icici = pd.concat(yearly_frames, ignore_index=True)

# Strip stray leading/trailing whitespace from the header names so the
# column lookups below match
df_icici.columns = [column_name.strip() for column_name in df_icici.columns]

# Pull the opening and closing price columns out as plain arrays
opening_prices = df_icici['Open Price'].values
closing_prices = df_icici['Close Price'].values

# Validate the inputs before running the KS test.
# Both price series are fitted and tested later, so both are checked here
# (checking only the opening prices would let a bad closing-price column through).
is_valid_for_ks = True
price_series = (opening_prices, closing_prices)

# "Continuous" is approximated by a numeric dtype check: a non-numeric column
# (e.g. strings from a malformed CSV) cannot be fitted to a distribution.
is_continuous = all(
    np.issubdtype(prices.dtype, np.number) for prices in price_series
)
if not is_continuous:
    is_valid_for_ks = False
    print("Data is not continuous and may not be suitable for the KS test.")
elif not all(np.isfinite(prices).all() for prices in price_series):
    # scipy's distribution `fit` rejects NaN/inf, so report the problem here
    # instead of failing later. Only evaluated for numeric data, because
    # np.isfinite is not defined for non-numeric arrays.
    is_valid_for_ks = False
    print("Data contains missing or non-finite values and is not suitable for the KS test.")

# Check if the sample size is adequate (use the smaller of the two series)
sample_size = min(len(prices) for prices in price_series)
if sample_size < 30:
    is_valid_for_ks = False
    print("Sample size is small and may not be suitable for the KS test.")

# Run the KS goodness-of-fit test only when the earlier validity checks passed
if is_valid_for_ks:
    distribution_names = ['norm', 'lognorm', 'expon', 'gamma']

    for distribution in distribution_names:
        # Resolve the scipy.stats distribution object once per candidate
        candidate = getattr(stats, distribution)

        # Estimate the distribution parameters from each price series
        params_opening = candidate.fit(opening_prices)
        params_closing = candidate.fit(closing_prices)

        # Compare each series against the candidate distribution using the
        # parameters fitted above.
        # NOTE(review): the parameters are estimated from the same sample that
        # is being tested, so the reported p-values are presumably not exact
        # (the standard KS null distribution assumes a fully specified
        # distribution) - confirm before drawing conclusions from them.
        ks_opening = kstest(opening_prices, distribution, args=params_opening)
        ks_closing = kstest(closing_prices, distribution, args=params_closing)
        test_statistic_opening, p_value_opening = ks_opening
        test_statistic_closing, p_value_closing = ks_closing

        print(f"KS test statistic for Opening prices ({distribution}): {test_statistic_opening:.4f}, p-value: {p_value_opening:.4f}")
        print(f"KS test statistic for Closing prices ({distribution}): {test_statistic_closing:.4f}, p-value: {p_value_closing:.4f}")


KS test statistic for Opening prices (norm): 0.1418, p-value: 0.0000
KS test statistic for Closing prices (norm): 0.1393, p-value: 0.0000
KS test statistic for Opening prices (lognorm): 0.1125, p-value: 0.0000
KS test statistic for Closing prices (lognorm): 0.1129, p-value: 0.0000
KS test statistic for Opening prices (expon): 0.1014, p-value: 0.0000
KS test statistic for Closing prices (expon): 0.1026, p-value: 0.0000
KS test statistic for Opening prices (gamma): 0.1062, p-value: 0.0000
KS test statistic for Closing prices (gamma): 0.1064, p-value: 0.0000
