In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# Smallest sample on which every test below is defined: the skewness test
# used internally by scipy.stats.normaltest needs at least 8 observations.
_MIN_LOG_RETURNS = 8

# Significance level used for the p-value based conclusions.
_ALPHA = 0.05


def _fetch_close_prices(ticker, start_date, end_date):
    """Download daily prices and return the cleaned 'Close' Series, or None on failure."""
    print(f"\nFetching data for {ticker} from {start_date} to {end_date}...")
    try:
        # progress=False hides the download progress bar, making output cleaner.
        data = yf.download(ticker, start=start_date, end=end_date, progress=False)
    except Exception as e:
        # Deliberately broad: yfinance surfaces network and parsing problems
        # through many exception types, and this script only reports them.
        print(f"Error fetching data: {e}. Please check your internet connection or the ticker symbol.")
        return None

    if data is None or data.empty:
        print("Could not fetch data. The dataframe is empty. This might be due to an invalid ticker or date range.")
        return None

    print("Data fetched successfully. Displaying the first 5 rows of the data:")
    print(data.head())
    print(f"\nTotal data points fetched: {len(data)}")

    close = data['Close']
    # Recent yfinance releases return MultiIndex columns (price field, ticker)
    # even for a single ticker, so data['Close'] is then a one-column
    # DataFrame; pd.to_numeric only accepts 1-D input, so collapse it first.
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]

    # Coerce non-numeric values to NaN and drop them; they cannot be used
    # for return calculation.
    close = pd.to_numeric(close, errors='coerce').dropna()
    if close.empty:
        print("No valid 'Close' prices found after cleaning. Cannot calculate returns.")
        return None
    return close


def _compute_log_returns(close):
    """Return daily log returns ln(P_t / P_{t-1}) as a Series, or None if too few are available."""
    # A zero or negative quote would make the logarithm infinite or undefined,
    # and dropna() does not remove +/-inf, so discard such rows up front.
    positive_close = close[close > 0]

    # .shift(1) aligns the previous day's close with the current row; the
    # first row has no predecessor and becomes NaN, hence the dropna().
    log_returns = np.log(positive_close / positive_close.shift(1)).dropna()
    log_returns = log_returns.rename('Log_Returns')

    if log_returns.empty:
        print("No valid log returns could be calculated. This can happen if there's only one data point or issues with data quality.")
        return None
    if len(log_returns) < _MIN_LOG_RETURNS:
        print(f"Only {len(log_returns)} log returns are available; at least {_MIN_LOG_RETURNS} are needed for the normality tests.")
        return None
    return log_returns


def _plot_return_distribution(log_returns, ticker):
    """Show a histogram (with KDE) and a normal Q-Q plot of the log returns."""
    print("\n--- Generating Visualizations (Histogram and Q-Q Plot) ---")
    fig, (hist_ax, qq_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # Histogram: frequency distribution of the log returns. kde=True overlays
    # a Kernel Density Estimate, a smoothed version of the histogram.
    sns.histplot(log_returns, kde=True, bins=50, color='skyblue', ax=hist_ax)
    hist_ax.set_title(f'Histogram of {ticker} Daily Log Returns', fontsize=14)
    hist_ax.set_xlabel('Log Returns', fontsize=12)
    hist_ax.set_ylabel('Frequency', fontsize=12)
    hist_ax.grid(axis='y', alpha=0.75, linestyle='--')
    mean_return = log_returns.mean()
    hist_ax.axvline(mean_return, color='red', linestyle='dashed', linewidth=1, label=f'Mean: {mean_return:.6f}')
    hist_ax.legend()

    # Q-Q plot: sample quantiles against those of a theoretical normal
    # distribution. If the data is normal, points lie on a straight line.
    stats.probplot(log_returns, dist="norm", plot=qq_ax)
    qq_ax.set_title(f'Q-Q Plot of {ticker} Daily Log Returns vs. Normal Distribution', fontsize=14)
    qq_ax.set_xlabel('Theoretical Quantiles (Normal Distribution)', fontsize=12)
    qq_ax.set_ylabel('Ordered Values (Log Returns)', fontsize=12)
    qq_ax.grid(True, linestyle='--', alpha=0.6)

    fig.tight_layout(pad=3.0)  # Adjust layout to prevent overlap of subplots
    plt.show()


def _print_p_value_conclusion(p_value):
    """Print the verdict of a normality test from its p-value at the _ALPHA level."""
    if p_value > _ALPHA:
        print("  Conclusion: Fail to reject H0. Log Returns appear to be normally distributed.")
    else:
        print("  Conclusion: Reject H0. Log Returns do NOT appear to be normally distributed.")


def _report_normality_tests(log_returns):
    """Run four normality tests on the log returns and print their results."""
    print("\n--- Performing Statistical Normality Tests ---")
    print("Interpretation: For most tests, a p-value > 0.05 suggests the data is normally distributed (Fail to reject H0).")
    print("A p-value <= 0.05 suggests the data is NOT normally distributed (Reject H0).")

    values = log_returns.to_numpy()

    # Shapiro-Wilk Test
    # H0: The data is normally distributed.
    # Ha: The data is not normally distributed.
    shapiro_stat, shapiro_p = stats.shapiro(values)
    print("\nShapiro-Wilk Test:")
    print(f"  Statistic = {shapiro_stat:.6f}, p-value = {shapiro_p:.6f}")
    if len(values) > 5000:
        # SciPy documents that the p-value may be inaccurate above this size.
        print("  Note: with more than 5000 observations the Shapiro-Wilk p-value may not be accurate.")
    _print_p_value_conclusion(shapiro_p)

    # D'Agostino's K-squared Test (Omnibus Test), based on skewness and kurtosis.
    # H0: The data is normally distributed (i.e., skewness and excess kurtosis are zero).
    # Ha: The data is not normally distributed.
    # normaltest returns only (statistic, p-value); the sample skewness and
    # excess kurtosis have to be computed separately.
    k2_stat, k2_p = stats.normaltest(values)
    skewness = float(stats.skew(values))
    kurtosis = float(stats.kurtosis(values))  # Fisher definition: normal -> 0
    print("\nD'Agostino's K-squared (Omnibus) Test:")
    print(f"  Statistic = {k2_stat:.6f}, p-value = {k2_p:.6f}")
    print(f"  Calculated Skewness = {skewness:.6f}, Kurtosis = {kurtosis:.6f} (Excess Kurtosis)")
    _print_p_value_conclusion(k2_p)

    # Kolmogorov-Smirnov Test
    # Tests whether the sample comes from a fully specified distribution, so
    # the mean and standard deviation of the normal must be supplied.
    # H0: The sample data is drawn from a normal distribution with the given parameters.
    # Ha: The sample data is not drawn from a normal distribution.
    mean_lr = log_returns.mean()
    std_lr = log_returns.std()
    ks_stat, ks_p = stats.kstest(values, 'norm', args=(mean_lr, std_lr))
    print("\nKolmogorov-Smirnov Test:")
    print(f"  Statistic = {ks_stat:.6f}, p-value = {ks_p:.6f}")
    # Estimating the parameters from the same sample biases the p-value
    # upwards (the situation the Lilliefors correction addresses).
    print("  Note: mean and std were estimated from the sample, so this p-value is conservative.")
    _print_p_value_conclusion(ks_p)

    # Anderson-Darling Test
    # A modification of the K-S test that gives more weight to the tails.
    # Instead of a p-value it provides critical values per significance level.
    # H0: The data comes from a specified distribution (here, normal).
    # Ha: The data does not come from the specified distribution.
    ad_result = stats.anderson(values, dist='norm')
    print("\nAnderson-Darling Test:")
    print(f"  Statistic = {ad_result.statistic:.6f}")
    for sl, cv in zip(ad_result.significance_level, ad_result.critical_values):
        print(f"  At {sl:.1f}% significance level, Critical Value = {cv:.6f}")
        if ad_result.statistic < cv:
            print(f"    (Statistic < Critical Value: Data appears normal at {sl:.1f}% significance.)")
        else:
            print(f"    (Statistic >= Critical Value: Data does NOT appear normal at {sl:.1f}% significance.)")


def check_inr_usd_normality(years_of_data=5):
    """
    Fetches INR-USD data, calculates daily log returns, visualizes their distribution,
    and performs statistical normality tests.

    All results are printed to stdout and shown as a matplotlib figure. The
    function stops early, after printing a message, when the download fails
    or too few valid prices are available.

    Args:
        years_of_data (int): The number of years of historical data to fetch.
                             Defaults to 5 years.

    Returns:
        None
    """
    print(f"--- Starting Normality Check for INR-USD Log Returns (last {years_of_data} years) ---")

    # 1. Get the Data
    # The ticker 'INR=X' represents the value of 1 USD in Indian Rupees.
    ticker = "INR=X"
    # Take "today" once so start and end cannot straddle midnight.
    today = pd.to_datetime('today')
    end_date = today.strftime('%Y-%m-%d')
    start_date = (today - pd.DateOffset(years=years_of_data)).strftime('%Y-%m-%d')

    close = _fetch_close_prices(ticker, start_date, end_date)
    if close is None:
        return

    # 2. Calculate Log Returns
    log_returns = _compute_log_returns(close)
    if log_returns is None:
        return

    print(f"\nCalculated {len(log_returns)} Log Returns. Displaying basic statistics:")
    print(log_returns.describe())

    # 3. Visualize the Distribution
    _plot_return_distribution(log_returns, ticker)

    # 4. Perform Statistical Tests
    _report_normality_tests(log_returns)

    print("\n--- Normality Check Complete ---")

# --- Script entry point: the analysis runs only when this file is executed directly ---
if __name__ == "__main__":
    # Look-back window in years; change the value to analyse more or less history.
    check_inr_usd_normality(years_of_data=10)
