<a href="https://colab.research.google.com/github/AISEN312/2024QuantInternships/blob/main/FRAM_TABLES.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis, pearsonr, jarque_bera
from statsmodels.tsa.stattools import adfuller
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.arima.model import ARIMA

In [5]:
crude_oil_data = pd.read_excel('/content/crudeF.xlsx')
# Load natural gas price data
nat_gas_data = pd.read_excel('/content/Natural gas futures india.xlsx')

In [7]:
# Descriptive statistics calculation
def descriptive_statistics(series):
    stats = {
        'Mean': series.mean(),
        'Standard Deviation': series.std(),
        'Skewness': skew(series),
        'Kurtosis': kurtosis(series),
    }
    jb_stat, jb_p = jarque_bera(series)
    stats['JB Statistic'] = jb_stat
    stats['JB p-value'] = jb_p
    return stats

# ADF test for stationarity
def adf_test(series):
    adf_stat, p_value, _, _, crit_values, _ = adfuller(series)
    return {'ADF Statistic': adf_stat, 'p-value': p_value, 'Critical Values': crit_values}

# Ljung-Box test for autocorrelation
def ljung_box_test(series, lags):
    result = acorr_ljungbox(series, lags=[lags], return_df=True)
    return {'LB Statistic': result['lb_stat'].iloc[0], 'p-value': result['lb_pvalue'].iloc[0]}

# Pearson correlation
def unconditional_pearson_corr(series1, series2):
    corr, p_value = pearsonr(series1, series2)
    return {'Correlation': corr, 'p-value': p_value}


In [15]:

  import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis, pearsonr, jarque_bera
from statsmodels.tsa.stattools import adfuller
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.arima.model import ARIMA

# ... (your data loading code)

# Descriptive statistics calculation
def descriptive_statistics(series):
    # Remove infinite and NaN values before calculating statistics
    series = series.replace([np.inf, -np.inf], np.nan).dropna()
    stats = {
        'Mean': series.mean(),
        'Standard Deviation': series.std(),
        'Skewness': skew(series),
        'Kurtosis': kurtosis(series),
    }
    jb_stat, jb_p = jarque_bera(series)
    stats['JB Statistic'] = jb_stat
    stats['JB p-value'] = jb_p
    return stats

# ADF test for stationarity
def adf_test(series):
    # Remove infinite and NaN values before ADF test
    series = series.replace([np.inf, -np.inf], np.nan).dropna()
    adf_stat, p_value, _, _, crit_values, _ = adfuller(series)
    return {'ADF Statistic': adf_stat, 'p-value': p_value, 'Critical Values': crit_values}

# Ljung-Box test for autocorrelation
def ljung_box_test(series, lags):
    # Remove infinite and NaN values before Ljung-Box test
    series = series.replace([np.inf, -np.inf], np.nan).dropna()
    result = acorr_ljungbox(series, lags=[lags], return_df=True)
    return {'LB Statistic': result['lb_stat'].iloc[0], 'p-value': result['lb_pvalue'].iloc[0]}

# Pearson correlation
def unconditional_pearson_corr(series1, series2):
    # Remove infinite and NaN values before calculating correlation
    series1 = series1.replace([np.inf, -np.inf], np.nan).dropna()
    series2 = series2.replace([np.inf, -np.inf], np.nan).dropna()
    corr, p_value = pearsonr(series1, series2)
    return {'Correlation': corr, 'p-value': p_value}

# ... (your analysis code)

# Crude oil descriptive statistics
crude_oil_stats = descriptive_statistics(crude_oil_data['Last Price  (R3)']) # Make sure column name is correct
crude_oil_adf = adf_test(crude_oil_data['Last Price  (R3)']) # Make sure column name is correct
crude_oil_ljung_box_10 = ljung_box_test(crude_oil_data['Last Price  (R3)'], lags=10) # Make sure column name is correct


# Natural gas descriptive statistics
nat_gas_stats = descriptive_statistics(nat_gas_data['Settle Price'])
nat_gas_adf = adf_test(nat_gas_data['Settle Price'])
nat_gas_ljung_box_20 = ljung_box_test(nat_gas_data['Settle Price'], lags=20) # Corrected the variable name to nat_gas_ljung_box_20

In [16]:
# Combine results into dataframes
crude_oil_summary = pd.DataFrame([crude_oil_stats, crude_oil_adf, crude_oil_ljung_box_10])
nat_gas_summary = pd.DataFrame([nat_gas_stats, nat_gas_adf, nat_gas_ljung_box_20])

print("Descriptive Statistics for Crude Oil Prices:\n", crude_oil_summary)
print("Descriptive Statistics for Natural Gas Prices:\n", nat_gas_summary)

Descriptive Statistics for Crude Oil Prices:
           Mean  Standard Deviation  Skewness  Kurtosis  JB Statistic  \
0  4388.044953         1451.962734  0.591762 -0.100235    298.143728   
1          NaN                 NaN       NaN       NaN           NaN   
2          NaN                 NaN       NaN       NaN           NaN   

     JB p-value  ADF Statistic   p-value  \
0  1.815148e-65            NaN       NaN   
1           NaN      -2.364871  0.151898   
2           NaN            NaN  0.000000   

                                     Critical Values  LB Statistic  
0                                                NaN           NaN  
1  {'1%': -3.431640711105285, '5%': -2.8621103564...           NaN  
2                                                NaN  49341.867843  
Descriptive Statistics for Natural Gas Prices:
          Mean  Standard Deviation  Skewness  Kurtosis  JB Statistic  \
0  4411.42381         1591.226438  0.051778  1.199522     12.683791   
1         NaN         

In [19]:
# Calculate returns
crude_oil_returns = crude_oil_data['Last Price  (R3)'].pct_change().dropna() # Corrected column name
nat_gas_returns = nat_gas_data['Settle Price'].pct_change().dropna()

# Descriptive statistics for returns
crude_oil_return_stats = descriptive_statistics(crude_oil_returns)
nat_gas_return_stats = descriptive_statistics(nat_gas_returns)

print("Descriptive Statistics for Crude Oil Returns:\n", crude_oil_return_stats)
print("Descriptive Statistics for Natural Gas Returns:\n", nat_gas_return_stats)


Descriptive Statistics for Crude Oil Returns:
 {'Mean': -0.0007919167086414362, 'Standard Deviation': 0.0545755322833896, 'Skewness': -42.355683151248705, 'Kurtosis': 2348.6750879336882, 'JB Statistic': 1181097871.3178437, 'JB p-value': 0.0}
Descriptive Statistics for Natural Gas Returns:
 {'Mean': -0.009890239838885019, 'Standard Deviation': 0.22923282745315562, 'Skewness': -7.827812764839637, 'Kurtosis': 77.55207189327763, 'JB Statistic': 62072.606117851006, 'JB p-value': 0.0}


  crude_oil_returns = crude_oil_data['Last Price  (R3)'].pct_change().dropna() # Corrected column name
  nat_gas_returns = nat_gas_data['Settle Price'].pct_change().dropna()


In [23]:
# Pearson correlation for price series
# Changed 'Last Price' to 'Last Price  (R3)'

# Ensure both series have the same length by taking the minimum length before dropping NaN
min_len = min(len(crude_oil_data['Last Price  (R3)']), len(nat_gas_data['Settle Price']))
crude_oil_prices = crude_oil_data['Last Price  (R3)'][:min_len].replace([np.inf, -np.inf], np.nan).dropna()
nat_gas_prices = nat_gas_data['Settle Price'][:min_len].replace([np.inf, -np.inf], np.nan).dropna()

# Reindex both series to the same index to ensure alignment
common_index = crude_oil_prices.index.intersection(nat_gas_prices.index)
crude_oil_prices = crude_oil_prices.loc[common_index]
nat_gas_prices = nat_gas_prices.loc[common_index]

price_corr = unconditional_pearson_corr(crude_oil_prices, nat_gas_prices)
print("Unconditional Pearson Correlation (Prices):\n", price_corr)


# Pearson correlation for returns series
# Ensure both series have the same length by taking the minimum length before dropping NaN
min_len = min(len(crude_oil_returns), len(nat_gas_returns))
crude_oil_ret = crude_oil_returns[:min_len].replace([np.inf, -np.inf], np.nan).dropna()
nat_gas_ret = nat_gas_returns[:min_len].replace([np.inf, -np.inf], np.nan).dropna()

# Reindex both series to the same index to ensure alignment
common_index = crude_oil_ret.index.intersection(nat_gas_ret.index)
crude_oil_ret = crude_oil_ret.loc[common_index]
nat_gas_ret = nat_gas_ret.loc[common_index]

return_corr = unconditional_pearson_corr(crude_oil_ret, nat_gas_ret)
print("Unconditional Pearson Correlation (Returns):\n", return_corr)

Unconditional Pearson Correlation (Prices):
 {'Correlation': 0.04306755558140864, 'p-value': 0.5367911173456665}
Unconditional Pearson Correlation (Returns):
 {'Correlation': 0.08512178552470345, 'p-value': 0.19254443232407087}


In [32]:
def naive_rolling_forecast(series, window_size, h_step=1):
    forecast_errors = []

    for i in range(window_size, len(series) - h_step):
        # Define the training window
        train = series[i - window_size:i]

        # Naïve forecast: use the last value in the training window
        forecast = [train.iloc[-1]] * h_step  # Repeat last value for h steps

        # Calculate Mean Squared Error for the forecast
        error = np.mean((series[i:i + h_step] - forecast) ** 2)
        forecast_errors.append(error)

    # Return the average forecast error
    return np.mean(forecast_errors) if forecast_errors else None

# Perform forecasting with different window sizes
window_sizes = [500, 600, 650]

In [34]:
for window in window_sizes:
    error_1_step = naive_rolling_forecast(crude_oil_data['Last Price  (R3)'], window_size=window, h_step=1)
    error_h_step = naive_rolling_forecast(crude_oil_data['Last Price  (R3)'], window_size=window, h_step=5)
    print(f"1-step forecast error for window {window}: {error_1_step}")
    print(f"h-step forecast error for window {window}: {error_h_step}")

1-step forecast error for window 500: nan
h-step forecast error for window 500: nan
1-step forecast error for window 600: nan
h-step forecast error for window 600: nan
1-step forecast error for window 650: nan
h-step forecast error for window 650: nan


In [35]:
# Forecast for crude oil returns
for window in window_sizes:
    error_1_step_return = naive_rolling_forecast(crude_oil_returns, window_size=window, h_step=1)
    error_h_step_return = naive_rolling_forecast(crude_oil_returns, window_size=window, h_step=5)
    print(f"1-step return forecast error for window {window}: {error_1_step_return}")
    print(f"h-step return forecast error for window {window}: {error_h_step_return}")

1-step return forecast error for window 500: 0.005126148977248985
h-step return forecast error for window 500: 0.006424478477095376
1-step return forecast error for window 600: 0.005200828280700215
h-step return forecast error for window 600: 0.006525406267955493
1-step return forecast error for window 650: 0.005234940546597101
h-step return forecast error for window 650: 0.006570192119467596
