In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression

In [2]:
NaturalGas = pd.read_csv('Nat_Gas.csv')
NaturalGas

Unnamed: 0,Dates,Prices
0,10/31/20,10.1
1,11/30/20,10.3
2,12/31/20,11.0
3,1/31/21,10.9
4,2/28/21,10.9
5,3/31/21,10.9
6,4/30/21,10.4
7,5/31/21,9.84
8,6/30/21,10.0
9,7/31/21,10.1


In [3]:
import pandas as pd
import plotly.express as px

# Assuming the CSV has already been loaded
NaturalGas['Dates'] = pd.to_datetime(NaturalGas['Dates'])

# Create the Plotly line chart
fig = px.line(NaturalGas, x='Dates', y='Prices', title='Natural Gas Prices Over Time',
              labels={'Prices': 'Price', 'Dates': 'Date'}, markers=True)

# Show the interactive plot
fig.show()


  NaturalGas['Dates'] = pd.to_datetime(NaturalGas['Dates'])


In [4]:
from statsmodels.tsa.arima.model import ARIMA

# Fit ARIMA model (order can be tuned for better performance)
model_arima = ARIMA(NaturalGas['Prices'], order=(5, 1, 0))
model_arima_fit = model_arima.fit()

# Forecast for the next 12 months
forecast_arima = model_arima_fit.forecast(steps=12)

# Generate future dates for the next 12 months
last_date = NaturalGas['Dates'].iloc[-1]
future_dates = pd.date_range(last_date + pd.DateOffset(months=1), periods=12, freq='M')

# Create DataFrame for ARIMA forecast
forecast_arima_df = pd.DataFrame({
    'Dates': future_dates,
    'Prices': forecast_arima
})



'M' is deprecated and will be removed in a future version, please use 'ME' instead.



In [5]:
# Combine with historical data
forecast_arima_df = pd.concat([NaturalGas, forecast_arima_df], ignore_index=True)



In [6]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Fit SARIMA model (order and seasonal_order can be tuned for better performance)
model_sarima = SARIMAX(NaturalGas['Prices'], order=(5, 1, 0), seasonal_order=(1, 1, 1, 12))
model_sarima_fit = model_sarima.fit()

# Forecast for the next 12 months
forecast_sarima = model_sarima_fit.forecast(steps=12)

# Create DataFrame for SARIMA forecast
forecast_sarima_df = pd.DataFrame({
    'Dates': future_dates,
    'Prices': forecast_sarima
})




Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.



In [7]:
# Combine with historical data
forecast_sarima_df = pd.concat([NaturalGas, forecast_sarima_df], ignore_index=True)



In [8]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Fit Exponential Smoothing model
model_ets = ExponentialSmoothing(NaturalGas['Prices'], seasonal='add', trend='add', seasonal_periods=12)
model_ets_fit = model_ets.fit()

# Forecast for the next 12 months
forecast_ets = model_ets_fit.forecast(steps=12)

# Create DataFrame for Exponential Smoothing forecast
forecast_ets_df = pd.DataFrame({
    'Dates': future_dates,
    'Prices': forecast_ets
})


In [9]:
# Combine with historical data
forecast_ets_df = pd.concat([NaturalGas, forecast_ets_df], ignore_index=True)



In [10]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Plotting
fig = px.line(NaturalGas, x='Dates', y='Prices', title='Natural Gas Prices Over Time')
fig.add_scatter(x=forecast_arima_df['Dates'], y=forecast_arima_df['Prices'], mode='lines', name='ARIMA Forecast')
fig.add_scatter(x=forecast_sarima_df['Dates'], y=forecast_sarima_df['Prices'], mode='lines', name='SARIMA Forecast')
fig.add_scatter(x=forecast_ets_df['Dates'], y=forecast_ets_df['Prices'], mode='lines', name='ETS Forecast')
fig.show()

# Evaluation Metrics
def evaluate_model(model_fit, y_true):
    y_pred = model_fit.fittedvalues
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    aic = model_fit.aic
    bic = model_fit.bic
    return mae, rmse, aic, bic

mae_arima, rmse_arima, aic_arima, bic_arima = evaluate_model(model_arima_fit, NaturalGas['Prices'])
mae_sarima, rmse_sarima, aic_sarima, bic_sarima = evaluate_model(model_sarima_fit, NaturalGas['Prices'])
mae_ets, rmse_ets, aic_ets, bic_ets = evaluate_model(model_ets_fit, NaturalGas['Prices'])

print(f"ARIMA - MAE: {mae_arima}, RMSE: {rmse_arima}, AIC: {aic_arima}, BIC: {bic_arima}")
print(f"SARIMA - MAE: {mae_sarima}, RMSE: {rmse_sarima}, AIC: {aic_sarima}, BIC: {bic_sarima}")
print(f"ETS - MAE: {mae_ets}, RMSE: {rmse_ets}, AIC: {aic_ets}, BIC: {bic_ets}")


ARIMA - MAE: 0.4601144450117369, RMSE: 1.4945361191754003, AIC: 42.15453179610496, BIC: 53.25541740636531
SARIMA - MAE: 0.5111148975412954, RMSE: 1.6584873212096836, AIC: 11.29104086535445, BIC: 23.733825357269758
ETS - MAE: 0.12121829063959222, RMSE: 0.1589264139803229, AIC: -144.57414294826884, BIC: -114.63492677374259


In [11]:
def estimate_price(date_str, historical_data, forecast_data):
    try:
        date = pd.to_datetime(date_str, format='%Y-%m-%d')
    except ValueError:
        return "Invalid date format. Please use YYYY-MM-DD."

    if date in historical_data['Dates'].values:
        # If the date is within the historical data range
        price = historical_data.loc[historical_data['Dates'] == date, 'Prices'].values[0]
        return f"The price on {date_str} was ${price:.2f}."

    elif date in forecast_data['Dates'].values:
        # If the date is within the forecast range
        price = forecast_data.loc[forecast_data['Dates'] == date, 'Prices'].values[0]
        return f"The estimated price on {date_str} is ${price:.2f}."

    elif date > historical_data['Dates'].max() and date <= forecast_data['Dates'].max():
        # If the date is within the forecast period but not in forecast_data directly
        return "The date is within the forecast period but not explicitly forecasted. Please check later dates."

    else:
        return "The date is outside the range of the available data."

def chat_function():
    print("Hello! I'm here to help with your natural gas price inquiries.")
    while True:
        user_input = input("Please enter a date (YYYY-MM-DD) to get a price estimate, or type 'exit' to end the chat: ")
        if user_input.lower() == 'exit':
            print("Goodbye! Have a great day!")
            break
        try:
            result = estimate_price(user_input, NaturalGas, forecast_ets_df)
            print(result)
        except Exception as e:
            print(f"An error occurred: {e}. Please try again.")
            print(f"Debugging Info - Forecast Dates: {forecast_ets_df['Dates'].min()} to {forecast_ets_df['Dates'].max()}")
            print(f"Historical Dates: {NaturalGas['Dates'].min()} to {NaturalGas['Dates'].max()}")

In [13]:
# Start the chat function
chat_function()

Hello! I'm here to help with your natural gas price inquiries.
Please enter a date (YYYY-MM-DD) to get a price estimate, or type 'exit' to end the chat: 2025-07-30
The date is within the forecast period but not explicitly forecasted. Please check later dates.
Please enter a date (YYYY-MM-DD) to get a price estimate, or type 'exit' to end the chat: 2025-07-31
The estimated price on 2025-07-31 is $12.25.
Please enter a date (YYYY-MM-DD) to get a price estimate, or type 'exit' to end the chat: exit
Goodbye! Have a great day!
