<a href="https://colab.research.google.com/github/Joshua250304/JPMC-Research-Project/blob/main/Natural_Price_Gas_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files

# Prompt for the dataset and open Colab's browser upload dialog.
# files.upload() returns a mapping of uploaded filename -> file contents.
print("Please upload the 'Nat_Gas.csv' file.")
uploaded = files.upload()

# Echo back each received file with its size as a quick sanity check.
for fn, content in uploaded.items():
    print(f'User uploaded file "{fn}" with length {len(content)} bytes')

# Tip: to confirm the file landed in the working directory, list it with
# !ls

Please upload the 'Nat_Gas.csv' file.


Saving Nat_Gas.csv to Nat_Gas.csv
User uploaded file "Nat_Gas.csv" with length 888 bytes


In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.linear_model import LinearRegression

# 1. Load and Prepare Data
# Read the CSV, parse the MM/DD/YY date strings into timestamps, and order
# the observations chronologically.
df = (
    pd.read_csv('Nat_Gas.csv')
    .assign(Dates=lambda frame: pd.to_datetime(frame['Dates'], format='%m/%d/%y'))
    .sort_values('Dates')
)

# 2. Feature Engineering
# Two signals drive the model: a linear time index (days elapsed since the
# earliest observation) for the trend, and the calendar month for seasonality.
start_date = df['Dates'].min()
df = df.assign(
    days_from_start=(df['Dates'] - start_date).dt.days,
    month=df['Dates'].dt.month,
)

# One indicator column per calendar month found in the data (month_1, month_2, ...);
# the design matrix is the time index followed by those indicators.
month_dummies = pd.get_dummies(df['month'], prefix='month')
X = df[['days_from_start']].join(month_dummies)
y = df['Prices']

# 3. Train the Model
# Ordinary least squares on trend + month indicators.
model = LinearRegression().fit(X, y)

# 4. Define the Estimation Function
def get_gas_price(input_date_str):
    """
    Estimate the natural gas price for a single date.

    Rebuilds the feature row the module-level ``model`` was fitted on -- days
    elapsed since ``start_date`` plus one-hot month indicators -- and returns
    the model's prediction for it. Dates outside the training range are
    extrapolated along the fitted linear trend.

    Parameters
    ----------
    input_date_str : str or datetime-like
        The date to price, e.g. '2025-05-15'. Any value ``pd.to_datetime``
        can parse into a single timestamp is accepted.

    Returns
    -------
    float
        The estimated price, rounded to 2 decimal places.

    Raises
    ------
    ValueError
        If the input does not resolve to a valid date, or if the fitted model
        has no indicator column for the requested month.
    """
    target_date = pd.to_datetime(input_date_str)
    # pd.to_datetime maps None -> None and empty/NaN input -> NaT; fail early
    # with a clear message instead of an obscure error further down.
    if target_date is None or target_date is pd.NaT:
        raise ValueError(f"Could not interpret {input_date_str!r} as a date.")

    # Trend feature: whole days between the first training date and the target
    days = (target_date - start_date).days
    month_column = f'month_{target_date.month}'

    # Prefer the exact column names/order the model was fitted with: the
    # training dummies only contain months present in the data, so they are
    # not guaranteed to be month_1..month_12. Fall back to the full 12-month
    # layout when the model does not expose its feature names.
    feature_names = ['days_from_start'] + [f'month_{i}' for i in range(1, 13)]
    trained_names = getattr(model, 'feature_names_in_', None)
    if trained_names is not None:
        feature_names = list(trained_names)

    if month_column not in feature_names:
        raise ValueError(
            f"The model was not trained on any data for month {target_date.month}; "
            f"cannot estimate a price for {input_date_str!r}."
        )

    # Single-row frame: every feature zero except the day count and the
    # indicator of the target month.
    row = dict.fromkeys(feature_names, 0)
    row['days_from_start'] = days
    row[month_column] = 1
    features_df = pd.DataFrame([row], columns=feature_names)

    predicted_price = model.predict(features_df)[0]

    # Convert the numpy scalar to a plain Python float before rounding
    return round(float(predicted_price), 2)

# --- Smoke test: estimate the price for one sample date and report it ---
test_date = "2025-05-15"
price = get_gas_price(test_date)
message = f"Estimated Price for {test_date}: ${price}"
print(message)

Estimated Price for 2025-05-15: $12.12
