In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the CSV
# NOTE(review): this is an absolute path on one specific machine (and the
# directory name 'Job simulations ' ends with a space). The notebook will not
# run elsewhere unless the file sits at exactly this location; consider a path
# relative to the notebook or a configurable data directory.
file_path = '/Users/jahdovanterpool/Downloads/Projects/Job simulations /Nat_Gas.csv'
df = pd.read_csv(file_path)

In [None]:
# Preview data: first rows, then the schema, then summary statistics
print(df.head())
print('\n','='*150,'\n')

# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so appended a stray "None" line.
df.info()
print('\n','='*150)

print(df.describe())

In [None]:
# Convert dates from object to datetime.
# The explicit format reads each value as month/day/two-digit-year.
df['Dates'] = pd.to_datetime(df['Dates'], format = '%m/%d/%y')

In [None]:
# Re-inspect the frame's columns and dtypes
df.info()

In [None]:
# Order the observations chronologically; the original row labels are kept
df_sorted = df.sort_values(by='Dates', ascending=True)
df_sorted

In [None]:
# Draw the price history as a line chart with a marker on every observation
fig, ax = plt.subplots(figsize=(8, 6))
sns.lineplot(data=df_sorted, x='Dates', y='Prices', marker='o', ax=ax)
ax.set_title('Natural Gas Prices over time')
ax.set_xlabel('Dates')
ax.set_ylabel('Prices')
ax.grid(True)
plt.show()

In [None]:
# Derive the calendar month of each date, both as a number (for ordering)
# and as an abbreviated name (for axis labels)
df['Month_num'] = df['Dates'].dt.month
df['Month_str'] = df['Dates'].dt.strftime('%b')

# Mean price per calendar month, one row per month, ordered by month number
monthly_avg = (
    df.groupby(['Month_num', 'Month_str'])['Prices']
    .mean()
    .reset_index()
    .sort_values('Month_num')
)
monthly_avg

In [None]:
# Draw the average price of each calendar month to expose the seasonal pattern
fig, ax = plt.subplots(figsize=(8, 6))
sns.lineplot(data=monthly_avg, x='Month_str', y='Prices', marker='o', ax=ax)
ax.set_title('Natural Gas Prices By Month')
ax.set_xlabel('Month')
ax.set_ylabel('Prices')
ax.grid(True)
plt.show()

In [None]:
# Lookup table: month number -> mean price observed for that calendar month
monthly_avg_dict = (
    df.groupby(by=['Month_num'])['Prices']
    .mean()
    .to_dict()
)
monthly_avg_dict

In [None]:
# Create a function to interpolate and extrapolate

def get_price(input_date, prices=None, monthly_avg=None):
    """
    Return an estimated natural gas price for a given date.

    * Date inside the observed range: linear interpolation between the known
      observations on a daily grid.
    * Date up to 12 months after the last observation: the historical average
      price of that calendar month.
    * Any other date: an explanatory message string.

    Parameters
    ----------
    input_date : str, datetime-like
        Anything accepted by ``pd.to_datetime``. A time-of-day component is
        discarded, since prices are estimated per calendar day.
    prices : pandas.DataFrame, optional
        Frame with a datetime ``'Dates'`` column and a numeric ``'Prices'``
        column. Defaults to the notebook-level ``df_sorted``.
    monthly_avg : dict, optional
        Mapping of month number (1-12) to average price. Defaults to the
        notebook-level ``monthly_avg_dict``.

    Returns
    -------
    float
        Estimated price rounded to 2 decimals.
    None
        When extrapolating and no average exists for the requested month.
    str
        An error message when the date is outside the supported range.
    """
    # Fall back to the notebook's globals so existing get_price(date) calls
    # keep working, while allowing the data to be passed in explicitly.
    if prices is None:
        prices = df_sorted
    if monthly_avg is None:
        monthly_avg = monthly_avg_dict

    # Truncate to midnight: the interpolated series is indexed by whole days,
    # so a timestamp such as '2021-02-14 15:30' would otherwise raise KeyError.
    input_date = pd.to_datetime(input_date).floor('D')

    # Observed range that bounds interpolation
    min_date = prices['Dates'].min()
    max_date = prices['Dates'].max()

    if min_date <= input_date <= max_date:
        # Expand to one row per calendar day, then fill the gaps linearly
        daily_prices = (
            prices.set_index('Dates')['Prices']
            .sort_index()
            .asfreq('D')
            .interpolate(method='linear')
        )
        return round(daily_prices.loc[input_date], 2)

    if max_date < input_date <= max_date + pd.DateOffset(months=12):
        estimated_price = monthly_avg.get(input_date.month)
        # Compare against None explicitly: a legitimate average of 0.0 is
        # falsy and must not be reported as "no estimate".
        return round(estimated_price, 2) if estimated_price is not None else None

    return "Date is out of range! The function can only interpolate or extrapolate up to one year from the max day in data frame"


In [None]:
# Testing it out

# Allow users to input the desired dates
test_date_interp = input('Enter the date in format dd/mm/yyyy: ')
test_date_extrap = input('Enter the date in format dd/mm/yyyy: ')

# The prompt asks for day-first dates, but pandas parses an ambiguous string
# such as 05/03/2021 month-first by default (giving 3 May instead of 5 March).
# Parse with an explicit day-first format so the lookup matches what was typed.
test_interp = get_price(pd.to_datetime(test_date_interp.strip(), format='%d/%m/%Y'))
test_extrap = get_price(pd.to_datetime(test_date_extrap.strip(), format='%d/%m/%Y'))

# Print results
print(f' When we interpolate for the date: {test_date_interp} we get a price of: ${test_interp}')
print(f' When we extrapolate for the date: {test_date_extrap} we get an estimated price of: ${test_extrap}')

In [None]:
# Display the working frame as the cell's output
df