In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import datetime

In [2]:
df = pd.read_csv('Household_Heating_Gas.csv')

In [3]:
df

Unnamed: 0,Location,Month,Prices
0,"St. John's, Newfoundland and Labrador",Jan-90,34.4
1,"St. John's, Newfoundland and Labrador",Feb-90,36.6
2,"St. John's, Newfoundland and Labrador",Mar-90,36.6
3,"St. John's, Newfoundland and Labrador",Apr-90,36.7
4,"St. John's, Newfoundland and Labrador",May-90,36.7
...,...,...,...
6586,"Yellowknife, Northwest Territories",Nov-23,199.0
6587,"Yellowknife, Northwest Territories",Dec-23,197.9
6588,"Yellowknife, Northwest Territories",Jan-24,194.9
6589,"Yellowknife, Northwest Territories",Feb-24,175.0


In [4]:
df['Month'] = pd.to_datetime(df['Month'], format='%b-%y')

In [5]:
df['Year'] = df['Month'].dt.year
df['Month_Num'] = df['Month'].dt.month

In [6]:
df = df[pd.to_numeric(df['Prices'], errors='coerce').notnull()]

In [7]:
df['Prices'] = df['Prices'].astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Prices'] = df['Prices'].astype(float)


In [8]:
groups = df.groupby('Location')
df

Unnamed: 0,Location,Month,Prices,Year,Month_Num
0,"St. John's, Newfoundland and Labrador",1990-01-01,34.4,1990.0,1.0
1,"St. John's, Newfoundland and Labrador",1990-02-01,36.6,1990.0,2.0
2,"St. John's, Newfoundland and Labrador",1990-03-01,36.6,1990.0,3.0
3,"St. John's, Newfoundland and Labrador",1990-04-01,36.7,1990.0,4.0
4,"St. John's, Newfoundland and Labrador",1990-05-01,36.7,1990.0,5.0
...,...,...,...,...,...
6586,"Yellowknife, Northwest Territories",2023-11-01,199.0,2023.0,11.0
6587,"Yellowknife, Northwest Territories",2023-12-01,197.9,2023.0,12.0
6588,"Yellowknife, Northwest Territories",2024-01-01,194.9,2024.0,1.0
6589,"Yellowknife, Northwest Territories",2024-02-01,175.0,2024.0,2.0


In [9]:
predicted_df = pd.DataFrame()


In [10]:
def predict_for_group(group):  # Define a function for clarity
    # Splitting the data into features (X) and target variable (y)
    X = group[['Year', 'Month_Num']]
    y = group['Prices']

    # Training a linear regression model
    model = LinearRegression()
    model.fit(X, y)

    # Create a DataFrame for future dates for this location
    max_date = group['Month'].dt.to_period('M').max().to_timestamp('M')  
    next_month_start = max_date + pd.offsets.MonthBegin(1)  
    future_dates = pd.date_range(start=next_month_start, periods=60, freq='M')  

    future_df = pd.DataFrame({'Month': future_dates})
    future_df['Year'] = future_df['Month'].dt.year
    future_df['Month_Num'] = future_df['Month'].dt.month

    # Make predictions for this location
    future_predictions = model.predict(future_df[['Year', 'Month_Num']])

    # Add predictions to the predicted DataFrame
    future_df['Predicted Prices'] = future_predictions
    future_df['Location'] = name

    return future_df  # Return the predictions for this group

# Apply changes to all groups:
predicted_df = pd.DataFrame()  # Initialize empty DataFrame
for name, group in groups:
    predictions = predict_for_group(group)
    predicted_df = pd.concat([predicted_df, predictions])


In [11]:
# Round predicted prices to 1 decimal place
predicted_df['Predicted Prices'] = predicted_df['Predicted Prices'].round(1)
predicted_df['Month'] = predicted_df['Month_Num']
predicted_df.drop('Month_Num', axis=1, inplace=True)
predicted_df.reset_index(drop=True, inplace=True)
predicted_df.head(12)


Unnamed: 0,Month,Year,Predicted Prices,Location
0,4,2024,124.8,"Charlottetown, Prince Edward Island"
1,5,2024,125.1,"Charlottetown, Prince Edward Island"
2,6,2024,125.3,"Charlottetown, Prince Edward Island"
3,7,2024,125.5,"Charlottetown, Prince Edward Island"
4,8,2024,125.8,"Charlottetown, Prince Edward Island"
5,9,2024,126.0,"Charlottetown, Prince Edward Island"
6,10,2024,126.2,"Charlottetown, Prince Edward Island"
7,11,2024,126.5,"Charlottetown, Prince Edward Island"
8,12,2024,126.7,"Charlottetown, Prince Edward Island"
9,1,2025,127.1,"Charlottetown, Prince Edward Island"


In [12]:
predicted_df.to_csv('Predicted_Household_Heating_Gas_Prices.csv', index=False)