In [8]:
import pandas as pd
import numpy as np
from prophet import Prophet


  from .autonotebook import tqdm as notebook_tqdm


In [9]:

# Load your dataset
data = pd.read_csv("data/rm-yields-data.csv")

In [11]:


# Prepare the dataset for Prophet
def prepare_data(column_name):
    df = data[['Year', 'RM', column_name]].dropna()
    df = df.rename(columns={'Year': 'ds', column_name: 'y'})
    return df

def train_and_predict(column_name):
    df = prepare_data(column_name)
    model = Prophet()
    
    # Add the RM data as an additional regressor
    model.add_regressor('RM')
    
    model.fit(df)
    
    # Create future dataframe with RM data
    future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})
    future_RM = data[['Year', 'RM']].drop_duplicates().tail(5)
    future_RM['Year'] = pd.to_datetime(future_RM['Year'].astype(str), format='%Y')
    future_years['RM'] = future_RM['RM'].values
    
    # Ensure 'Year' values are datetime objects
    df['ds'] = pd.to_datetime(df['ds'].astype(str), format='%Y')
    
    future = pd.concat([df[['ds', 'RM']], future_years], ignore_index=True)
    
    forecast = model.predict(future)
    return forecast


# Forecast the next 5 years for each column
predictions = {}
for column in data.columns[2:]:
    print(f"Predicting {column}...")
    predictions[column] = train_and_predict(column)

# Save predictions to CSV files
for column, prediction in predictions.items():
    prediction.to_csv(f"{column}_predictions.csv", index=False)


13:22:24 - cmdstanpy - INFO - Chain [1] start processing


Predicting Winter Wheat...


13:22:25 - cmdstanpy - INFO - Chain [1] done processing
  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})


Predicting Canola...


13:22:26 - cmdstanpy - INFO - Chain [1] start processing
13:22:30 - cmdstanpy - INFO - Chain [1] done processing
  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})


Predicting Spring Wheat...


13:22:32 - cmdstanpy - INFO - Chain [1] start processing
13:22:44 - cmdstanpy - INFO - Chain [1] done processing
  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})
13:22:48 - cmdstanpy - INFO - Chain [1] start processing


Predicting Mustard...


13:22:48 - cmdstanpy - INFO - Chain [1] done processing
  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})


Predicting Durum...


13:22:49 - cmdstanpy - INFO - Chain [1] start processing
13:22:53 - cmdstanpy - INFO - Chain [1] done processing
  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})
13:22:55 - cmdstanpy - INFO - Chain [1] start processing
13:22:55 - cmdstanpy - INFO - Chain [1] done processing


Predicting Sunflowers...


  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})


Predicting Oats...


13:22:55 - cmdstanpy - INFO - Chain [1] start processing
13:23:05 - cmdstanpy - INFO - Chain [1] done processing
  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})


Predicting Lentils...


13:23:09 - cmdstanpy - INFO - Chain [1] start processing
13:23:09 - cmdstanpy - INFO - Chain [1] done processing
  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})


Predicting Peas...


13:23:10 - cmdstanpy - INFO - Chain [1] start processing
13:23:13 - cmdstanpy - INFO - Chain [1] done processing
  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})


Predicting Barley...


13:23:15 - cmdstanpy - INFO - Chain [1] start processing
13:23:24 - cmdstanpy - INFO - Chain [1] done processing
  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})


Predicting Fall Rye...


13:23:28 - cmdstanpy - INFO - Chain [1] start processing
13:23:37 - cmdstanpy - INFO - Chain [1] done processing
  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})
13:23:40 - cmdstanpy - INFO - Chain [1] start processing


Predicting Canary Seed...


13:23:40 - cmdstanpy - INFO - Chain [1] done processing
  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})
13:23:41 - cmdstanpy - INFO - Chain [1] start processing
13:23:41 - cmdstanpy - INFO - Chain [1] done processing


Predicting Spring Rye...


  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})
13:23:41 - cmdstanpy - INFO - Chain [1] start processing


Predicting Tame Hay...


13:23:42 - cmdstanpy - INFO - Chain [1] done processing
  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})


Predicting Flax...


13:23:43 - cmdstanpy - INFO - Chain [1] start processing
13:23:56 - cmdstanpy - INFO - Chain [1] done processing
  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})
13:23:59 - cmdstanpy - INFO - Chain [1] start processing


Predicting Chickpeas...


13:23:59 - cmdstanpy - INFO - Chain [1] done processing
  future_years = pd.DataFrame({'ds': pd.date_range(start='2022', end='2027', freq='Y', closed='right')})
