<a href="https://colab.research.google.com/github/atzingan/DAEN690/blob/main/Forecasting%20Models/TotalHuntersbyStateandSpecies_ARIMA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

# Source CSV of harvest estimates; the file is stored in GitHub LFS and is
# fetched through the repository's raw-download URL.
url = 'https://github.com/gjrsas/DAEN690/raw/main/Data/vw_harvest_estimates.csv'

# Load the full dataset once at cell level and print its (rows, columns)
# shape as a quick sanity check that the download worked.
df = pd.read_csv(url)
print(df.shape)

(9797, 23)


In [None]:
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error

def load_data(url):
    """Read the CSV located at ``url`` and return it as a DataFrame.

    ``url`` is forwarded unchanged to ``pandas.read_csv``, so anything that
    function accepts (URL, local path, file-like object) works here.
    """
    frame = pd.read_csv(url)
    return frame

def preprocess_data(data):
    """Return a copy of ``data`` with ``season`` parsed as dates and sorted.

    The ``season`` column is parsed with the ``'%Y'`` (four-digit year)
    format and the rows are ordered by it in ascending order. The original
    row index labels are kept.

    The input frame is never modified: callers typically pass a filtered
    slice of a larger frame, and writing into such a slice is what triggers
    pandas' ``SettingWithCopyWarning``.

    Args:
        data: DataFrame containing a ``season`` column.

    Returns:
        A new DataFrame with a datetime ``season`` column, sorted by season.
    """
    prepared = data.copy()
    prepared['season'] = pd.to_datetime(prepared['season'], format='%Y')
    return prepared.sort_values('season')

def fit_arima_model(train_series, order=(3, 1, 3)):
    """Fit an ARIMA model of the given ``order`` to ``train_series``.

    The series is cast to ``float64`` before it is handed to ``ARIMA``; the
    object returned by the model's ``fit()`` call is passed back to the
    caller.
    """
    endog = train_series.astype(np.float64)
    model = ARIMA(endog, order=order)
    fitted = model.fit()
    return fitted

def calculate_metrics(actual, predicted):
    """Compute RMSE, MAE and MAPE between two equally long sequences.

    Both inputs are converted to plain float arrays first so that all three
    metrics compare values position by position. Without this, subtracting
    two pandas Series aligns them on their index labels, and a forecast
    whose index differs from the test index would give a NaN or partial
    MAPE while RMSE/MAE still looked fine.

    Args:
        actual: Observed values (array-like or Series).
        predicted: Predicted values, same length as ``actual``.

    Returns:
        Tuple ``(rmse, mae, mape)``. ``mape`` is a percentage computed only
        over points whose actual value is non-zero (the percentage error is
        undefined at zero); it is ``nan`` when every actual value is zero.
    """
    actual_values = np.asarray(actual, dtype=np.float64)
    predicted_values = np.asarray(predicted, dtype=np.float64)

    rmse = np.sqrt(mean_squared_error(actual_values, predicted_values))
    mae = mean_absolute_error(actual_values, predicted_values)

    # Skip zero actuals instead of dividing by zero and reporting inf/nan.
    nonzero = actual_values != 0
    if nonzero.any():
        relative_errors = (
            actual_values[nonzero] - predicted_values[nonzero]
        ) / actual_values[nonzero]
        mape = np.mean(np.abs(relative_errors)) * 100
    else:
        mape = np.nan
    return rmse, mae, mape

def main():
    """Fit one ARIMA model per state/species pair and export the results.

    Steps:
      1. Load the CSV referenced by the module-level ``url``.
      2. Sum ``active_hunters`` per (survey_state, sp_group_estimated, season).
      3. For every state/species pair with more than five seasons, hold out
         the last two seasons, fit ARIMA on the rest and score the two-step
         forecast (RMSE, MAE, MAPE).
      4. Refit on the complete series and forecast one step ahead, i.e. the
         season after the last one available.
      5. Write all rows to ``arima_forecast_results.xlsx``.

    Pairs whose model cannot be fitted are reported on stdout and skipped.
    """
    data = load_data(url)

    # Group and sum the data
    grouped_data = (
        data.groupby(['survey_state', 'sp_group_estimated', 'season'])['active_hunters']
        .sum()
        .reset_index()
    )

    # Get unique states and species
    states = grouped_data['survey_state'].unique()
    species_list = grouped_data['sp_group_estimated'].unique()

    # Rows are collected in a list and turned into a DataFrame once at the
    # end; DataFrame.append is deprecated and no longer exists in pandas 2.x.
    result_rows = []
    holdout = 2  # number of trailing seasons kept back for evaluation

    # Loop through each state and species combination
    for state in states:
        state_data = grouped_data[grouped_data['survey_state'] == state]
        for species in species_list:
            # Explicit copy: the filtered frame is a slice of grouped_data and
            # must be safe to modify during preprocessing.
            specific_data = state_data[state_data['sp_group_estimated'] == species].copy()

            # Require at least 6 data points to split
            if len(specific_data) <= 5:
                continue

            specific_data = preprocess_data(specific_data)
            full_series = specific_data.set_index('season')['active_hunters']
            time_series_train = full_series.iloc[:-holdout]
            time_series_test = full_series.iloc[-holdout:]

            try:
                # Fit ARIMA model on training set
                arima_results = fit_arima_model(time_series_train)

                # Forecast by step count rather than by date label: the season
                # index carries no frequency, so date-based start/end lookups
                # fail or return the wrong number of points when years are
                # missing.
                predictions = np.asarray(arima_results.forecast(steps=len(time_series_test)))
                rmse, mae, mape = calculate_metrics(time_series_test.to_numpy(), predictions)

                # Forecast the next year beyond the available data. The model
                # is refitted on the complete series; a one-step forecast from
                # the training-only model would merely predict the first
                # held-out season.
                final_results = fit_arima_model(full_series)
                forecast = float(np.asarray(final_results.forecast(steps=1))[0])
            except Exception as e:
                # Best effort: one failing series must not abort the whole run.
                print(f"Could not fit ARIMA model for State: {state}, Species: {species}. Error: {e}")
                continue

            result_rows.append({
                'State': state,
                'Species': species,
                'Prediction': forecast,
                'RMSE': rmse,
                'MAE': mae,
                'MAPE': mape,
            })

    results_df = pd.DataFrame(
        result_rows,
        columns=['State', 'Species', 'Prediction', 'RMSE', 'MAE', 'MAPE'],
    )

    # Save the results to an Excel file
    results_df.to_excel('arima_forecast_results.xlsx', index=False)

# Run the full forecasting pipeline only when this file is executed directly
# (e.g. as a script or notebook cell), not when it is imported.
if __name__ == "__main__":
    main()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['season'] = pd.to_datetime(data['season'], format='%Y')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('season', inplace=True)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pyda

Could not fit ARIMA model for Flyway Region: IA, Species: mourning dove. Error: 'The `start` argument could not be matched to a location related to the index of the data.'


  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['season'] = pd.to_datetime(data['season'], format='%Y')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('season', inplace=True)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/sta

Could not fit ARIMA model for Flyway Region: KS, Species: American woodcock. Error: Found input variables with inconsistent numbers of samples: [2, 3]


  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['season'] = pd.to_datetime(data['season'], format='%Y')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('season', inplace=True)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

Could not fit ARIMA model for Flyway Region: KS, Species: white-winged dove. Error: 'The `start` argument could not be matched to a location related to the index of the data.'


  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['season'] = pd.to_datetime(data['season'], format='%Y')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('season', inplace=True)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('season', inplace=True)
A value is

Could not fit ARIMA model for Flyway Region: LA, Species: white-winged dove. Error: 'The `start` argument could not be matched to a location related to the index of the data.'


  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['season'] = pd.to_datetime(data['season'], format='%Y')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('season', inplace=True)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the

Could not fit ARIMA model for Flyway Region: MN, Species: mourning dove. Error: LU decomposition error.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['season'] = pd.to_datetime(data['season'], format='%Y')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('season', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('season', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas

Could not fit ARIMA model for Flyway Region: MS, Species: white-winged dove. Error: 'The `start` argument could not be matched to a location related to the index of the data.'


  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['season'] = pd.to_datetime(data['season'], format='%Y')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('season', inplace=True)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/sta

Could not fit ARIMA model for Flyway Region: NE, Species: American woodcock. Error: 'The `start` argument could not be matched to a location related to the index of the data.'


  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['season'] = pd.to_datetime(data['season'], format='%Y')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('season', inplace=True)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-ve

Could not fit ARIMA model for Flyway Region: NH, Species: brant. Error: 'The `start` argument could not be matched to a location related to the index of the data.'


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['season'] = pd.to_datetime(data['season'], format='%Y')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('season', inplace=True)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

Could not fit ARIMA model for Flyway Region: OK, Species: American woodcock. Error: 'The `start` argument could not be matched to a location related to the index of the data.'


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['season'] = pd.to_datetime(data['season'], format='%Y')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('season', inplace=True)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['season'] = pd.to_dateti

Could not fit ARIMA model for Flyway Region: TN, Species: American woodcock. Error: 'The `start` argument could not be matched to a location related to the index of the data.'


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['season'] = pd.to_datetime(data['season'], format='%Y')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('season', inplace=True)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pyda

Could not fit ARIMA model for Flyway Region: WI, Species: mourning dove. Error: LU decomposition error.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['season'] = pd.to_datetime(data['season'], format='%Y')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('season', inplace=True)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  results_df = results_df.append({
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returnin