In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from matplotlib import pyplot
from pandas import datetime

  from pandas import datetime


In [2]:
## State income data can be found at https://www2.census.gov/programs-surveys/cps/tables/time-series/historical-income-households/h08.xlsx

In [3]:
## Load the income table from the local CSV. The raw download needs minor
## manual cleaning before it can be read here; the 'State' column is used
## as the merge key further down.
data = pd.read_csv(filepath_or_buffer="state_income.csv")


In [4]:
## Load the state-name lookup table; its 'US STATE' and 'ABBREVIATION'
## columns are the ones used by later cells.
abbrevs = pd.read_csv(filepath_or_buffer='name_list.csv')


In [5]:
## Attach each state's abbreviation to its income row. A left join keeps
## every income row even when no matching name exists in the lookup table.
data = pd.merge(
    data,
    abbrevs,
    how='left',
    left_on='State',
    right_on='US STATE',
)

In [6]:
## Remove the row whose index label is 8.
## NOTE(review): which record this is cannot be seen from the notebook --
## presumably a row that should not be modelled; confirm against the CSV.
## The drop is label-based, so it depends on the row order of the input file.
data = data.drop(index=8)

In [7]:
## Reshape to long format: one row per (ABBREVIATION, original column), with
## the original column name stored in 'Year' and its content in 'value'.
## Every non-id column is melted, including 'State' and 'US STATE'.
data = data.melt(id_vars='ABBREVIATION', var_name='Year', value_name='value')

## Reshape back to wide format for modelling: rows are the 'Year' labels and
## there is one column of values per state abbreviation.
data1 = data.pivot(index='Year', columns='ABBREVIATION')['value']


In [8]:
## 'State' and 'US STATE' were melted along with the year columns, so they
## show up as index labels in the wide table; remove those two rows.
data1 = data1.drop(index=['State', 'US STATE'])

In [9]:
## Remove the same two melted name columns from the long-format table, so
## that 'Year' only holds the labels of the original year columns.
data = data[~data['Year'].isin(['State', 'US STATE'])]

In [11]:
## Parse the year labels of the wide table into timestamps
## (four-digit year format).
data1.index = pd.to_datetime(arg=data1.index, format='%Y')

In [12]:
## Create the list of state abbreviations to model.
data = data.sort_values(by = "Year")

## list(set(...)) iterates in hash order, which changes between kernel
## sessions, so the order in which states are processed (and in which their
## columns are first added to `final`) was not reproducible. Use a sorted list
## of the distinct abbreviations instead. Missing abbreviations (possible
## after the left merge) are ignored because they cannot be used as a column
## key and cannot be sorted together with strings.
list_states = sorted(data['ABBREVIATION'].dropna().unique())

In [13]:
## Collect one frame per state and concatenate once after the loop.
## DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
## appending inside the loop re-copies the accumulated frame every time.
state_frames = []

## Exclusive upper bounds of the (p, d, q) grid searched for every state.
## They do not depend on the state, so they are set once, outside the loop.
## NOTE(review): d1 = 1 means range(d1) only ever tries d = 0, i.e. no
## differencing -- confirm this is intended.
p1 = 4
d1 = 1
q1 = 4

## Loop through all states.
for state in list_states:
    filtered = data1[state].copy()

    ## Fit on a yearly PeriodIndex with integer values.
    filtered.index = filtered.index.to_period('Y')
    filtered = filtered.astype('int')

    ## AIC of every candidate order, stored as list_aic[p, d, q].
    ## np.empty is safe here because every cell is filled below.
    list_aic = np.empty((p1, d1, q1))

    ## Loop through parameter values and store AIC.
    for p in range(p1):
        for d in range(d1):
            for q in range(q1):
                model = ARIMA(filtered, order=(p, d, q))
                model_fit = model.fit()
                list_aic[p, d, q] = model_fit.aic

    ## Find the parameters which minimize AIC.
    ## The previous hand-written decoding `index // d1*q1` was evaluated as
    ## `(index // d1) * q1`, which returned a wrong (much too large) p for
    ## every flat index other than 0. np.unravel_index maps the flat position
    ## back to (p, d, q) for any grid shape. nanargmin skips candidates whose
    ## AIC is NaN and returns the first minimum on ties.
    index = np.nanargmin(list_aic)
    opt_p, opt_d, opt_q = (int(i) for i in np.unravel_index(index, list_aic.shape))

    ## Create optimal ARIMA model
    optimal = ARIMA(filtered, order = (opt_p, opt_d, opt_q))
    optimal_fit = optimal.fit()

    ## Observed history followed by a 5-step forecast, both stored in a
    ## column named after the state, plus a 'State' label column.
    output = pd.DataFrame(filtered.copy())
    addition = pd.DataFrame(optimal_fit.forecast(steps = 5)).rename(columns = {'predicted_mean':state})
    output = pd.concat([output, addition])
    output['State'] = state
    state_frames.append(output)

## Dataframe holding history and predictions for all states
## (empty when there were no states to process).
final = pd.concat(state_frames) if state_frames else pd.DataFrame()


MN


  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'


In [14]:
## Move the period index into a regular column so it can be used as the
## pivot key in the export step (which expects that column to be called
## 'index').
final = final.reset_index(drop=False)

In [15]:
## Reshape to one row per period, then write the result to CSV.
## NOTE(review): no `values=` is given, so every remaining column is pivoted
## against 'State'. Because each state's numbers already sit in a column
## named after that state, this presumably produces a two-level
## (value column, State) header that is mostly empty -- confirm this is the
## intended layout of the output file.
final = final.pivot(index='index', columns='State')
final = final.reset_index()
final.to_csv('income_proj_train.csv', index=False)