### Imports

In [1]:
import sqlite3 as sql
import pandas as pd
import torch.nn as nn
import torch
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn.preprocessing import OneHotEncoder
import tqdm
import random
import matplotlib.pyplot as plt
from models import *
from utils import *

### Model Loading

In [2]:
import pickle

# Restore the pickled (model, loss_data) pair saved under models/.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open('models/model_combined.pkl', 'rb') as model_file:
    model, loss_data = pickle.load(model_file)

In [None]:
def generate_state_predictions(model, unemployment_rate, industry_code,
                               state_encoder=None, industry_encoder=None, state_dict=None):
    """
    Generate one prediction per state for a given unemployment rate and industry.

    For every category known to ``state_encoder`` a single-row feature tensor is
    built as ``[unemployment_rate, encoded(state), encoded(industry_code)]`` and
    handed to ``model.predict``.

    Parameters:
    model: Object exposing ``predict(features)``. It is called once per state with a
        float32 tensor of shape (1, n_features).
    unemployment_rate (float): Unemployment rate, used as the first feature.
    industry_code: Industry category in the form the industry encoder accepts
        (the example cell passes an ``int`` NAICS code).
    state_encoder (optional): Fitted encoder exposing ``categories_`` and ``transform``.
        Defaults to the notebook-level ``state_encoder``.
    industry_encoder (optional): Fitted encoder exposing ``transform``.
        Defaults to the notebook-level ``industry_encoder``.
    state_dict (dict, optional): Maps each category in ``state_encoder.categories_[0]``
        to the label written to the 'State' column. Defaults to the notebook-level
        ``state_dict``.

    Returns:
    pd.DataFrame: Columns 'State' and 'Prediction', one row per state category,
        in the encoder's category order.

    Raises:
    NameError: If one of the optional objects is neither passed in nor defined at
        notebook level.
    """
    def _notebook_default(name, value):
        # Explicit arguments win; otherwise use the notebook-level object of the same name.
        if value is not None:
            return value
        try:
            return globals()[name]
        except KeyError:
            raise NameError(f"name '{name}' is not defined; pass it as an argument") from None

    def _encoded_column(encoder, category):
        # Encoders may hand back a sparse matrix (scikit-learn's OneHotEncoder default),
        # which np.concatenate cannot consume -- densify before reshaping to a column.
        encoded = encoder.transform([[category]])
        if hasattr(encoded, 'toarray'):
            encoded = encoded.toarray()
        return np.asarray(encoded).reshape(-1, 1)

    state_encoder = _notebook_default('state_encoder', state_encoder)
    industry_encoder = _notebook_default('industry_encoder', industry_encoder)
    state_dict = _notebook_default('state_dict', state_dict)

    states = state_encoder.categories_[0]

    # These two pieces are identical for every state, so build them once.
    rate_column = np.array([unemployment_rate]).reshape(-1, 1)
    industry_column = _encoded_column(industry_encoder, industry_code)

    # One (1, n_features) float32 tensor per state: the column vectors are stacked
    # vertically and then transposed into a single row.
    all_inputs = [
        torch.tensor(
            np.concatenate(
                [rate_column, _encoded_column(state_encoder, state), industry_column],
                axis=0,
            ),
            dtype=torch.float32,
        ).T
        for state in states
    ]

    # squeeze() would collapse a single-state result to a 0-d array; atleast_1d keeps
    # it a proper column for the DataFrame.
    predictions = np.atleast_1d(
        np.array([model.predict(features) for features in all_inputs]).squeeze()
    )

    return pd.DataFrame({
        'State': [state_dict[state] for state in states],
        'Prediction': predictions,
    })

#### Example use: for a randomly chosen unemployment rate and industry, we get a predicted value for each state:

In [45]:
# Draw a random scenario: one industry code and an unemployment rate
# between 3.0 and 10.0, rounded to two decimals.
industry_code = random.choice(naics_codes)
unemployment_rate = round(random.uniform(3.0, 10.0), 2)

# Announce which scenario is being scored.
print(f"Predictions for industry: {industry_dict_abbrev[industry_code]} and Unemployment Rate: {unemployment_rate}%")

# The industry code is cast to int before being handed to the prediction helper.
df_predictions = generate_state_predictions(
    model=model,
    unemployment_rate=unemployment_rate,
    industry_code=int(industry_code),
)

# Show the first ten states.
print(df_predictions.head(10))

Predictions for industry: Mining, Quarrying, and Oil and Gas Extraction and Unemployment Rate: 8.44%
  State  Prediction
0    AL   89.719345
1    AK   89.753082
2    AZ   87.285820
3    AR   88.651451
4    CA   86.420784
5    CO   87.714851
6    CT   90.035545
7    DE   84.606300
8    DC   86.638634
9    FL   87.624466
