In [3]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.18.0-cp312-cp312-win_amd64.whl.metadata (3.3 kB)
Collecting tensorflow-intel==2.18.0 (from tensorflow)
  Downloading tensorflow_intel-2.18.0-cp312-cp312-win_amd64.whl.metadata (4.9 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow-intel==2.18.0->tensorflow)
  Using cached flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensor

In [5]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [24]:
data = pd.read_csv('FinalData.csv')

#### Display of Dataset

In [27]:
data

Unnamed: 0.1,Unnamed: 0,FIPS,County,Year,FIR,IncomePerCapita,FinalUR,FinalCPM,MajorityRace
0,0,1001,"Autauga, Alabama",2021,13.3,48347.0,2.8,3.60,White
1,1,1003,"Baldwin, Alabama",2021,11.8,54659.0,2.9,4.07,White
2,2,1005,"Barbour, Alabama",2021,17.8,40428.0,5.5,3.46,White
3,3,1007,"Bibb, Alabama",2021,14.9,36892.0,3.4,3.29,White
4,4,1009,"Blount, Alabama",2021,13.7,42634.0,2.4,3.24,White
...,...,...,...,...,...,...,...,...,...
34562,34562,56037,"Sweetwater, Wyoming",2011,10.9,47924.0,5.6,2.71,White
34563,34563,56039,"Teton, Wyoming",2011,13.5,152328.0,8.0,3.84,White
34564,34564,56041,"Uinta, Wyoming",2011,11.5,40668.0,6.6,2.49,White
34565,34565,56043,"Washakie, Wyoming",2011,11.7,42857.0,5.9,2.77,White


In [16]:
def prepareSequences(data, lookback = 3):
    features = ['FIR', 'IncomePerCapita', 'FinalUR', 'FinalCPM']
    for col in features:
        data[col] = pd.to_numeric(data[col], errors = 'coerce')
        
    data['FIPS'] = data['FIPS'].astype(int)
    data = data.sort_values(['FIPS', 'Year'])
    scaler = StandardScaler()
    data[features] = scaler.fit_transform(data[features])

    X, y = [], []
    for county in data['FIPS'].unique():
        countyData = data[data['FIPS'] == county]
        if len(countyData) < lookback + 1:
            continue
        for i in range(len(countyData) - lookback):
            X.append(countyData[features].iloc[i : (i + lookback)].values)
            y.append(countyData['FIR'].iloc[i + lookback])
                     
    return np.array(X), np.array(y), scaler

In [18]:
def createModel(lookback, n_features):
    model = Sequential([
        LSTM(64, input_shape=(lookback, n_features), return_sequences = True),
        Dropout(0.2),
        LSTM(32),
        Dropout(0.2),
        Dense(16, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

In [43]:
def predictCounty(fipsCode, model, data, scaler, lookback = 3):
    features = ['FIR', 'IncomePerCapita', 'FinalUR', 'FinalCPM']
    countyData = data[data['FIPS'] == fipsCode].sort_values('Year')
    countyData = countyData.tail(lookback)
    if len(countyData) < lookback:
        return None, "Insufficient Data"
    countyData[features] = scaler.transform(countyData[features])
    X = countyData[features].values.reshape(1, lookback, len(features))
    pred_scaled = model.predict(X)
    pred_reshaped = np.zeros((1, len(features)))
    pred_reshaped[0, 0] = pred_scaled[0]
    pred = scaler.inverse_transform(pred_reshaped)[0, 0]
    return pred, None

In [31]:
trainData = data[data['Year'] < 2021].copy()
testData = data[data['Year'] == 2021]. copy()
X_train, y_train, scaler = prepareSequences(trainData)
n_features = X_train.shape[2]

In [33]:
model = createModel(lookback = 3, n_features = n_features)

  super().__init__(**kwargs)


In [35]:
def evaluatePredictions(testData, model, trainData, scaler):
    results = []
    for county in testData['FIPS'].unique()[:20]:
        pred, error = predictCounty(county, model, trainData, scaler)
        if not error:
            actual = testData[testData['FIPS'] == county]['FIR'].iloc[0]
            results.append({
                'county_id': county,
                'actual': actual,
                'predicted': pred,
                'error': abs(actual - pred)
            })
    results_df = pd.DataFrame(results)
    return results_df            

In [45]:
results_df = evaluatePredictions(testData, model, trainData, scaler)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step


  pred_reshaped[0, 0] = pred_scaled[0]
  pred_reshaped[0, 0] = pred_scaled[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step

  pred_reshaped[0, 0] = pred_scaled[0]
  pred_reshaped[0, 0] = pred_scaled[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step

  pred_reshaped[0, 0] = pred_scaled[0]
  pred_reshaped[0, 0] = pred_scaled[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step


  pred_reshaped[0, 0] = pred_scaled[0]
  pred_reshaped[0, 0] = pred_scaled[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step


  pred_reshaped[0, 0] = pred_scaled[0]
  pred_reshaped[0, 0] = pred_scaled[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step


  pred_reshaped[0, 0] = pred_scaled[0]
  pred_reshaped[0, 0] = pred_scaled[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step

  pred_reshaped[0, 0] = pred_scaled[0]
  pred_reshaped[0, 0] = pred_scaled[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step

  pred_reshaped[0, 0] = pred_scaled[0]
  pred_reshaped[0, 0] = pred_scaled[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step

  pred_reshaped[0, 0] = pred_scaled[0]
  pred_reshaped[0, 0] = pred_scaled[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step


  pred_reshaped[0, 0] = pred_scaled[0]
  pred_reshaped[0, 0] = pred_scaled[0]


In [49]:
print("\nPrediction Results Summary:")
print(f"Mean Absolute Error: {results_df['error'].mean():.2f}")
print(f"Median Absolute Error: {results_df['error'].median():.2f}")
print("\nTop 5 Most Accurate Predictions:")
print(results_df.nsmallest(5, 'error')[['county_id', 'actual', 'predicted', 'error']])
print("\nTop 5 Least Accurate Predictions:")
print(results_df.nlargest(5, 'error')[['county_id', 'actual', 'predicted', 'error']])


Prediction Results Summary:
Mean Absolute Error: 1.32
Median Absolute Error: 1.28

Top 5 Most Accurate Predictions:
    county_id  actual  predicted     error
4        1009    13.7  13.934692  0.234692
6        1013    13.8  14.048081  0.248081
15       1031    14.6  14.122941  0.477059
8        1017    13.3  13.928321  0.628321
9        1019    14.7  13.986006  0.713994

Top 5 Least Accurate Predictions:
    county_id  actual  predicted     error
2        1005    17.8  14.151394  3.648606
1        1003    11.8  14.140072  2.340072
12       1025    16.3  14.058573  2.241427
19       1039    15.9  14.001964  1.898036
5        1011    15.6  13.998015  1.601985
