In [1]:
# Initial imports.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
# Load the combined US dataset (housing, income and population columns) from
# the CSV file under Resources/.
df_us_data = pd.read_csv(filepath_or_buffer='Resources/US_data_combined.csv')
# Leave the frame as the last expression so the notebook renders it.
df_us_data

Unnamed: 0,Year,Region,NAME,Home Value <$50k (%),"Home Value $50k-$99,999k (%)","Home Value $100k-$149,999k (%)","Home Value $150k-$199,999k (%)","Home Value $200k-$299,999k (%)","Home Value $100k-$299,999 (%)","Home Value $300k-$499,999k (%)",...,Median Real Estate Taxes ($),No 2nd or Equity Loan (%),2nd Mortgage or Equity Loan (%),Equity Loan Only (%),2nd Mortgage Only (%),Both 2nd & Equity Loan (%),Population,Net Population Change,Net Migration,Net Migration Change
0,2010,3,Alabama,9.1,23.6,20.3,17.8,16.8,61.7,8.7,...,568.0,82.3,17.2,12.3,4.9,0.6,4785437,5312,2168,
1,2010,4,Alaska,2.0,2.9,9.6,15.6,35.2,28.1,26.5,...,3177.0,84.4,14.9,10.4,4.5,0.7,713910,3661,1598,
2,2010,4,Arizona,4.0,13.4,20.5,19.5,21.4,53.4,14.4,...,1489.0,77.4,21.8,16.0,5.8,0.8,6407172,14884,5672,
3,2010,3,Arkansas,12.0,27.0,23.9,16.2,12.2,67.1,6.4,...,703.0,89.3,10.5,6.4,4.1,0.2,2921964,5933,3270,
4,2010,4,California,1.5,3.4,6.0,8.6,17.4,18.0,29.1,...,3284.0,72.2,26.3,18.6,7.6,1.6,37319502,64983,-721,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
463,2018,3,Florida,2.6,6.4,,,,55.1,24.6,...,2151.0,89.1,10.5,8.5,2.0,0.4,21244317,280704,267130,12.657811
464,2018,1,Massachusetts,1.0,0.8,,,,26.7,36.2,...,4801.0,81.5,18.1,15.8,2.3,0.5,6882635,22846,11129,1.619656
465,2018,3,District of Columbia,0.8,0.6,,,,8.9,27.1,...,3691.0,84.2,15.6,12.3,3.3,0.2,701547,6641,2431,3.481678
466,2018,4,Utah,1.9,1.2,,,,44.5,37.5,...,1788.0,84.7,14.7,11.0,3.7,0.6,3153550,52508,21841,6.983989


In [3]:
# Discard every column that contains at least one NaN value ("any" is the
# pandas default, spelled out here). The frame is modified in place.
df_us_data.dropna(axis='columns', how='any', inplace=True)
df_us_data

Unnamed: 0,Year,Region,NAME,Home Value <$50k (%),"Home Value $50k-$99,999k (%)","Home Value $100k-$299,999 (%)","Home Value $300k-$499,999k (%)",Home Value $500k+ (%),Median Home Value ($)(1000X),Median Housing Cost (monthly) ($),...,Income $150k+ (%),Median Income ($)(1000X),No 2nd or Equity Loan (%),2nd Mortgage or Equity Loan (%),Equity Loan Only (%),2nd Mortgage Only (%),Both 2nd & Equity Loan (%),Population,Net Population Change,Net Migration
0,2010,3,Alabama,9.1,23.6,61.7,8.7,3.7,142.7,1130,...,8.7,61.964,82.3,17.2,12.3,4.9,0.6,4785437,5312,2168
1,2010,4,Alaska,2.0,2.9,28.1,26.5,8.1,255.7,1772,...,19.1,94.747,84.4,14.9,10.4,4.5,0.7,713910,3661,1598
2,2010,4,Arizona,4.0,13.4,53.4,14.4,6.8,177.0,1442,...,10.2,66.539,77.4,21.8,16.0,5.8,0.8,6407172,14884,5672
3,2010,3,Arkansas,12.0,27.0,67.1,6.4,2.1,122.6,987,...,7.1,59.393,89.3,10.5,6.4,4.1,0.2,2921964,5933,3270
4,2010,4,California,1.5,3.4,18.0,29.1,34.0,377.7,2242,...,21.2,88.444,72.2,26.3,18.6,7.6,1.6,37319502,64983,-721
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
463,2018,3,Florida,2.6,6.4,55.1,24.6,11.4,247.0,1471,...,18.4,80.372,89.1,10.5,8.5,2.0,0.4,21244317,280704,267130
464,2018,1,Massachusetts,1.0,0.8,26.7,36.2,35.4,406.9,2207,...,37.3,121.565,81.5,18.1,15.8,2.3,0.5,6882635,22846,11129
465,2018,3,District of Columbia,0.8,0.6,8.9,27.1,62.7,627.0,2506,...,50.6,151.147,84.2,15.6,12.3,3.3,0.2,701547,6641,2431
466,2018,4,Utah,1.9,1.2,44.5,37.5,14.9,310.0,1531,...,19.3,93.060,84.7,14.7,11.0,3.7,0.6,3153550,52508,21841


In [4]:
# Gather the remaining column labels into a plain Python list and print them
# for reference in the calculations that follow.
cols = list(df_us_data.columns)
print(cols)

['Year', 'Region', 'NAME', 'Home Value <$50k (%)', 'Home Value $50k-$99,999k (%)', 'Home Value $100k-$299,999 (%)', 'Home Value $300k-$499,999k (%)', 'Home Value $500k+ (%)', 'Median Home Value ($)(1000X)', 'Median Housing Cost (monthly) ($)', 'Income <$10k (%)', 'Income $10k-$24,999 (%)', 'Income $25k-$34,999 (%)', 'Income $35k-$49,999 (%)', 'Income $50k-$74,999 (%)', 'Income $75k-$99,999 (%)', 'Income $100k-$149,999 (%)', 'Income $150k+ (%)', 'Median Income ($)(1000X)', 'No 2nd or Equity Loan (%)', '2nd Mortgage or Equity Loan (%)', 'Equity Loan Only (%)', '2nd Mortgage Only (%)', 'Both 2nd & Equity Loan (%)', 'Population', 'Net Population Change', 'Net Migration']


In [5]:
# Build the binary target: 1 ("time to leave") when a row's Median Home Value
# is strictly greater than 3 times its Median Income, otherwise 0.
# The comparison is done on whole columns, so it does not depend on the
# frame's index labels and needs no per-row Python loop.
home_value_to_income_limit = 3
leave = (
    df_us_data['Median Home Value ($)(1000X)']
    > home_value_to_income_limit * df_us_data['Median Income ($)(1000X)']
).astype(int).tolist()  # plain list of 0/1 ints, one entry per row
leave

[0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,


In [6]:
# Attach the 0/1 labels computed above to df_us_data as the 'Leave' column.
df_us_data = df_us_data.assign(Leave=leave)
df_us_data

Unnamed: 0,Year,Region,NAME,Home Value <$50k (%),"Home Value $50k-$99,999k (%)","Home Value $100k-$299,999 (%)","Home Value $300k-$499,999k (%)",Home Value $500k+ (%),Median Home Value ($)(1000X),Median Housing Cost (monthly) ($),...,Median Income ($)(1000X),No 2nd or Equity Loan (%),2nd Mortgage or Equity Loan (%),Equity Loan Only (%),2nd Mortgage Only (%),Both 2nd & Equity Loan (%),Population,Net Population Change,Net Migration,Leave
0,2010,3,Alabama,9.1,23.6,61.7,8.7,3.7,142.7,1130,...,61.964,82.3,17.2,12.3,4.9,0.6,4785437,5312,2168,0
1,2010,4,Alaska,2.0,2.9,28.1,26.5,8.1,255.7,1772,...,94.747,84.4,14.9,10.4,4.5,0.7,713910,3661,1598,0
2,2010,4,Arizona,4.0,13.4,53.4,14.4,6.8,177.0,1442,...,66.539,77.4,21.8,16.0,5.8,0.8,6407172,14884,5672,0
3,2010,3,Arkansas,12.0,27.0,67.1,6.4,2.1,122.6,987,...,59.393,89.3,10.5,6.4,4.1,0.2,2921964,5933,3270,0
4,2010,4,California,1.5,3.4,18.0,29.1,34.0,377.7,2242,...,88.444,72.2,26.3,18.6,7.6,1.6,37319502,64983,-721,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
463,2018,3,Florida,2.6,6.4,55.1,24.6,11.4,247.0,1471,...,80.372,89.1,10.5,8.5,2.0,0.4,21244317,280704,267130,1
464,2018,1,Massachusetts,1.0,0.8,26.7,36.2,35.4,406.9,2207,...,121.565,81.5,18.1,15.8,2.3,0.5,6882635,22846,11129,1
465,2018,3,District of Columbia,0.8,0.6,8.9,27.1,62.7,627.0,2506,...,151.147,84.2,15.6,12.3,3.3,0.2,701547,6641,2431,1
466,2018,4,Utah,1.9,1.2,44.5,37.5,14.9,310.0,1531,...,93.060,84.7,14.7,11.0,3.7,0.6,3153550,52508,21841,1


In [7]:
# Keep the identifying columns (year, region code and name) in a separate
# DataFrame, then preview its first rows.
state_names_df = df_us_data.loc[:, ['Year', 'Region', 'NAME']]
state_names_df.head()

Unnamed: 0,Year,Region,NAME
0,2010,3,Alabama
1,2010,4,Alaska
2,2010,4,Arizona
3,2010,3,Arkansas
4,2010,4,California


In [8]:
# ML preparation: build a copy of the data without the 'Region' and 'NAME'
# columns. df_us_data itself keeps them.
clean_us_data_df = df_us_data.drop(['Region', 'NAME'], axis='columns')
clean_us_data_df

Unnamed: 0,Year,Home Value <$50k (%),"Home Value $50k-$99,999k (%)","Home Value $100k-$299,999 (%)","Home Value $300k-$499,999k (%)",Home Value $500k+ (%),Median Home Value ($)(1000X),Median Housing Cost (monthly) ($),Income <$10k (%),"Income $10k-$24,999 (%)",...,Median Income ($)(1000X),No 2nd or Equity Loan (%),2nd Mortgage or Equity Loan (%),Equity Loan Only (%),2nd Mortgage Only (%),Both 2nd & Equity Loan (%),Population,Net Population Change,Net Migration,Leave
0,2010,9.1,23.6,61.7,8.7,3.7,142.7,1130,3.9,10.4,...,61.964,82.3,17.2,12.3,4.9,0.6,4785437,5312,2168,0
1,2010,2.0,2.9,28.1,26.5,8.1,255.7,1772,1.2,4.0,...,94.747,84.4,14.9,10.4,4.5,0.7,713910,3661,1598,0
2,2010,4.0,13.4,53.4,14.4,6.8,177.0,1442,2.8,8.8,...,66.539,77.4,21.8,16.0,5.8,0.8,6407172,14884,5672,0
3,2010,12.0,27.0,67.1,6.4,2.1,122.6,987,2.8,10.9,...,59.393,89.3,10.5,6.4,4.1,0.2,2921964,5933,3270,0
4,2010,1.5,3.4,18.0,29.1,34.0,377.7,2242,2.1,5.9,...,88.444,72.2,26.3,18.6,7.6,1.6,37319502,64983,-721,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
463,2018,2.6,6.4,55.1,24.6,11.4,247.0,1471,2.8,7.3,...,80.372,89.1,10.5,8.5,2.0,0.4,21244317,280704,267130,1
464,2018,1.0,0.8,26.7,36.2,35.4,406.9,2207,1.3,3.1,...,121.565,81.5,18.1,15.8,2.3,0.5,6882635,22846,11129,1
465,2018,0.8,0.6,8.9,27.1,62.7,627.0,2506,2.2,2.8,...,151.147,84.2,15.6,12.3,3.3,0.2,701547,6641,2431,1
466,2018,1.9,1.2,44.5,37.5,14.9,310.0,1531,1.0,3.7,...,93.060,84.7,14.7,11.0,3.7,0.6,3153550,52508,21841,1


In [9]:
# Check datatypes for all columns in preparation of ML.
# The feature columns are later passed to StandardScaler, so a non-numeric
# dtype showing up here would need handling before that step.
clean_us_data_df.dtypes

Year                                   int64
Home Value <$50k (%)                 float64
Home Value $50k-$99,999k (%)         float64
Home Value $100k-$299,999 (%)        float64
Home Value $300k-$499,999k (%)       float64
Home Value $500k+ (%)                float64
Median Home Value ($)(1000X)         float64
Median Housing Cost (monthly) ($)      int64
Income <$10k (%)                     float64
Income $10k-$24,999 (%)              float64
Income $25k-$34,999 (%)              float64
Income $35k-$49,999 (%)              float64
Income $50k-$74,999 (%)              float64
Income $75k-$99,999 (%)              float64
Income $100k-$149,999 (%)            float64
Income $150k+ (%)                    float64
Median Income ($)(1000X)             float64
No 2nd or Equity Loan (%)            float64
2nd Mortgage or Equity Loan (%)      float64
Equity Loan Only (%)                 float64
2nd Mortgage Only (%)                float64
Both 2nd & Equity Loan (%)           float64
Population

## Preprocess the Data

In [10]:
# Feature matrix: every column of the cleaned frame except the 'Leave' target.
# drop() returns a new frame, so clean_us_data_df is left untouched.
# NOTE(review): 'Leave' was computed from 'Median Home Value ($)(1000X)' and
# 'Median Income ($)(1000X)', and both columns remain in X. Confirm that
# giving the model the inputs of the labelling rule is intended.
X = clean_us_data_df.drop(columns='Leave')
X.head()

Unnamed: 0,Year,Home Value <$50k (%),"Home Value $50k-$99,999k (%)","Home Value $100k-$299,999 (%)","Home Value $300k-$499,999k (%)",Home Value $500k+ (%),Median Home Value ($)(1000X),Median Housing Cost (monthly) ($),Income <$10k (%),"Income $10k-$24,999 (%)",...,Income $150k+ (%),Median Income ($)(1000X),No 2nd or Equity Loan (%),2nd Mortgage or Equity Loan (%),Equity Loan Only (%),2nd Mortgage Only (%),Both 2nd & Equity Loan (%),Population,Net Population Change,Net Migration
0,2010,9.1,23.6,61.7,8.7,3.7,142.7,1130,3.9,10.4,...,8.7,61.964,82.3,17.2,12.3,4.9,0.6,4785437,5312,2168
1,2010,2.0,2.9,28.1,26.5,8.1,255.7,1772,1.2,4.0,...,19.1,94.747,84.4,14.9,10.4,4.5,0.7,713910,3661,1598
2,2010,4.0,13.4,53.4,14.4,6.8,177.0,1442,2.8,8.8,...,10.2,66.539,77.4,21.8,16.0,5.8,0.8,6407172,14884,5672
3,2010,12.0,27.0,67.1,6.4,2.1,122.6,987,2.8,10.9,...,7.1,59.393,89.3,10.5,6.4,4.1,0.2,2921964,5933,3270
4,2010,1.5,3.4,18.0,29.1,34.0,377.7,2242,2.1,5.9,...,21.2,88.444,72.2,26.3,18.6,7.6,1.6,37319502,64983,-721


In [11]:
# Define the target set as a 1-D NumPy array holding the 0/1 'Leave' labels.
# Series.to_numpy() is used instead of Series.ravel(), which is deprecated in
# recent pandas releases and emits a FutureWarning there.
y = clean_us_data_df['Leave'].to_numpy()
# Preview the first five labels.
y[:5]

array([0, 0, 0, 0, 1])

In [12]:
# Partition features and labels into training and test subsets.
# test_size=0.25 is the library default written out explicitly; the fixed
# random_state keeps the shuffle reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=78,
)

In [13]:
# Standardise the features. The scaler's statistics are learned from the
# training rows only, so nothing about the test rows influences the scaling.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)  # fit() returns the fitted scaler itself

# Apply the same fitted transformation to both partitions.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(partition) for partition in (X_train, X_test)
)

## Fit the Random Forest Model

In [14]:
# Build a 128-tree random forest with a fixed seed and train it on the scaled
# training features. fit() returns the fitted estimator, so construction and
# training are chained.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78).fit(
    X_train_scaled, y_train
)

# Predict a 0/1 label for every row of the scaled test set and show the result.
predictions = rf_model.predict(X_test_scaled)
predictions

array([0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 1, 0, 0, 0, 1, 0])

## Evaluate the Model

In [15]:
# Overall accuracy: share of test rows whose predicted label equals the actual one.
acc_score = accuracy_score(y_test, predictions)

# Confusion matrix of actual labels (rows) against predicted labels (columns).
cm = confusion_matrix(y_test, predictions)

# Wrap the raw matrix in a DataFrame with readable row/column labels.
actual_labels = ["Actual 0", "Actual 1"]
predicted_labels = ["Predicted 0", "Predicted 1"]
cm_df = pd.DataFrame(data=cm, index=actual_labels, columns=predicted_labels)

In [16]:
# Displaying results: the labelled confusion matrix, the overall accuracy and
# the per-class precision / recall / f1 report for the test-set predictions.
print("Confusion Matrix")
display(cm_df)  # display() comes from the IPython/Jupyter environment, not from an import in this notebook
print(f"Accuracy Score: {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,84,0
Actual 1,1,32


Accuracy Score: 0.9914529914529915
Classification Report
              precision    recall  f1-score   support

           0       0.99      1.00      0.99        84
           1       1.00      0.97      0.98        33

    accuracy                           0.99       117
   macro avg       0.99      0.98      0.99       117
weighted avg       0.99      0.99      0.99       117



In [17]:
# Calculate feature importance in the Random Forest model.
# The array holds one score per feature the model was fitted on; the next
# cell pairs these scores with X.columns to attach the feature names.
importances = rf_model.feature_importances_
importances

array([0.0059763 , 0.04424817, 0.06208391, 0.07680767, 0.07662335,
       0.13760372, 0.13147475, 0.07374524, 0.02899293, 0.02868553,
       0.01824688, 0.02483927, 0.03192922, 0.02301637, 0.02527917,
       0.03639885, 0.04144885, 0.01616542, 0.01530687, 0.02010353,
       0.00969068, 0.00527124, 0.01201126, 0.02400596, 0.03004486])

In [18]:
# Pair each importance score with its feature name, then order the pairs from
# most to least important.
importance_by_feature = list(zip(rf_model.feature_importances_, X.columns))
importance_by_feature.sort(reverse=True)
importance_by_feature

[(0.13760371602950347, 'Home Value $500k+ (%)'),
 (0.1314747457219969, 'Median Home Value ($)(1000X)'),
 (0.07680767324231297, 'Home Value $100k-$299,999 (%)'),
 (0.07662335492497425, 'Home Value $300k-$499,999k (%)'),
 (0.07374523855725952, 'Median Housing Cost (monthly) ($)'),
 (0.06208391143995185, 'Home Value $50k-$99,999k (%)'),
 (0.04424816859428962, 'Home Value <$50k (%)'),
 (0.04144885061579816, 'Median Income ($)(1000X)'),
 (0.03639885227566998, 'Income $150k+ (%)'),
 (0.0319292213454252, 'Income $50k-$74,999 (%)'),
 (0.03004486060984392, 'Net Migration'),
 (0.028992926690844793, 'Income <$10k (%)'),
 (0.02868552759938277, 'Income $10k-$24,999 (%)'),
 (0.025279169706487598, 'Income $100k-$149,999 (%)'),
 (0.024839269398742963, 'Income $35k-$49,999 (%)'),
 (0.02400596289504071, 'Net Population Change'),
 (0.023016371229433147, 'Income $75k-$99,999 (%)'),
 (0.020103527705060752, 'Equity Loan Only (%)'),
 (0.018246877596158683, 'Income $25k-$34,999 (%)'),
 (0.016165418309172515, 

In [19]:
# List the current columns of df_us_data (now including 'Leave') so the ones
# needed for the dashboard table can be picked out.
cols2 = list(df_us_data.columns)
print(cols2)

['Year', 'Region', 'NAME', 'Home Value <$50k (%)', 'Home Value $50k-$99,999k (%)', 'Home Value $100k-$299,999 (%)', 'Home Value $300k-$499,999k (%)', 'Home Value $500k+ (%)', 'Median Home Value ($)(1000X)', 'Median Housing Cost (monthly) ($)', 'Income <$10k (%)', 'Income $10k-$24,999 (%)', 'Income $25k-$34,999 (%)', 'Income $35k-$49,999 (%)', 'Income $50k-$74,999 (%)', 'Income $75k-$99,999 (%)', 'Income $100k-$149,999 (%)', 'Income $150k+ (%)', 'Median Income ($)(1000X)', 'No 2nd or Equity Loan (%)', '2nd Mortgage or Equity Loan (%)', 'Equity Loan Only (%)', '2nd Mortgage Only (%)', 'Both 2nd & Equity Loan (%)', 'Population', 'Net Population Change', 'Net Migration', 'Leave']


In [21]:
# Select the subset of columns that the dashboard table will show.
us_data_table = df_us_data.loc[
    :,
    [
        'Year',
        'NAME',
        'Median Home Value ($)(1000X)',
        'Median Housing Cost (monthly) ($)',
        'Median Income ($)(1000X)',
        'Population',
        'Net Population Change',
        'Net Migration',
        'Leave',
    ],
]
us_data_table

Unnamed: 0,Year,NAME,Median Home Value ($)(1000X),Median Housing Cost (monthly) ($),Median Income ($)(1000X),Population,Net Population Change,Net Migration,Leave
0,2010,Alabama,142.7,1130,61.964,4785437,5312,2168,0
1,2010,Alaska,255.7,1772,94.747,713910,3661,1598,0
2,2010,Arizona,177.0,1442,66.539,6407172,14884,5672,0
3,2010,Arkansas,122.6,987,59.393,2921964,5933,3270,0
4,2010,California,377.7,2242,88.444,37319502,64983,-721,1
...,...,...,...,...,...,...,...,...,...
463,2018,Florida,247.0,1471,80.372,21244317,280704,267130,1
464,2018,Massachusetts,406.9,2207,121.565,6882635,22846,11129,1
465,2018,District of Columbia,627.0,2506,151.147,701547,6641,2431,1
466,2018,Utah,310.0,1531,93.060,3153550,52508,21841,1


In [25]:
# Give the dashboard table reader-friendly headers. Columns not listed in the
# mapping keep their current names.
friendly_headers = {
    "NAME": "State",
    "Median Home Value ($)(1000X)": "Median Home Value (in thousands USD)",
    "Median Housing Cost (monthly) ($)": "Median Monthly Housing Cost (USD)",
    "Median Income ($)(1000X)": "Median Income (in thousands USD)",
}
us_data_table = us_data_table.rename(columns=friendly_headers)
us_data_table

Unnamed: 0,Year,State,Median Home Value (in thousands USD),Median Monthly Housing Cost (USD),Median Income (in thousands USD),Population,Net Population Change,Net Migration,Leave
0,2010,Alabama,142.7,1130,61.964,4785437,5312,2168,0
1,2010,Alaska,255.7,1772,94.747,713910,3661,1598,0
2,2010,Arizona,177.0,1442,66.539,6407172,14884,5672,0
3,2010,Arkansas,122.6,987,59.393,2921964,5933,3270,0
4,2010,California,377.7,2242,88.444,37319502,64983,-721,1
...,...,...,...,...,...,...,...,...,...
463,2018,Florida,247.0,1471,80.372,21244317,280704,267130,1
464,2018,Massachusetts,406.9,2207,121.565,6882635,22846,11129,1
465,2018,District of Columbia,627.0,2506,151.147,701547,6641,2431,1
466,2018,Utah,310.0,1531,93.060,3153550,52508,21841,1


In [27]:
# Write the dashboard table to an HTML file, tagging the <table> element with
# the "table table-striped" CSS classes.
us_data_table.to_html(
    buf="Resources/us_data.html",
    classes="table table-striped",
)