# Decision Tree Regressor

In [1]:
# Import dependencies
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import export_graphviz
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,mean_squared_error

In [2]:
# Read the dataset from disk (the file name suggests rows with missing values
# were already removed upstream) and preview the first rows
csv_path = '../Data_Preprocessing/Raw_Data/dataset_na_dropped.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,year,state,population_million,education_million,welfare_million,crime_rate,unemployment_rate,divorce_rate_per_1000_people,homeownership_rate,minimum_wage_effective,CPI_Average,avg_wage_index,poverty_rate
0,1976,National,216.945,107290.0,72155.2,467.8,7.786562,5.0,7.79,2.226667,56.9,9226.48,14.1
1,1977,National,219.307,115893.0,83417.8,475.9,7.132505,5.1,7.13,2.226667,60.6,9779.44,13.92
2,1978,National,221.694,128541.0,81037.9,497.8,6.134198,5.2,6.13,2.226667,65.2,10556.03,13.43
3,1979,National,224.107,140169.0,83691.3,548.9,5.923827,5.3,5.92,2.91,72.6,11479.46,13.33
4,1980,National,226.546,153686.0,108251.7,596.6,7.255717,5.2,7.26,3.110196,82.4,12513.46,14.65


In [3]:
# Drop the 'state' label and the 'year' column so that only the numeric
# predictors and the poverty_rate target remain.
# Reassigning instead of using inplace=True, combined with errors='ignore',
# keeps this cell idempotent: running it a second time no longer raises KeyError
# because the columns are already gone.
df = df.drop(columns=['state', 'year'], errors='ignore')

In [4]:
# Separate the target column from the predictors
target_col = 'poverty_rate'
y = df[target_col]
X = df.drop(columns=[target_col])

# Hold out a test partition. No test_size/train_size is passed, so
# scikit-learn's default proportions apply; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [5]:
# Create a decision-tree regressor and fit it on the training split.
# Only random_state is set (for repeatable tree construction); every other
# hyperparameter is left at scikit-learn's default.
regressor = DecisionTreeRegressor(random_state = 0)
regressor.fit(X_train, y_train)

DecisionTreeRegressor(random_state=0)

In [6]:
# Generate poverty-rate predictions for the held-out test features
y_pred = regressor.predict(X_test)

In [7]:
# Score the test-set predictions with R-squared (coefficient of determination)
r_squared = r2_score(y_test, y_pred)
print(f'R-Squared value: {r_squared}')

R-Squared value: 0.6105957137870786


In [8]:
# Quantify the prediction error on the held-out set: mean squared error and
# its square root (RMSE, expressed in the same units as poverty_rate)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f'Mean Squared Error: {mse} \nRoot Mean Squared Error: {rmse}')

Mean Squared Error: 7.912473684210527 
Root Mean Squared Error: 2.8129119581335154


In [9]:
# Put the true and the predicted poverty rates side by side, keeping the
# original row labels of the test split as the index
pred_df = y_test.to_frame(name='Actual').assign(Predicted=y_pred)
pred_df

Unnamed: 0,Actual,Predicted
75,19.20,19.0
698,16.40,14.5
839,14.30,11.7
1486,11.90,12.8
1006,21.50,22.1
...,...,...
880,10.60,10.1
1266,14.10,12.7
12,14.81,14.3
1504,26.60,24.7


In [10]:
# Rank the features from most to least important according to the fitted tree
importances = regressor.feature_importances_
ranked_importance = sorted(zip(importances, X.columns), reverse=True)
for importance, feature in ranked_importance:
    print(f'{feature}: ({importance})')

minimum_wage_effective: (0.2302245989872258)
unemployment_rate: (0.18585296633763987)
crime_rate: (0.1513443172342636)
population_million: (0.1369710982533281)
homeownership_rate: (0.12022967645472075)
education_million: (0.0780218235029268)
welfare_million: (0.06603348252511261)
avg_wage_index: (0.017121627168163768)
divorce_rate_per_1000_people: (0.008035852832103178)
CPI_Average: (0.006164556704515544)


In [28]:
# Defining function to pull data from a row and split it into features and target
def feat_targ_vals(row, data=None):
    """Split one row of a frame into its feature values and its target value.

    The row is looked up by position (``iloc``). The last column of the frame
    is treated as the target; every column before it is treated as a feature.

    Args:
        row: Integer position of the row to extract.
        data: Optional DataFrame to read from. When omitted, the notebook-level
            ``df`` is used, which matches the original behaviour.

    Returns:
        tuple: ``(feature_vals, target_val)`` where ``feature_vals`` is a list
        of the row's values in column order (target excluded) and
        ``target_val`` is the value of the last column.
    """
    frame = df if data is None else data
    # Fetch the row once and convert it positionally. Indexing the row Series
    # with an integer via [] relies on a positional fallback that pandas has
    # deprecated for label-indexed Series.
    values = frame.iloc[row].tolist()
    target_val = values.pop()
    return values, target_val

In [29]:
# Predict the poverty rate for an existing row (75), which landed in the test
# split, and compare it with the recorded value.
# The helper is called once and its result unpacked, instead of re-reading the
# row for each half of the tuple.
row_features, actual = feat_targ_vals(75)
# Wrap the values in a one-row DataFrame that carries the training column
# names, so the model receives the same feature names and order it was fitted
# with (a bare nested list drops the names and makes scikit-learn warn).
row_frame = pd.DataFrame([row_features], columns=X.columns)
prediction = regressor.predict(row_frame)
print(f'Predicted Value: {prediction[0]} \nActual Value: {actual}')

Predicted Value: 19.0 
Actual Value: 19.2


In [56]:
# Show the last 15 feature rows as a reference for the feature names and
# typical value ranges when entering custom input
X.tail(15)

Unnamed: 0,population_million,education_million,welfare_million,crime_rate,unemployment_rate,divorce_rate_per_1000_people,homeownership_rate,minimum_wage_effective,CPI_Average,avg_wage_index
1579,0.494,1140.1,129.6,266.5,3.8,4.0,71.0,5.15,172.2,32154.82
1580,0.493,1226.4,192.6,257.6,3.8,4.0,73.5,5.15,177.1,32921.92
1581,0.497,1300.7,170.8,273.4,4.0,3.9,73.0,5.15,179.9,33252.09
1582,0.499,1385.8,268.9,261.7,4.3,3.8,72.9,5.15,184.0,34064.95
1583,0.503,1458.8,236.8,229.9,3.8,3.7,72.8,5.15,188.9,35648.55
1584,0.506,1603.6,223.6,230.3,3.6,3.6,72.8,5.15,195.3,36952.94
1585,0.513,1726.4,201.0,253.8,3.0,3.7,73.7,5.15,201.6,38651.41
1586,0.523,2060.3,239.4,257.1,2.6,3.6,73.2,5.15,207.342,40405.48
1587,0.533,2286.7,259.5,249.7,2.9,3.5,73.3,5.85,215.303,41334.97
1588,0.544,2503.6,342.2,219.7,6.3,3.5,73.8,6.55,214.537,40711.61


In [55]:
# Creating function to take custom inputs
def predict_pov_rate():
    """Interactively collect one value per model feature and return them as floats.

    The prompts are asked in the same order as before (most to least important
    feature), but the returned list follows the column order of the feature
    frame the regressor was trained on.

    Each entry is converted to float as soon as it is typed. An entry that is
    not a valid number is rejected with a message and the same prompt is shown
    again, rather than raising ValueError after all ten values were entered.

    Returns:
        list[float]: values ordered as population_million, education_million,
        welfare_million, crime_rate, unemployment_rate,
        divorce_rate_per_1000_people, homeownership_rate,
        minimum_wage_effective, CPI_Average, avg_wage_index.
    """
    # (feature name, prompt text) in the order the user is asked
    prompts = [
        ('minimum_wage_effective', 'Enter min wage effective: '),
        ('unemployment_rate', 'Enter unemployment_rate: '),
        ('crime_rate', 'Enter crime_rate: '),
        ('population_million', 'Enter population_million: '),
        ('homeownership_rate', 'Enter homeownership_rate: '),
        ('education_million', 'Enter education_million: '),
        ('welfare_million', 'Enter welfare_million: '),
        ('avg_wage_index', 'Enter avg_wage_index: '),
        ('divorce_rate_per_1000_people', 'Enter divorce_rate_per_1000_people: '),
        ('CPI_Average', 'Enter CPI_Average: '),
    ]
    # Order in which the values must be handed to the model
    model_order = [
        'population_million',
        'education_million',
        'welfare_million',
        'crime_rate',
        'unemployment_rate',
        'divorce_rate_per_1000_people',
        'homeownership_rate',
        'minimum_wage_effective',
        'CPI_Average',
        'avg_wage_index',
    ]

    entered = {}
    for feature, prompt in prompts:
        while True:
            raw = input(prompt)
            try:
                entered[feature] = float(raw)
                break
            except ValueError:
                # Keep the values already entered and ask for this one again
                print(f'"{raw}" is not a valid number, please try again.')

    custom_features = [entered[name] for name in model_order]
    return custom_features

In [57]:
# Predict the poverty rate for interactively entered feature values.
# predict_pov_rate() returns the values in the column order of X, so they can
# be labelled with X.columns; passing a named one-row frame keeps the input
# aligned with the feature names the model was fitted on.
custom_row = pd.DataFrame([predict_pov_rate()], columns=X.columns)
predicted_poverty_rate = regressor.predict(custom_row)
# predict() returns an array with one element per input row; print the single
# value rather than the array repr, matching the row-75 prediction cell.
print(f'The predicted poverty rate is: {predicted_poverty_rate[0]}')

Enter min wage effective: 5.15
Enter unemployment_rate: 4.0
Enter crime_rate: 219.4
Enter population_million: 0.565
Enter homeownership_rate: 71.1
Enter education_million: 2672.5
Enter welfare_million: 381.5
Enter avg_wage_index: 41673.83
Enter divorce_rate_per_1000_people: 3.6
Enter CPI_Average: 218.056
The predicted poverty rate is: [9.7]
