In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# List of columns to drop if they exist in the merged dataframe
columns_to_drop = ['City', 'Year', 'Total PEH', 'Population', 'PEH Per 100,000', 'Unsheltered Per 100,000']

merged_df = pd.read_csv('../data/processed/pivoted_and_PIT.csv')

# Drop columns only if they are in the dataframe
X = merged_df.drop(columns=[col for col in columns_to_drop if col in merged_df.columns])

# Set response variable and independent variables
y = merged_df['Unsheltered PEH']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Apply Lasso Regression
lasso = Lasso(alpha=0.1)  
lasso.fit(X_train, y_train)

y_pred = lasso.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R^2 Score:", r2)

feature_importance = pd.Series(lasso.coef_, index=X.columns)
print("Feature Importance:\n", feature_importance)


Mean Squared Error: 0.0008784653043647666
R^2 Score: 0.9999998666545051
Feature Importance:
 Unsheltered PEH                         738.079463
bridge to housing network                -0.000000
emergency shelter                         0.000000
family reunification program              0.000000
flexible funds                           -0.000000
food and nutrition                       -0.000000
homeless services                        -0.000000
homelessness prevention                  -0.000000
homeshare program                        -0.000000
housing assistance                        0.000000
housing navigation services               0.000000
housing stability services               -0.000000
motel voucher                            -0.000000
neighborhood revitalization services     -0.000000
opening doors program                    -0.000000
outreach                                  0.000000
project h.o.p.e.                         -0.000000
rapid re housing                        