In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fit one ordinary-least-squares model per homelessness target and report the
# test-set MSE and R-squared of each in a single table.

# Load the dataset.
# NOTE(review): this path is machine-specific; point it at a local copy of
# Final.csv before running elsewhere.
file_path = "/Users/xushiheng/Downloads/Final.csv"
df = pd.read_csv(file_path)

# Keep only rows whose Year is in 2010-2023 (both ends inclusive)
df = df[(df['Year'] >= 2010) & (df['Year'] <= 2023)]

# Drop identifier columns that are not used as predictors
df = df.drop(columns=['CoC_Number', 'State Initials'])

# Drop rows with a missing value in any remaining column
df = df.dropna()

# Fail early with a clear message instead of an opaque error from the split
if df.empty:
    raise ValueError(
        "No complete rows remain for 2010-2023 after dropping missing values"
    )

# Define target variables and predictors
target_variables = [
    'Overall Homeless',
    'Overall Homeless Individuals',
    'Overall Homeless People in Families',
    'Unsheltered Homeless',
    'Sheltered Total Homeless'
]

# Every column that is neither a target nor Year is used as a predictor
predictors = df.drop(columns=target_variables + ['Year'])
X = predictors.values  # Predictor matrix

# Split once for all targets. The partition depends only on the number of
# rows, test_size and random_state, so splitting inside the loop would just
# recompute the same train/test rows for every target.
target_arrays = [df[name].values for name in target_variables]
split_arrays = train_test_split(
    X, *target_arrays, test_size=0.2, random_state=42
)
# train_test_split returns (train, test) pairs in the order the arrays were
# passed: X first, then one pair per target.
X_train, X_test = split_arrays[0], split_arrays[1]
y_train_parts = split_arrays[2::2]
y_test_parts = split_arrays[3::2]

# Initialize a dictionary to store model performance metrics
results = {}

# Perform linear regression for each target variable
for target, y, y_train, y_test in zip(
    target_variables, target_arrays, y_train_parts, y_test_parts
):
    # Initialize and train the linear regression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Make predictions on the held-out rows
    y_pred = model.predict(X_test)

    # Calculate performance metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Store results
    results[target] = {
        'Mean Squared Error': mse,
        'R-squared': r2
    }

# Convert results to DataFrame (one row per target, one column per metric)
results_df = pd.DataFrame(results).T

# Display the results
print(results_df)

                                     Mean Squared Error  R-squared
Overall Homeless                           3.325341e+06   0.914381
Overall Homeless Individuals               2.795138e+06   0.845110
Overall Homeless People in Families        6.117014e+04   0.989996
Unsheltered Homeless                       3.430697e+06   0.673056
Sheltered Total Homeless                   5.330115e+04   0.997244
