In [1]:
# IPython/Jupyter magics: these lines only work inside a notebook kernel, not
# in a plain Python interpreter. After installing, the kernel may need to be
# restarted before the updated packages can be imported.
%pip install pandas
%pip install statsmodels
%pip install numpy


import pandas as pd
import statsmodels.api as sm
import numpy as np

# Step 1: Open and read the CSV file
def read_csv_file(file_path):
    """Load a CSV file into a pandas DataFrame.

    Args:
        file_path: Location of the CSV file, forwarded to ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` when the file does not exist or
        is empty. In both failure cases an error message is printed to
        standard output instead of raising.
    """
    try:
        return pd.read_csv(file_path)
    except pd.errors.EmptyDataError:
        print(f"Error: File '{file_path}' is empty.")
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
    # Only reached after one of the handled errors above.
    return None

# Step 2: Prepare the data for Negative Binomial regression
def prepare_data(df):
    """Split a DataFrame into a design matrix and an integer target.

    The layout is positional: the column at index 1 is the target, every
    column from index 2 onward is a feature, and the column at index 0 is
    ignored.

    Args:
        df: Input DataFrame with the positional layout described above.

    Returns:
        A tuple ``(X, y, target_column, feature_columns)`` where ``X`` is
        the feature columns after ``sm.add_constant``, ``y`` is the target
        rounded and cast to ``int``, ``target_column`` is the target's
        column label and ``feature_columns`` holds the feature labels.
    """
    all_columns = df.columns
    target_column, feature_columns = all_columns[1], all_columns[2:]

    # The target is rounded to whole numbers and cast to int before fitting.
    y = df[target_column].round().astype(int)

    # Add a constant (intercept) term to the selected features.
    X = sm.add_constant(df[feature_columns])

    return X, y, target_column, feature_columns

# Step 3: Perform Negative Binomial regression
def perform_negative_binomial_regression(X, y, alpha=1.0):
    """Fit a Negative Binomial GLM of ``y`` on ``X``.

    Args:
        X: Design matrix of explanatory variables, passed to ``sm.GLM`` as
            the exogenous data.
        y: Response values, passed to ``sm.GLM`` as the endogenous data.
        alpha: Dispersion (ancillary) parameter handed to
            ``sm.families.NegativeBinomial``. The GLM family treats this
            value as fixed; it is not estimated from the data. The default
            of 1.0 equals the library default that was previously relied
            on implicitly, so existing two-argument calls are unaffected.

    Returns:
        The fitted results object returned by ``model.fit()``.
    """
    # Pass alpha explicitly so the assumed dispersion is visible and tunable
    # instead of being an invisible library default.
    family = sm.families.NegativeBinomial(alpha=alpha)
    model = sm.GLM(y, X, family=family)
    return model.fit()

# Step 4: Print the results
def print_results(results, target_column, feature_columns):
    """Print the target name, the feature names and the model summary.

    Args:
        results: Object exposing a ``summary()`` method whose return value
            is printed.
        target_column: Label of the target column; shown on the first line.
        feature_columns: Iterable of feature labels (strings); shown
            comma-separated on the second line.
    """
    print(f"Target column: {target_column}")

    joined_features = ', '.join(feature_columns)
    print(f"Feature columns: {joined_features}")

    print("\nRegression Results:")
    summary = results.summary()
    print(summary)

# Main function
def main(file_path=None):
    """Run the full pipeline: read the CSV, prepare data, fit, and print.

    Args:
        file_path: Path of the CSV file to analyse. When omitted (``None``)
            the original hard-coded path is used, so calling ``main()``
            with no arguments behaves as before.

    Returns:
        None. If the file cannot be read, ``read_csv_file`` returns
        ``None`` and the remaining steps are skipped.
    """
    if file_path is None:
        # Default input kept from the original script.
        file_path = r'/Users/pushtikanani/Downloads/population stratifaction (Sheet3).csv'

    # Read the CSV file
    df = read_csv_file(file_path)
    if df is None:
        # read_csv_file has already reported the problem; nothing to fit.
        return

    # Prepare the data
    X, y, target_column, feature_columns = prepare_data(df)

    # Perform Negative Binomial regression
    results = perform_negative_binomial_regression(X, y)

    # Print the results
    print_results(results, target_column, feature_columns)

# Run the pipeline only when this code is executed as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    main()


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Target column: Y = Stratification (dependent variable) Fentanyl Deaths Predicted at Zipcode lvl
Feature columns: X1 = Fentanyl Deaths (0<18), X2 = Fentanyl Deaths (18-25), X3 = Fentanyl Deaths (26-34), X4 = Fentanyl Deaths (35-50), X5 = Fentanyl Deaths (>50), X6 = Educational attainment of 18 and above for Bachelor and above degrees, X7 = White Portion of Fentanyl Deaths per Zipcode (47%), X8 = Black Portion of Fentanyl Deaths per Zipcode (15.5%), X9 = Asian Portion of Fentanyl Deaths per Zipcode (4.1%), X10 = Latino Portion of Fentanyl Deaths per Zipcode (29.6%), X11 = Other Race Portion of Fentanyl Deaths per Zipcode (3.9%)

Regression Results:
                                                Generalized Linear Model Regression Results                                                 
Dep. V

