In [2]:
"""
calculate_alpha_beta.py

Usage:
  python calculate_alpha_beta.py

Description:
  - Reads raw nonfarm employment data from raw_nonfarm_jobs.
  - Separates the national series (CES0000000001) from each MSA series.
  - Computes alpha/beta via a simple OLS regression of local vs. national levels.
  - Stores results in alpha_beta_results.
"""

import os

import pandas as pd
import psycopg2
import statsmodels.api as sm

# PostgreSQL connection settings. Each one can be overridden with an
# environment variable of the same name, so real credentials do not have to be
# edited into the source; the fallbacks are the original development defaults.
DB_HOST = os.environ.get("DB_HOST", "localhost")
DB_PORT = os.environ.get("DB_PORT", "5432")
DB_NAME = os.environ.get("DB_NAME", "inquire_DB")
DB_USER = os.environ.get("DB_USER", "postgres")
DB_PASS = os.environ.get("DB_PASS", "givedata")  # placeholder; set DB_PASS to the actual password

NATIONAL_SERIES_ID = "CES0000000001"  # total nonfarm (seasonally adjusted), national

def load_raw_data():
    """
    Pull every row of 'raw_nonfarm_jobs' into a pandas DataFrame.

    Connects using the module-level DB_* settings rather than repeating the
    connection literals here.

    Returns:
        DataFrame with columns [series_id, obs_date, value], ordered by
        obs_date.

    Raises:
        psycopg2.Error: if the connection cannot be established (for example
            on a failed password authentication).
    """
    conn = psycopg2.connect(
        host=DB_HOST, port=DB_PORT, dbname=DB_NAME,
        user=DB_USER, password=DB_PASS,
    )
    query = """
        SELECT series_id, obs_date, value
        FROM raw_nonfarm_jobs
        ORDER BY obs_date
    """
    try:
        return pd.read_sql(query, conn)
    finally:
        # Close the connection even when the query raises, so a failed read
        # does not leave the session open.
        conn.close()

def compute_alpha_beta(local_series, national_series):
    """
    Fit the OLS regression: local = alpha + beta * national.

    Args:
        local_series: Dependent variable (pandas Series or 1-D array).
        national_series: Single regressor, aligned element-by-element with
            local_series (pandas Series or 1-D array).

    Returns:
        (alpha, beta, r_squared) as plain Python floats.
    """
    # has_constant="add" forces an intercept column even when the regressor is
    # itself constant; the default ("skip") would omit it and leave the fit
    # without an alpha term.
    design = sm.add_constant(national_series, has_constant="add")
    model = sm.OLS(local_series, design).fit()

    # add_constant prepends the intercept, so the coefficients are read by
    # position: [intercept, slope]. Positional access works whether params is
    # a labelled Series (pandas input) or a bare ndarray (array input), and
    # beta can never be left unassigned.
    coefficients = [float(p) for p in model.params]
    alpha, beta = coefficients[0], coefficients[1]

    return alpha, beta, float(model.rsquared)

def store_alpha_beta_results(series_id, alpha, beta, r_sq, start_dt, end_dt):
    """
    Upsert one row into alpha_beta_results for the given MSA + date range.

    A row with the same (series_id, start_date, end_date) has its alpha, beta
    and r_squared overwritten; otherwise a new row is inserted.

    Args:
        series_id: Identifier of the MSA series.
        alpha: Regression intercept.
        beta: Regression slope.
        r_sq: R-squared of the fit.
        start_dt: First observation date covered by the fit.
        end_dt: Last observation date covered by the fit.

    Raises:
        psycopg2.Error: if connecting or executing the statement fails; the
            transaction is rolled back and the connection closed first.
    """
    insert_sql = """
    INSERT INTO alpha_beta_results
        (series_id, alpha, beta, r_squared, start_date, end_date)
    VALUES (%s, %s, %s, %s, %s, %s)
    ON CONFLICT (series_id, start_date, end_date)
    DO UPDATE SET
        alpha = EXCLUDED.alpha,
        beta = EXCLUDED.beta,
        r_squared = EXCLUDED.r_squared
    """

    def _as_float(value):
        # Hand the driver plain Python floats: NumPy scalars (which is what a
        # statsmodels fit yields) are not reliably rendered as SQL literals by
        # psycopg2. None is passed through so it is still stored as NULL.
        return None if value is None else float(value)

    params = (
        series_id, _as_float(alpha), _as_float(beta), _as_float(r_sq),
        start_dt, end_dt,
    )

    conn = psycopg2.connect(
        host=DB_HOST, port=DB_PORT, dbname=DB_NAME,
        user=DB_USER, password=DB_PASS,
    )
    try:
        # `with conn` commits on success and rolls back on an exception (it
        # does not close the connection); `with cursor` closes the cursor.
        with conn, conn.cursor() as cur:
            cur.execute(insert_sql, params)
    finally:
        conn.close()

def main():
    """
    Run the pipeline: load raw data, regress each MSA series on the national
    series (level vs. level), and store alpha/beta/r_squared per MSA.

    MSAs with fewer than two usable observations overlapping the national
    series are skipped with a message. If the national series is absent from
    the data, nothing is computed.
    """
    # 1) Load raw data
    df = load_raw_data()
    print(f"Loaded {len(df)} rows from 'raw_nonfarm_jobs'.")

    # Coerce once up front so OLS receives numeric data.
    # NOTE(review): 'value' may arrive as object dtype (e.g. Decimal) depending
    # on the column type in the database — confirm against the table schema.
    df["value"] = pd.to_numeric(df["value"])

    # 2) Separate out the national vs. MSA data
    is_national = df["series_id"] == NATIONAL_SERIES_ID
    national = (
        df.loc[is_national, ["obs_date", "value"]]
        .dropna(subset=["value"])
        .set_index("obs_date")
        .sort_index()
        .rename(columns={"value": "value_nat"})
    )
    if national.empty:
        # Without the national series every MSA would be skipped one by one;
        # say so once and stop.
        print(f"No rows found for national series {NATIONAL_SERIES_ID}; nothing to compute.")
        return

    df_msas = df.loc[~is_national].sort_values(by="obs_date")

    # For demonstration, we do a direct level vs. level regression
    # You could do yoy or month-over-month changes first if desired.

    # 3) Group MSA data by series_id and compute alpha/beta.
    # A single groupby pass replaces re-filtering the whole frame per MSA;
    # sort=False keeps the MSAs in order of first appearance.
    for msa_id, subdf in df_msas.groupby("series_id", sort=False):
        local = (
            subdf.set_index("obs_date")[["value"]]
            .rename(columns={"value": "value_local"})
        )

        # Inner-join with national on obs_date and drop rows where either
        # side is missing, since OLS cannot fit through NaNs.
        merged = local.join(national, how="inner").dropna()

        if len(merged) < 2:
            # Not enough data points to run a regression
            print(f"Skipping {msa_id} due to insufficient overlap with national data.")
            continue

        alpha, beta, r_sq = compute_alpha_beta(
            merged["value_local"], merged["value_nat"]
        )

        # 4) Store results in alpha_beta_results, keyed by the date range
        # actually covered by the fit.
        start_dt = merged.index.min()
        end_dt = merged.index.max()

        store_alpha_beta_results(msa_id, alpha, beta, r_sq, start_dt, end_dt)
        print(f"Stored alpha/beta for {msa_id}: alpha={alpha:.4f}, beta={beta:.4f}, r_sq={r_sq:.4f}")

    print("All alpha-beta calculations done.")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

OperationalError: connection to server at "localhost" (::1), port 5432 failed: server sent an error response during GSS encryption exchange
connection to server at "localhost" (::1), port 5432 failed: FATAL:  password authentication failed for user "postgres"
