<a href="https://colab.research.google.com/github/ShikharV010/gist_daily_runs/blob/main/For_Friday_Writer_Allocation_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from ortools.linear_solver import pywraplp
from datetime import datetime
import gspread
from google.colab import auth
from google.auth import default

def standardize_writer_ids(df, id_column='Writer ID'):
    """Lower-case the writer ID column so IDs compare consistently.

    The column is cast to ``str`` before lower-casing, so non-string IDs are
    replaced by their string form. The frame is modified in place and also
    returned; a frame that lacks ``id_column`` is returned untouched.
    """
    if id_column not in df.columns:
        return df

    normalized_ids = df[id_column].astype(str).str.lower()
    df[id_column] = normalized_ids
    return df

def read_google_sheet(gc, spreadsheet_id, sheet_name=0):
    """Read one worksheet of a Google Sheet into a pandas DataFrame.

    Args:
        gc: Authorized gspread client (any object exposing ``open_by_key``).
        spreadsheet_id: Key of the spreadsheet to open.
        sheet_name: Worksheet index (int) or worksheet title (str).

    Returns:
        DataFrame whose column names come from the first sheet row. Columns in
        which every value parses as a number get a numeric dtype; all other
        columns keep the raw strings. An empty DataFrame is returned when the
        sheet has no rows or when reading fails (the error is printed, not
        raised).
    """
    try:
        spreadsheet = gc.open_by_key(spreadsheet_id)
        if isinstance(sheet_name, int):
            worksheet = spreadsheet.get_worksheet(sheet_name)
        else:
            worksheet = spreadsheet.worksheet(sheet_name)

        data = worksheet.get_all_values()
        if not data:
            return pd.DataFrame()

        df = pd.DataFrame(data[1:], columns=data[0])
        for col in df.columns:
            # pd.to_numeric(errors='ignore') is deprecated; converting strictly
            # and keeping the column on failure gives the same result without
            # depending on the deprecated option.
            try:
                df[col] = pd.to_numeric(df[col])
            except (ValueError, TypeError):
                # Non-numeric column (or duplicated header): keep it as read.
                continue
        return df
    except Exception as e:
        print(f"Error reading Google Sheet: {e}")
        return pd.DataFrame()

def _parse_billing_day(raw_value):
    """Return the calendar date held in a billing-date cell, or None if unusable."""
    if pd.isna(raw_value) or not raw_value:
        return None

    try:
        parsed = pd.to_datetime(raw_value)
    except (ValueError, TypeError, OverflowError):
        # pandas could not handle the value (unparseable or out of its
        # supported range); fall back to the two explicit formats.
        parsed = None
        for date_format in ('%m/%d/%Y', '%Y-%m-%d'):
            try:
                parsed = datetime.strptime(raw_value, date_format)
                break
            except (ValueError, TypeError):
                continue
        if parsed is None:
            return None

    if pd.isna(parsed):
        return None
    return parsed.date()


def get_campaign_priority_weights(contracted_approved_df):
    """Calculate weights based on billing date proximity and First Time Cycle status.

    Args:
        contracted_approved_df: Frame with a 'Campaign ID' column and,
            optionally, 'First Time Cycle' and 'Next Billing Date' columns.

    Returns:
        dict mapping campaign ID to a float weight. The weight starts at 1.0,
        is multiplied by 20 when 'First Time Cycle' is yes/true/1, and by
        10 / 8 / 5 / 3 when the billing date is at most 0 / 7 / 14 / 30
        calendar days away. A missing or unparseable billing date leaves the
        weight without a billing multiplier.
    """
    priority_weights = {}
    # Compare whole calendar days. Subtracting the current timestamp (with its
    # time of day) from a midnight billing date made every date look one day
    # closer, so a date tomorrow was treated as due today.
    today = datetime.now().date()

    for _, campaign_row in contracted_approved_df.iterrows():
        campaign = campaign_row['Campaign ID']
        base_weight = 1.0

        # A missing column yields None -> 'none', which is not a truthy spelling.
        first_time_value = str(campaign_row.get('First Time Cycle')).strip().lower()
        if first_time_value in ('yes', 'true', '1'):
            base_weight *= 20.0
            print(f"Campaign {campaign} is First Time Cycle - applying 20x priority multiplier")

        billing_day = _parse_billing_day(campaign_row.get('Next Billing Date'))
        if billing_day is None:
            priority_weights[campaign] = base_weight
            continue

        days_until_billing = (billing_day - today).days
        if days_until_billing <= 0:
            billing_multiplier = 10.0
        elif days_until_billing <= 7:
            billing_multiplier = 8.0
        elif days_until_billing <= 14:
            billing_multiplier = 5.0
        elif days_until_billing <= 30:
            billing_multiplier = 3.0
        else:
            billing_multiplier = 1.0

        priority_weights[campaign] = base_weight * billing_multiplier

    return priority_weights

def _flag_is_true(value):
    """Interpret a sheet/database cell as a boolean flag.

    Strings are true only when they spell yes/true/1 (case-insensitive, the
    same spellings accepted for 'First Time Cycle'); missing values (None/NaN)
    are false; anything else uses Python truthiness.
    """
    if isinstance(value, str):
        return value.strip().lower() in ('yes', 'true', '1')
    try:
        if pd.isna(value):
            return False
        return bool(value)
    except (TypeError, ValueError):
        return False


def _context_score_or_none(context):
    """Return the row's 'Context Score' as a float, or None when absent or invalid."""
    raw_score = context.get('Context Score')
    try:
        if pd.isna(raw_score):
            return None
        return float(raw_score)
    except (TypeError, ValueError):
        return None


def _build_context_lookup(campaign_writer_df):
    """Index the context table by (lower-cased writer ID, campaign ID).

    Only the first row of each pair is kept, matching a filter followed by
    ``.iloc[0]``.
    """
    lookup = {}
    writer_keys = campaign_writer_df['Writer ID'].str.lower()
    campaign_keys = campaign_writer_df['Campaign ID']
    for position, key in enumerate(zip(writer_keys, campaign_keys)):
        if key not in lookup:
            lookup[key] = campaign_writer_df.iloc[position]
    return lookup


def optimize_writer_assignments(contracted_approved_df, campaign_writer_df, writer_capacity_df):
    """
    Optimize writer assignments and return a clean DataFrame for PostgreSQL insertion.

    An integer program (SCIP via OR-Tools) chooses how many articles each
    writer gets per campaign:

    * each writer's total is capped by 'Weekly Capacity' (1 when missing);
    * each campaign's total is capped by min('Outstanding on day',
      'Topics Approved'); campaigns with nothing assignable are forced to 0;
    * First Time Cycle campaigns must reach min(13, int(assignable / 2.5))
      articles, every other assignable campaign at least min(3, assignable);
    * the objective rewards writer/campaign pairs found in the context table
      (x15 for a faced writer, x(1 + 10 * context score)), scaled by the
      campaign priority weight.

    Args:
        contracted_approved_df: Campaign table ('Campaign ID', 'Topics
            Approved', 'Outstanding on day', optional 'URL',
            'Next Billing Date', 'First Time Cycle').
        campaign_writer_df: Context table ('Writer ID', 'Campaign ID',
            optional faced-writer and 'Context Score' columns). Not modified.
        writer_capacity_df: Writer table ('Writer ID', 'Weekly Capacity').

    Returns:
        ``(assignments_df, "")`` on success, ``(None, error_message)`` when
        the solver cannot be created, there is nothing to optimize, or no
        optimal solution is found.
    """
    # Create solver
    solver = pywraplp.Solver.CreateSolver('SCIP')
    if not solver:
        return None, "Could not create solver."

    # Get unique writers and campaigns
    writers = writer_capacity_df['Writer ID'].unique()
    campaigns = contracted_approved_df['Campaign ID'].unique()

    print(f"Optimization setup: {len(writers)} writers and {len(campaigns)} campaigns")

    if len(writers) == 0 or len(campaigns) == 0:
        return None, "No writers or campaigns found."

    # Get priority weights
    priority_weights = get_campaign_priority_weights(contracted_approved_df)

    # Minimum article targets for First Time Cycle campaigns
    first_time_targets = {}
    for _, campaign_row in contracted_approved_df.iterrows():
        first_time_value = str(campaign_row.get('First Time Cycle')).strip().lower()
        if first_time_value not in ('yes', 'true', '1'):
            continue

        campaign = campaign_row['Campaign ID']
        outstanding = campaign_row.get('Outstanding on day', 0)
        topics_approved = campaign_row.get('Topics Approved', 0)
        if pd.isna(topics_approved) or topics_approved <= 0:
            continue

        if pd.notna(outstanding):
            assignable_content = min(outstanding, topics_approved)
            first_time_targets[campaign] = min(13, int(assignable_content / 2.5))
        else:
            # Outstanding count unknown: fall back to a fixed target.
            first_time_targets[campaign] = 10

    # Article count variables
    article_count = {}
    for writer in writers:
        for campaign in campaigns:
            article_count[(writer, campaign)] = solver.IntVar(0, 1000, f'count_{writer}_{campaign}')

    # Writer capacity constraints
    for writer in writers:
        writer_capacity_rows = writer_capacity_df[writer_capacity_df['Writer ID'] == writer]
        if writer_capacity_rows.empty:
            continue

        writer_weekly_capacity = writer_capacity_rows['Weekly Capacity'].iloc[0]
        if pd.isna(writer_weekly_capacity):
            writer_weekly_capacity = 1

        solver.Add(
            sum(article_count[(writer, campaign)] for campaign in campaigns) <= writer_weekly_capacity
        )

    # Campaign constraints
    for campaign in campaigns:
        campaign_rows = contracted_approved_df[contracted_approved_df['Campaign ID'] == campaign]
        if campaign_rows.empty:
            continue

        outstanding = campaign_rows['Outstanding on day'].fillna(0).iloc[0]
        topics_approved = campaign_rows['Topics Approved'].fillna(0).iloc[0]
        assignable_content = min(outstanding, topics_approved) if topics_approved > 0 else 0

        if assignable_content <= 0:
            for writer in writers:
                solver.Add(article_count[(writer, campaign)] == 0)
            continue

        campaign_total = sum(article_count[(writer, campaign)] for writer in writers)
        solver.Add(campaign_total <= assignable_content)
        if campaign in first_time_targets:
            solver.Add(campaign_total >= first_time_targets[campaign])
        else:
            solver.Add(campaign_total >= min(3, assignable_content))

    # Find faced writer column. When none exists every pair is treated as
    # "not faced"; the caller's frame is left untouched.
    faced_writer_col = next(
        (col for col in campaign_writer_df.columns
         if 'faced' in col.lower() and 'writer' in col.lower()),
        'Faced Writer',
    )

    # One pass over the context table instead of re-filtering it for every
    # writer/campaign pair.
    context_lookup = _build_context_lookup(campaign_writer_df)

    # Objective function
    objective = solver.Objective()
    faced_writer_weight = 15
    context_weight = 10

    for writer in writers:
        writer_key = str(writer).lower()
        for campaign in campaigns:
            # Pairs without context history still get a small positive weight.
            base_weight = 0.1

            context = context_lookup.get((writer_key, campaign))
            if context is not None:
                base_weight = 1.0

                if _flag_is_true(context.get(faced_writer_col)):
                    base_weight *= faced_writer_weight

                context_score = _context_score_or_none(context)
                if context_score is not None:
                    base_weight *= (1 + context_score * context_weight)

            base_weight *= priority_weights.get(campaign, 1.0)
            objective.SetCoefficient(article_count[(writer, campaign)], base_weight)

    objective.SetMaximization()

    # Solve
    print("Solving optimization problem...")
    status = solver.Solve()

    if status != pywraplp.Solver.OPTIMAL:
        error_message = "The problem is infeasible." if status == pywraplp.Solver.INFEASIBLE else f"Optimization failed with status code: {status}"
        return None, error_message

    print("Optimization problem solved successfully")

    # Collect results for PostgreSQL
    assignments = []
    current_time = datetime.now()

    for writer in writers:
        writer_key = str(writer).lower()
        for campaign in campaigns:
            # The solver reports integer variables as floats; round so that a
            # value like 2.9999999 is not truncated to 2.
            count_value = int(round(article_count[(writer, campaign)].solution_value()))
            if count_value <= 0:
                continue

            campaign_data = contracted_approved_df[contracted_approved_df['Campaign ID'] == campaign]
            context = context_lookup.get((writer_key, campaign))

            campaign_url = ""
            next_billing_date = None
            is_first_time_cycle = False
            faced_writer = False
            context_score = None

            if not campaign_data.empty:
                if 'URL' in campaign_data.columns:
                    url_value = campaign_data['URL'].iloc[0]
                    campaign_url = url_value if pd.notna(url_value) else ""

                if 'Next Billing Date' in campaign_data.columns:
                    billing_date_str = campaign_data['Next Billing Date'].iloc[0]
                    if pd.notna(billing_date_str) and billing_date_str:
                        try:
                            parsed_billing = pd.to_datetime(billing_date_str)
                            if pd.notna(parsed_billing):
                                next_billing_date = parsed_billing.date()
                        except (ValueError, TypeError, OverflowError):
                            next_billing_date = None

                if 'First Time Cycle' in campaign_data.columns:
                    first_time_value = str(campaign_data['First Time Cycle'].iloc[0]).strip().lower()
                    is_first_time_cycle = first_time_value in ('yes', 'true', '1')

            if context is not None:
                # Same parsing as the objective, so the stored flag matches
                # the flag the optimizer actually used (bool("FALSE") is True).
                faced_writer = _flag_is_true(context.get(faced_writer_col))
                context_score = _context_score_or_none(context)

            assignments.append({
                'campaign_id': campaign,
                'campaign_url': campaign_url,
                'writer_id': writer,
                'weekly_target': count_value,
                'is_faced_writer': faced_writer,
                'is_first_time_cycle': is_first_time_cycle,
                'context_score': context_score,
                'next_billing_date': next_billing_date,
                'assignment_date': current_time.date(),
                'created_at': current_time,
                'updated_at': current_time
            })

    # Create DataFrame
    assignments_df = pd.DataFrame(assignments)

    print(f"Created {len(assignments_df)} writer-campaign assignments")
    if not assignments_df.empty:
        print(f"Total articles assigned: {assignments_df['weekly_target'].sum()}")

        # Print summary by campaign type
        first_time_assignments = assignments_df[assignments_df['is_first_time_cycle']]
        if not first_time_assignments.empty:
            print(f"First Time Cycle assignments: {len(first_time_assignments)} assignments, {first_time_assignments['weekly_target'].sum()} articles")

    return assignments_df, ""

def main():
    """Run the writer assignment optimization on data loaded from Google Sheets.

    Authenticates the Colab user, loads the campaign, context and writer
    capacity sheets, keeps Active/Product campaigns and writers with positive
    capacity, runs the optimizer and prints a summary.

    Returns:
        The assignments DataFrame, or None when required data is missing or
        the optimization fails or produces no assignments.
    """
    print("=== Campaign Writer Assignment Extractor ===")

    # Authenticate with Google
    auth.authenticate_user()
    creds, _ = default()
    gc = gspread.authorize(creds)

    # Sheet IDs
    contracted_approved_sheet_id = '1YI5zepmJO4ci6qZVpBsSgcQFKTRGrijqS_ueGOXh8WQ'
    context_table_sheet_id = '1rYhyoTTdvyvuZDdYytmoEtQJRVSo5XLJv9xCwHOcd8s'
    writer_capacity_sheet_id = '1QPGzI73Ma0qbsbBpiucsJSX2ikmY_i4FKqIJ9OnWFw0'

    print("Loading data from Google Sheets...")

    # Load data
    contracted_approved_df = read_google_sheet(gc, contracted_approved_sheet_id)
    campaign_writer_df = read_google_sheet(gc, context_table_sheet_id)
    writer_capacity_df = read_google_sheet(gc, writer_capacity_sheet_id)

    # read_google_sheet returns an empty, column-less frame when a sheet cannot
    # be read; stop with a clear message instead of a KeyError further down.
    required_by_sheet = (
        ('context table', campaign_writer_df, ('Writer ID', 'Campaign ID')),
        ('writer capacity', writer_capacity_df, ('Writer ID', 'Weekly Capacity')),
    )
    missing = [
        f"{label}: '{col}'"
        for label, frame, cols in required_by_sheet
        for col in cols
        if col not in frame.columns
    ]
    if missing:
        print(f"ERROR: Missing required columns - {', '.join(missing)}")
        return None

    # Standardize writer IDs
    campaign_writer_df = standardize_writer_ids(campaign_writer_df)
    writer_capacity_df = standardize_writer_ids(writer_capacity_df)

    # Filter for active campaigns (if Status and Type columns exist)
    if 'Status' in contracted_approved_df.columns and 'Type' in contracted_approved_df.columns:
        # .copy() so the column assignments below act on an independent frame
        # rather than on a slice of the loaded data.
        contracted_approved_df = contracted_approved_df[
            (contracted_approved_df['Status'] == 'Active') &
            (contracted_approved_df['Type'] == 'Product')
        ].copy()
        print(f"Active Product campaigns: {len(contracted_approved_df)}")

    # Add missing columns if needed
    required_columns = ['Campaign ID', 'URL', 'Topics Approved', 'Outstanding on day', 'Next Billing Date', 'First Time Cycle']
    for col in required_columns:
        if col not in contracted_approved_df.columns:
            if col in ['Next Billing Date', 'URL']:
                contracted_approved_df[col] = ""
            elif col == 'First Time Cycle':
                contracted_approved_df[col] = "No"
            else:
                contracted_approved_df[col] = 0

    # Convert numeric columns
    for col in ['Topics Approved', 'Outstanding on day']:
        if col in contracted_approved_df.columns:
            contracted_approved_df[col] = pd.to_numeric(contracted_approved_df[col], errors='coerce').fillna(0)

    # Filter for active writers. The sheet column stays text if any cell is
    # non-numeric, so coerce before comparing; unparseable capacities become
    # NaN and are dropped by the > 0 test.
    active_writer_capacity_df = writer_capacity_df.copy()
    active_writer_capacity_df['Weekly Capacity'] = pd.to_numeric(
        active_writer_capacity_df['Weekly Capacity'], errors='coerce'
    )
    active_writer_capacity_df = active_writer_capacity_df[
        active_writer_capacity_df['Weekly Capacity'] > 0
    ].copy()

    # Run optimization
    print("Running optimization...")
    assignments_df, error_message = optimize_writer_assignments(
        contracted_approved_df,
        campaign_writer_df,
        active_writer_capacity_df
    )

    if assignments_df is None or assignments_df.empty:
        print(f"ERROR: {error_message}")
        return None

    print("\n=== ASSIGNMENT RESULTS ===")
    print("DataFrame ready for PostgreSQL insertion:")
    print(f"Shape: {assignments_df.shape}")
    print("\nColumn types:")
    print(assignments_df.dtypes)
    print("\nFirst 5 rows:")
    print(assignments_df.head())
    print("\nSample of all columns:")
    for col in assignments_df.columns:
        print(f"{col}: {assignments_df[col].iloc[0]}")

    return assignments_df

# Entry point: run the allocation only when this module is executed as the
# main program, and keep the returned DataFrame (or None) in result_df.
if __name__ == "__main__":
    result_df = main()

=== Campaign Writer Assignment Extractor ===
Loading data from Google Sheets...


  df[col] = pd.to_numeric(df[col], errors='ignore')


Active Product campaigns: 131
Running optimization...
Optimization setup: 68 writers and 131 campaigns
Campaign 0351b0fe-1ee7-4d78-827d-8c08e33a0b86 is First Time Cycle - applying 20x priority multiplier
Campaign 384c5612-722b-43d8-95c9-2c50247c6a93 is First Time Cycle - applying 20x priority multiplier
Campaign 5f2ed42c-58e8-402e-8d8c-aff56b958c6b is First Time Cycle - applying 20x priority multiplier
Campaign 61f708d1-7421-4ffb-a3be-0967aa4b072c is First Time Cycle - applying 20x priority multiplier
Campaign 72663ba7-3e84-4204-a72f-7fce50607eea is First Time Cycle - applying 20x priority multiplier
Campaign 80612b3f-ea4b-4bd7-99b7-37a1421fafc1 is First Time Cycle - applying 20x priority multiplier
Campaign 89349814-59d2-477c-8b56-9c9d471e7f9e is First Time Cycle - applying 20x priority multiplier
Campaign a8998f69-bb75-4b2e-9d80-5e9815533ecb is First Time Cycle - applying 20x priority multiplier
Campaign b4154640-a977-4927-8bd0-932154cfca90 is First Time Cycle - applying 20x priority

In [None]:
import os
from datetime import datetime

import gspread
import numpy as np
import pandas as pd
import psycopg2
from google.auth import default
from google.colab import auth
from ortools.linear_solver import pywraplp
from sqlalchemy import create_engine, text

# PostgreSQL connection settings. Each value can be overridden through the
# standard libpq environment variable; the literals are only fallbacks so
# existing runs keep working.
# NOTE(security): the fallback password is committed in source. Rotate it and
# supply PGPASSWORD from the environment / a secret store, then drop the default.
pg_params = {
    'host': os.environ.get('PGHOST', 'gw-postgres-dev.celzx4qnlkfp.us-east-1.rds.amazonaws.com'),
    'database': os.environ.get('PGDATABASE', 'gw_prod'),
    'user': os.environ.get('PGUSER', 'airbyte_user'),
    'password': os.environ.get('PGPASSWORD', 'airbyte_user_password'),
    'port': os.environ.get('PGPORT', '5432')
}

def read_postgres_table(table_name):
    """Read a whole PostgreSQL table into a DataFrame.

    Args:
        table_name: Table identifier, optionally schema-qualified. It is
            interpolated into the SQL text as-is, so pass trusted constants
            only.

    Returns:
        DataFrame with every row of the table, or an empty DataFrame when the
        read fails (the error is printed, not raised).
    """
    engine = None
    try:
        engine = create_engine(f"postgresql://{pg_params['user']}:{pg_params['password']}@{pg_params['host']}:{pg_params['port']}/{pg_params['database']}")
        df = pd.read_sql(f"SELECT * FROM {table_name}", engine)
        print(f"✅ Loaded {len(df)} records from {table_name}")
        return df
    except Exception as e:
        print(f"❌ Error reading {table_name}: {e}")
        return pd.DataFrame()
    finally:
        # Release pooled connections on the failure path too.
        if engine is not None:
            engine.dispose()

def read_google_sheet(gc, spreadsheet_id, sheet_name=0):
    """Read one worksheet of a Google Sheet into a pandas DataFrame.

    Args:
        gc: Authorized gspread client (any object exposing ``open_by_key``).
        spreadsheet_id: Key of the spreadsheet to open.
        sheet_name: Worksheet index (int) or worksheet title (str).

    Returns:
        DataFrame whose column names come from the first sheet row. Columns in
        which every value parses as a number get a numeric dtype; all other
        columns keep the raw strings. An empty DataFrame is returned when the
        sheet has no rows or when reading fails (the error is printed, not
        raised).
    """
    try:
        spreadsheet = gc.open_by_key(spreadsheet_id)
        if isinstance(sheet_name, int):
            worksheet = spreadsheet.get_worksheet(sheet_name)
        else:
            worksheet = spreadsheet.worksheet(sheet_name)

        data = worksheet.get_all_values()
        if not data:
            return pd.DataFrame()

        df = pd.DataFrame(data[1:], columns=data[0])
        for col in df.columns:
            # pd.to_numeric(errors='ignore') is deprecated; converting strictly
            # and keeping the column on failure gives the same result without
            # depending on the deprecated option.
            try:
                df[col] = pd.to_numeric(df[col])
            except (ValueError, TypeError):
                # Non-numeric column (or duplicated header): keep it as read.
                continue
        return df
    except Exception as e:
        print(f"Error reading Google Sheet: {e}")
        return pd.DataFrame()

def get_campaign_priority_weights(contracted_approved_df):
    """Calculate weights based on billing date proximity and First Time Cycle status.

    Args:
        contracted_approved_df: Frame with a 'Campaign ID' column and,
            optionally, 'First Time Cycle' and 'Next Billing Date' columns.

    Returns:
        dict mapping campaign ID to a float weight. The weight starts at 1.0,
        is multiplied by 20 when 'First Time Cycle' is yes/true/1, and by
        10 / 8 / 5 / 3 when the billing date is at most 0 / 7 / 14 / 30
        calendar days away. A missing or unparseable billing date adds no
        billing multiplier.
    """
    priority_weights = {}
    # Compare whole calendar days. Subtracting the current timestamp (with its
    # time of day) from a midnight billing date made every date look one day
    # closer, so a date tomorrow was treated as due today.
    today = datetime.now().date()

    for _, campaign_row in contracted_approved_df.iterrows():
        campaign = campaign_row['Campaign ID']
        base_weight = 1.0

        # First Time Cycle gets 20x priority (a missing column yields None ->
        # 'none', which is not a truthy spelling).
        first_time_value = str(campaign_row.get('First Time Cycle')).strip().lower()
        if first_time_value in ('yes', 'true', '1'):
            base_weight *= 20.0
            print(f"Campaign {campaign} is First Time Cycle - applying 20x priority")

        # Billing date proximity weights
        billing_date_str = campaign_row.get('Next Billing Date')
        if pd.notna(billing_date_str) and billing_date_str:
            try:
                billing_date = pd.to_datetime(billing_date_str)
            except (ValueError, TypeError, OverflowError):
                # Unparseable or out-of-range date: no billing multiplier.
                billing_date = None

            if billing_date is not None and pd.notna(billing_date):
                days_until_billing = (billing_date.date() - today).days

                if days_until_billing <= 0:
                    base_weight *= 10.0
                elif days_until_billing <= 7:
                    base_weight *= 8.0
                elif days_until_billing <= 14:
                    base_weight *= 5.0
                elif days_until_billing <= 30:
                    base_weight *= 3.0

        priority_weights[campaign] = base_weight
    return priority_weights

def _flag_is_true(value):
    """Interpret a sheet/database cell as a boolean flag.

    Strings are true only when they spell yes/true/1 (case-insensitive, the
    same spellings accepted for 'First Time Cycle'); missing values (None/NaN)
    are false; anything else uses Python truthiness.
    """
    if isinstance(value, str):
        return value.strip().lower() in ('yes', 'true', '1')
    try:
        if pd.isna(value):
            return False
        return bool(value)
    except (TypeError, ValueError):
        return False


def _context_score_or_none(context):
    """Return the row's 'Context Score' as a float, or None when absent or invalid."""
    raw_score = context.get('Context Score')
    try:
        if pd.isna(raw_score):
            return None
        return float(raw_score)
    except (TypeError, ValueError):
        return None


def _build_context_lookup(campaign_writer_df):
    """Index the context table by (lower-cased writer ID, campaign ID).

    Only the first row of each pair is kept, matching a filter followed by
    ``.iloc[0]``.
    """
    lookup = {}
    writer_keys = campaign_writer_df['Writer ID'].str.lower()
    campaign_keys = campaign_writer_df['Campaign ID']
    for position, key in enumerate(zip(writer_keys, campaign_keys)):
        if key not in lookup:
            lookup[key] = campaign_writer_df.iloc[position]
    return lookup


def optimize_writer_assignments(contracted_approved_df, campaign_writer_df, writer_capacity_df):
    """Optimize writer assignments and return a clean DataFrame for PostgreSQL insertion.

    An integer program (SCIP via OR-Tools) chooses how many articles each
    writer gets per campaign:

    * each writer's total is capped by 'Weekly Capacity' (writers with a
      missing capacity get no cap);
    * each campaign's total is capped by min('Outstanding on day',
      'Topics Approved'); campaigns with nothing assignable are forced to 0;
    * First Time Cycle campaigns must reach min(13, int(assignable / 2.5))
      articles, every other assignable campaign at least min(3, assignable);
    * the objective rewards writer/campaign pairs found in the context table
      (x15 for a faced writer, x(1 + 10 * context score)), scaled by the
      campaign priority weight.

    Args:
        contracted_approved_df: Campaign table ('Campaign ID', 'Topics
            Approved', 'Outstanding on day', optional 'URL',
            'Next Billing Date', 'First Time Cycle').
        campaign_writer_df: Context table ('Writer ID', 'Campaign ID',
            optional faced-writer and 'Context Score' columns).
        writer_capacity_df: Writer table ('Writer ID', 'Weekly Capacity').

    Returns:
        ``(assignments_df, "")`` on success (the frame is empty when the
        solver assigns nothing), ``(None, error_message)`` otherwise.
    """
    solver = pywraplp.Solver.CreateSolver('SCIP')
    if not solver:
        return None, "Could not create solver."

    writers = writer_capacity_df['Writer ID'].unique()
    campaigns = contracted_approved_df['Campaign ID'].unique()

    print(f"Optimization: {len(writers)} writers, {len(campaigns)} campaigns")

    if len(writers) == 0 or len(campaigns) == 0:
        return None, "No writers or campaigns found."

    priority_weights = get_campaign_priority_weights(contracted_approved_df)

    # First Time Cycle targets
    first_time_targets = {}
    for _, campaign_row in contracted_approved_df.iterrows():
        first_time_value = str(campaign_row.get('First Time Cycle')).strip().lower()
        if first_time_value not in ('yes', 'true', '1'):
            continue

        outstanding = campaign_row.get('Outstanding on day', 0)
        topics_approved = campaign_row.get('Topics Approved', 0)
        if pd.notna(outstanding) and pd.notna(topics_approved) and topics_approved > 0:
            assignable_content = min(outstanding, topics_approved)
            first_time_targets[campaign_row['Campaign ID']] = min(13, int(assignable_content / 2.5))

    # Variables
    article_count = {}
    for writer in writers:
        for campaign in campaigns:
            article_count[(writer, campaign)] = solver.IntVar(0, 1000, f'count_{writer}_{campaign}')

    # Writer capacity constraints
    for writer in writers:
        writer_rows = writer_capacity_df[writer_capacity_df['Writer ID'] == writer]
        if writer_rows.empty:
            continue
        capacity = writer_rows['Weekly Capacity'].iloc[0]
        if pd.notna(capacity):
            solver.Add(sum(article_count[(writer, campaign)] for campaign in campaigns) <= capacity)

    # Campaign constraints
    for campaign in campaigns:
        campaign_rows = contracted_approved_df[contracted_approved_df['Campaign ID'] == campaign]
        if campaign_rows.empty:
            continue

        outstanding = campaign_rows['Outstanding on day'].fillna(0).iloc[0]
        topics_approved = campaign_rows['Topics Approved'].fillna(0).iloc[0]
        assignable_content = min(outstanding, topics_approved) if topics_approved > 0 else 0

        if assignable_content <= 0:
            for writer in writers:
                solver.Add(article_count[(writer, campaign)] == 0)
            continue

        campaign_total = sum(article_count[(writer, campaign)] for writer in writers)
        solver.Add(campaign_total <= assignable_content)
        if campaign in first_time_targets:
            solver.Add(campaign_total >= first_time_targets[campaign])
        else:
            solver.Add(campaign_total >= min(3, assignable_content))

    # Objective function
    objective = solver.Objective()
    faced_writer_col = next((col for col in campaign_writer_df.columns if 'faced' in col.lower() and 'writer' in col.lower()), 'Faced Writer')

    # One pass over the context table instead of re-filtering it for every
    # writer/campaign pair.
    context_lookup = _build_context_lookup(campaign_writer_df)

    for writer in writers:
        writer_key = str(writer).lower()
        for campaign in campaigns:
            # Pairs without context history still get a small positive weight.
            base_weight = 0.1

            context = context_lookup.get((writer_key, campaign))
            if context is not None:
                base_weight = 1.0

                # Faced writer bonus. Parsed explicitly because bool("false")
                # and bool(NaN) are both True.
                if _flag_is_true(context.get(faced_writer_col)):
                    base_weight *= 15

                # Context score bonus
                context_score = _context_score_or_none(context)
                if context_score is not None:
                    base_weight *= (1 + context_score * 10)

            base_weight *= priority_weights.get(campaign, 1.0)
            objective.SetCoefficient(article_count[(writer, campaign)], base_weight)

    objective.SetMaximization()

    print("Solving optimization...")
    status = solver.Solve()

    if status != pywraplp.Solver.OPTIMAL:
        if status == pywraplp.Solver.INFEASIBLE:
            return None, "Optimization failed: the problem is infeasible."
        return None, f"Optimization failed with status code: {status}"

    # Collect results
    assignments = []
    current_time = datetime.now()

    for writer in writers:
        writer_key = str(writer).lower()
        for campaign in campaigns:
            # The solver reports integer variables as floats; round so that a
            # value like 2.9999999 is not truncated to 2.
            count_value = int(round(article_count[(writer, campaign)].solution_value()))
            if count_value <= 0:
                continue

            campaign_data = contracted_approved_df[contracted_approved_df['Campaign ID'] == campaign]
            context = context_lookup.get((writer_key, campaign))

            campaign_url = ""
            next_billing_date = None
            is_first_time_cycle = False
            faced_writer = False
            context_score = None

            if not campaign_data.empty:
                if 'URL' in campaign_data.columns:
                    url_value = campaign_data['URL'].iloc[0]
                    campaign_url = str(url_value) if pd.notna(url_value) else ""

                if 'Next Billing Date' in campaign_data.columns:
                    billing_str = campaign_data['Next Billing Date'].iloc[0]
                    if pd.notna(billing_str) and str(billing_str).strip():
                        try:
                            parsed_billing = pd.to_datetime(billing_str)
                            if pd.notna(parsed_billing):
                                next_billing_date = parsed_billing.date()
                        except (ValueError, TypeError, OverflowError):
                            # Unparseable date: store NULL.
                            next_billing_date = None

                if 'First Time Cycle' in campaign_data.columns:
                    first_time_value = str(campaign_data['First Time Cycle'].iloc[0]).strip().lower()
                    is_first_time_cycle = first_time_value in ('yes', 'true', '1')

            if context is not None:
                faced_writer = _flag_is_true(context.get(faced_writer_col))
                context_score = _context_score_or_none(context)

            assignments.append({
                'campaign_id': campaign,
                'campaign_url': campaign_url,
                'writer_id': writer,
                'weekly_target': count_value,
                'is_faced_writer': faced_writer,
                'is_first_time_cycle': is_first_time_cycle,
                'context_score': context_score,
                'next_billing_date': next_billing_date,
                'assignment_date': current_time.date(),
                'created_at': current_time,
                'updated_at': current_time
            })

    assignments_df = pd.DataFrame(assignments)
    if assignments_df.empty:
        # A frame built from no records has no columns, so the summaries below
        # would raise KeyError.
        print("Created 0 assignments, 0 total articles")
        return assignments_df, ""

    print(f"Created {len(assignments_df)} assignments, {assignments_df['weekly_target'].sum()} total articles")

    first_time_count = assignments_df[assignments_df['is_first_time_cycle']]['weekly_target'].sum()
    if first_time_count > 0:
        print(f"First Time Cycle: {first_time_count} articles")

    return assignments_df, ""

def create_table_if_not_exists():
    """Ensure gist.csmmailer_fridayallocation has the is_latest column and index.

    Despite the name, the table itself is not created: the function only adds
    the ``is_latest`` column and its index when they are missing.

    Returns:
        True when both statements ran and were committed, False otherwise
        (the error is printed, not raised).
    """
    conn = None
    try:
        conn = psycopg2.connect(**pg_params)
        with conn.cursor() as cursor:
            # Add is_latest column if it doesn't exist
            cursor.execute("""
            ALTER TABLE gist.csmmailer_fridayallocation
            ADD COLUMN IF NOT EXISTS is_latest BOOLEAN DEFAULT FALSE;
        """)

            # Create index if it doesn't exist
            cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_fridayallocation_is_latest
            ON gist.csmmailer_fridayallocation(is_latest);
        """)

        conn.commit()
        print("✅ Table updated successfully")
        return True
    except Exception as e:
        print(f"❌ Error updating table: {e}")
        return False
    finally:
        # Close the connection on the failure path too; closing without a
        # commit discards the partial work.
        if conn is not None:
            conn.close()

def insert_to_postgres(assignments_df):
    """Insert assignment data to PostgreSQL with latest flag tracking.

    The new rows are stamped with a shared ``run_timestamp`` and
    ``is_latest = TRUE``; rows from earlier runs are flipped to
    ``is_latest = FALSE``. Both steps run in one transaction, so a failed
    insert leaves the previous run marked as latest.

    Args:
        assignments_df: Frame produced by the optimizer; it is copied, not
            modified.

    Returns:
        True on success, False when there is nothing to insert or any
        database step fails (the error is printed, not raised).
    """
    if assignments_df is None or assignments_df.empty:
        print("❌ No data to insert")
        return False

    engine = None
    try:
        engine = create_engine(f"postgresql://{pg_params['user']}:{pg_params['password']}@{pg_params['host']}:{pg_params['port']}/{pg_params['database']}")

        current_run_time = datetime.now()
        assignments_df = assignments_df.copy()
        assignments_df['run_timestamp'] = current_run_time
        assignments_df['is_latest'] = True

        # Data type conversions (done before touching the database so a bad
        # frame fails without side effects)
        assignments_df['weekly_target'] = assignments_df['weekly_target'].astype(int)
        assignments_df['is_faced_writer'] = assignments_df['is_faced_writer'].astype(bool)
        assignments_df['is_first_time_cycle'] = assignments_df['is_first_time_cycle'].astype(bool)
        assignments_df['is_latest'] = assignments_df['is_latest'].astype(bool)
        assignments_df['context_score'] = assignments_df['context_score'].replace({np.nan: None})

        if 'next_billing_date' in assignments_df.columns:
            assignments_df['next_billing_date'] = pd.to_datetime(assignments_df['next_billing_date'], errors='coerce').dt.date

        assignments_df['assignment_date'] = pd.to_datetime(assignments_df['assignment_date']).dt.date
        assignments_df['created_at'] = pd.to_datetime(assignments_df['created_at'])
        assignments_df['updated_at'] = pd.to_datetime(assignments_df['updated_at'])

        print(f"📊 Inserting {len(assignments_df)} records at {current_run_time}")

        # engine.begin() commits on success and rolls back on any exception,
        # keeping the flag reset and the insert atomic.
        with engine.begin() as conn:
            conn.execute(text("UPDATE gist.csmmailer_fridayallocation SET is_latest = FALSE WHERE is_latest = TRUE"))
            assignments_df.to_sql(
                name='csmmailer_fridayallocation',
                con=conn,
                schema='gist',
                if_exists='append',
                index=False,
                method='multi'
            )

        # Summary stats
        with engine.connect() as conn:
            total_records = conn.execute(text("SELECT COUNT(*) FROM gist.csmmailer_fridayallocation")).scalar()
            latest_records = conn.execute(text("SELECT COUNT(*) FROM gist.csmmailer_fridayallocation WHERE is_latest = TRUE")).scalar()

            print(f"✅ Success! Total records: {total_records}, Latest: {latest_records}")

        return True

    except Exception as e:
        print(f"❌ Insert error: {e}")
        return False
    finally:
        # Release pooled connections on the failure path too.
        if engine is not None:
            engine.dispose()

def get_recent_runs(limit=3):
    """Print and return a summary of the most recent optimization runs.

    Groups ``gist.csmmailer_fridayallocation`` by ``run_timestamp`` and, for
    each run, reports the number of assignment rows, the summed
    ``weekly_target`` and whether any row of that run is flagged ``is_latest``.

    Args:
        limit: Maximum number of runs to return, newest first. It is coerced
            to ``int`` before being placed in the SQL text.

    Returns:
        A DataFrame with columns ``run_timestamp``, ``assignments``,
        ``articles`` and ``is_latest`` (1 or 0). An empty DataFrame is
        returned if anything goes wrong; the error is printed, not raised.
    """
    engine = None
    try:
        # LIMIT is formatted into the SQL string, so force it to a plain
        # integer first; a non-numeric value fails here, not in the database.
        row_limit = int(limit)

        engine = create_engine(f"postgresql://{pg_params['user']}:{pg_params['password']}@{pg_params['host']}:{pg_params['port']}/{pg_params['database']}")

        query = f"""
        SELECT run_timestamp, COUNT(*) as assignments, SUM(weekly_target) as articles,
               MAX(CASE WHEN is_latest THEN 1 ELSE 0 END) as is_latest
        FROM gist.csmmailer_fridayallocation
        GROUP BY run_timestamp
        ORDER BY run_timestamp DESC
        LIMIT {row_limit}
        """

        df = pd.read_sql(query, engine)

        if not df.empty:
            print("\n📋 Recent Runs:")
            for _, row in df.iterrows():
                indicator = "🟢 LATEST" if row['is_latest'] else "🔴"
                print(f"{row['run_timestamp']} {indicator} - {row['assignments']} assignments, {row['articles']} articles")

        return df
    except Exception as e:
        print(f"❌ Error getting runs: {e}")
        return pd.DataFrame()
    finally:
        # Release pooled connections on both the success and the error path.
        if engine is not None:
            engine.dispose()

def _lowercase_writer_ids(df):
    """Lower-case writer IDs in place; 'Writer ID' mirrors 'writer_id' when the latter exists."""
    for col in ('Writer ID', 'writer_id'):
        if col in df.columns:
            df[col] = df[col].astype(str).str.lower()
    if 'writer_id' in df.columns:
        df['Writer ID'] = df['writer_id']
    return df


def _alias_columns(df, mapping):
    """Copy each source column to its standard name when that standard name is absent."""
    for source_col, standard_col in mapping.items():
        if source_col in df.columns and standard_col not in df.columns:
            df[standard_col] = df[source_col]
    return df


def main():
    """Run the complete Friday writer allocation pipeline.

    Steps:
        1. Authenticate against Google and load the writer capacity sheet.
        2. Load contracted campaigns and the campaign/writer context table
           from PostgreSQL.
        3. Normalise IDs and column names so both snake_case and title-case
           headers are accepted.
        4. Keep only campaigns with status 'Active' and type 'Product' (when
           both columns exist) and writers with a positive weekly capacity.
        5. Run the optimizer and append the assignments to PostgreSQL.

    Returns:
        The assignments DataFrame when optimization and the database insert
        both succeed, otherwise ``None``.
    """
    print("=== Friday Writer Allocation System ===")

    # Google Sheets authentication (only for writer capacity)
    auth.authenticate_user()
    creds, _ = default()
    gc = gspread.authorize(creds)

    # Load data - contracted and context from PostgreSQL, capacity from Google Sheets
    capacity_sheet = '1QPGzI73Ma0qbsbBpiucsJSX2ikmY_i4FKqIJ9OnWFw0'

    print("Loading data...")
    contracted_df = read_postgres_table('gist.writerallocation_contractedapproved')
    campaign_writer_df = read_postgres_table('gist.writerallocation_contexttable')
    writer_capacity_df = read_google_sheet(gc, capacity_sheet)

    # Check data availability
    if contracted_df.empty or campaign_writer_df.empty or writer_capacity_df.empty:
        print("❌ Missing required data from sources")
        return None

    # Standardize writer IDs so the context and capacity tables join case-insensitively
    _lowercase_writer_ids(campaign_writer_df)
    _lowercase_writer_ids(writer_capacity_df)

    # Filter active campaigns - adapt to PostgreSQL column names
    status_col = 'Status' if 'Status' in contracted_df.columns else 'status'
    type_col = 'Type' if 'Type' in contracted_df.columns else 'type'

    if status_col in contracted_df.columns and type_col in contracted_df.columns:
        is_active_product = (
            (contracted_df[status_col] == 'Active') &
            (contracted_df[type_col] == 'Product')
        )
        # .copy() so the column assignments below write to an independent
        # frame rather than to a slice of the loaded table.
        contracted_df = contracted_df[is_active_product].copy()
        print(f"Active campaigns: {len(contracted_df)}")

    # Standardize column names for required columns
    _alias_columns(contracted_df, {
        'campaign_id': 'Campaign ID',
        'url': 'URL',
        'topics_approved': 'Topics Approved',
        'outstanding_on_day': 'Outstanding on day',
        'next_billing_date': 'Next Billing Date',
        'first_time_cycle': 'First Time Cycle',
    })

    # Standardize context table column names
    _alias_columns(campaign_writer_df, {
        'writer_id': 'Writer ID',
        'campaign_id': 'Campaign ID',
        'faced_writer': 'Faced Writer',
        'context_score': 'Context Score',
    })

    # Standardize capacity table column names
    _alias_columns(writer_capacity_df, {
        'writer_id': 'Writer ID',
        'weekly_capacity': 'Weekly Capacity',
    })

    # Add missing columns if needed: text columns default to "", the
    # first-time flag to "No", and every other required column to 0.
    required_cols = ['Campaign ID', 'URL', 'Topics Approved', 'Outstanding on day', 'Next Billing Date', 'First Time Cycle']
    non_zero_defaults = {'Next Billing Date': "", 'URL': "", 'First Time Cycle': "No"}
    for col in required_cols:
        if col not in contracted_df.columns:
            contracted_df[col] = non_zero_defaults.get(col, 0)

    # Convert numeric columns; unparseable values count as 0
    for col in ['Topics Approved', 'Outstanding on day']:
        contracted_df[col] = pd.to_numeric(contracted_df[col], errors='coerce').fillna(0)

    # Filter active writers. Sheet values can arrive as strings (a single
    # blank cell keeps the whole column textual), so coerce before comparing.
    if 'Weekly Capacity' not in writer_capacity_df.columns:
        print("❌ Writer capacity data has no 'Weekly Capacity' column")
        return None
    writer_capacity_df['Weekly Capacity'] = pd.to_numeric(
        writer_capacity_df['Weekly Capacity'], errors='coerce'
    ).fillna(0)
    active_writers_df = writer_capacity_df[writer_capacity_df['Weekly Capacity'] > 0].copy()

    # Run optimization
    assignments_df, error = optimize_writer_assignments(contracted_df, campaign_writer_df, active_writers_df)

    if assignments_df is None:
        print(f"ERROR: {error}")
        return None

    # Setup database and insert; report which stage failed instead of
    # echoing the optimizer's error value for a database problem.
    if not create_table_if_not_exists():
        print("ERROR: could not create or verify the allocation table")
        return None

    get_recent_runs()
    if not insert_to_postgres(assignments_df):
        print("ERROR: failed to insert assignments into PostgreSQL")
        return None

    get_recent_runs()
    return assignments_df

# Run the system when this module is executed directly (not when imported).
# `result` keeps whatever main() returns: the assignments DataFrame, or None
# when main() stops early.
if __name__ == "__main__":
    result = main()

=== Friday Writer Allocation System ===
Loading data...
✅ Loaded 161 records from gist.writerallocation_contractedapproved
✅ Loaded 854 records from gist.writerallocation_contexttable


  df[col] = pd.to_numeric(df[col], errors='ignore')


Active campaigns: 131
Optimization: 68 writers, 131 campaigns
Campaign 0351b0fe-1ee7-4d78-827d-8c08e33a0b86 is First Time Cycle - applying 20x priority
Campaign 384c5612-722b-43d8-95c9-2c50247c6a93 is First Time Cycle - applying 20x priority
Campaign 5f2ed42c-58e8-402e-8d8c-aff56b958c6b is First Time Cycle - applying 20x priority
Campaign 61f708d1-7421-4ffb-a3be-0967aa4b072c is First Time Cycle - applying 20x priority
Campaign 72663ba7-3e84-4204-a72f-7fce50607eea is First Time Cycle - applying 20x priority
Campaign 80612b3f-ea4b-4bd7-99b7-37a1421fafc1 is First Time Cycle - applying 20x priority
Campaign 89349814-59d2-477c-8b56-9c9d471e7f9e is First Time Cycle - applying 20x priority
Campaign a8998f69-bb75-4b2e-9d80-5e9815533ecb is First Time Cycle - applying 20x priority
Campaign b4154640-a977-4927-8bd0-932154cfca90 is First Time Cycle - applying 20x priority
Campaign b439f3cf-5b49-455b-b9e5-5f1c90f34772 is First Time Cycle - applying 20x priority
Campaign be966cd9-ba2f-4ef9-b435-2a2a9