In [2]:
# invoice_processing/invoice_processing.py

import sys
import os
import logging
from pymongo import MongoClient, UpdateOne
import pandas as pd
from datetime import datetime, timezone
from tqdm import tqdm  # For progress bars
from dotenv import load_dotenv

# Absolute path to the directory that contains the project's ``config`` module.
CONFIG_DIR = r'C:\Users\ak012\Documents\Projects\config'

# Make the config directory importable.
if CONFIG_DIR not in sys.path:
    sys.path.append(CONFIG_DIR)

# Import ``config`` exactly once, inside the guard. An unguarded import placed
# before this block would raise ImportError first and make the friendly
# message below unreachable.
try:
    import config
except ImportError as e:
    print(f"Configuration file not found: {e}")
    sys.exit(1)

# Load variables from a .env file (python-dotenv) into the process environment.
load_dotenv()

# Configure logging on the root logger; every function in this file logs through it.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()

# Read the connection string from the environment instead of embedding it in
# the source. A hard-coded placeholder is always truthy, which would make the
# check below dead code and would leak the credential into version control.
MONGO_URI = os.getenv('MONGO_URI')
if not MONGO_URI:
    logger.error("MONGO_URI not found in environment variables.")
    sys.exit(1)

# Database and collection names used by process_invoices().
DATABASE_NAME = "legitt-prod"
INVOICES_COLLECTION_NAME = "company_invoices"
CONTRACTS_COLLECTION_NAME = "company_contracts"

def log_configurations():
    """Log each public, non-callable attribute of the ``config`` module at INFO level."""
    logger.info("Configuration Parameters:")
    for name in dir(config):
        # Skip dunder attributes such as __name__ and __file__.
        if name.startswith("__"):
            continue
        value = getattr(config, name)
        # Skip functions and classes; only plain settings are reported.
        if callable(value):
            continue
        logger.info(f"{name}: {value}")

def calculate_actual_contract_value_today(contract, today):
    """Calculate the approximate value of the contract as of today.

    Args:
        contract: Mapping that may hold 'contract_value', 'sum_renewals',
            'from_date' and 'to_date'. Missing or falsy amounts count as 0.
        today: The reference moment. It must be comparable with the contract
            dates (the callers in this file pass timezone-aware UTC values).

    Returns:
        The full contract value plus renewals when either date is missing or
        the contract has already ended; otherwise the contract value prorated
        by elapsed days over a 365-day year, plus renewals.
    """
    contract_value = contract.get('contract_value', 0) or 0
    total_renewal_amount = contract.get('sum_renewals', 0) or 0
    from_date = contract.get('from_date')
    to_date = contract.get('to_date')

    # Without both dates no proration is possible; fall back to the full value.
    if pd.isnull(from_date) or pd.isnull(to_date):
        return contract_value + total_renewal_amount

    if today <= to_date:
        # Clamp at zero: a contract that has not started yet would otherwise
        # produce a negative day count and therefore a negative value.
        days_since_start = max((today - from_date).days, 0)
        return (contract_value * days_since_start / 365) + total_renewal_amount

    # The contract has ended, so its whole value has accrued.
    return contract_value + total_renewal_amount

def calculate_interference(row, apply_coverage_ratio):
    """Compute the interference score for one invoice row.

    The overdue component is ``overdue_days * config.OVERDUE_MULTIPLIER`` and
    only applies once the invoice is more than 60 days overdue. The row's
    'raised_not_sent_weights' (0 when absent) is always added, and the row's
    'coverage_penalty' is added only when ``apply_coverage_ratio`` is true.
    """
    days_overdue = row['overdue_days']
    if days_overdue > 60:
        multiplier = config.OVERDUE_MULTIPLIER
    else:
        multiplier = 0
    overdue_component = days_overdue * multiplier

    # Weight for invoices still sitting in the 'raised' status, if present.
    not_sent_weight = row.get('raised_not_sent_weights', 0)

    if not apply_coverage_ratio:
        return overdue_component + not_sent_weight

    return overdue_component + row['coverage_penalty'] + not_sent_weight

def assign_rating(score):
    """Map an interference score to "Green", "Yellow" or "Red".

    Scores below ``config.SCORE_THRESHOLD_GREEN`` are "Green"; scores from that
    threshold up to (but excluding) ``config.SCORE_THRESHOLD_YELLOW`` are
    "Yellow"; everything else is "Red".
    """
    green_limit = config.SCORE_THRESHOLD_GREEN
    if score < green_limit:
        return "Green"
    if green_limit <= score < config.SCORE_THRESHOLD_YELLOW:
        return "Yellow"
    return "Red"

def calculate_raised_not_sent_weights(days):
    """Return the weight for an invoice that has stayed in the 'raised' status for ``days`` days.

    Up to the first tolerance level the weight is the configured zero weight.
    Between the first and second tolerance levels it grows linearly with the
    days beyond the first level. From one day past the second level it is the
    configured flat weight. Any value matching none of these ranges gets the
    zero weight.
    """
    first_level = config.RAISED_NOT_SENT_WEIGHT_FIRST_TOLERANCE_LEVEL
    if days <= first_level:
        return config.RAISED_NOT_SENT_WEIGHT_ZERO

    second_level = config.RAISED_NOT_SENT_WEIGHT_SECOND_TOLERANCE_LEVEL
    if first_level < days <= second_level:
        return config.RAISED_NOT_SENT_WEIGHT_FIRST_ELIF * (days - first_level)

    if days >= second_level + 1:
        return config.RAISED_NOT_SENT_WEIGHT_SECOND_ELIF

    # Default case: nothing above matched.
    return config.RAISED_NOT_SENT_WEIGHT_ZERO

def build_contracts_mapping(contracts, today):
    """Build a mapping from contract id to the attributes needed for scoring.

    Args:
        contracts: Iterable of contract mappings. Entries without a truthy
            'contract_id' are logged with a warning and skipped.
        today: Reference moment passed on to
            calculate_actual_contract_value_today().

    Returns:
        Dict keyed by 'contract_id'. Each value holds 'root_contract_id',
        'contract_value', 'sum_renewals', 'from_date', 'to_date' (parsed as
        UTC timestamps, NaT when unparseable) and 'actual_contract_value_today'.
    """
    root_contracts = {}
    for contract in contracts:
        root_id = contract.get('contract_id')
        if not root_id:
            logger.warning(f"Contract without 'contract_id' found: {contract}")
            continue

        contract_value = contract.get('contract_value', 0) or 0
        sum_renewals = contract.get('sum_renewals', 0) or 0
        from_date = pd.to_datetime(contract.get('from_date'), errors='coerce', utc=True)
        to_date = pd.to_datetime(contract.get('to_date'), errors='coerce', utc=True)

        root_contracts[root_id] = {
            # Consumers look up 'root_contract_id' on each entry; without this
            # key that lookup always yields None.
            # NOTE(review): assumes the root id is the id this entry is keyed
            # by - confirm against the contracts schema.
            'root_contract_id': root_id,
            'contract_value': contract_value,
            'sum_renewals': sum_renewals,
            'from_date': from_date,
            'to_date': to_date,
        }

        # Computed from the normalized entry so it sees the parsed dates.
        root_contracts[root_id]['actual_contract_value_today'] = calculate_actual_contract_value_today(
            root_contracts[root_id], today
        )

    return root_contracts

def _elapsed_days(today, moment):
    """Return whole days from ``moment`` to ``today``, or 0 when ``moment`` is null or in the future."""
    if pd.isnull(moment):
        return 0
    return max((today - moment).days, 0)


def _score_and_store_invoices(invoices_collection, contracts_collection, apply_coverage_ratio):
    """Fetch invoices and contracts, score every invoice, and persist the results.

    Writes an 'interference_info' sub-document to each invoice through one
    bulk write, then saves the scored DataFrame to an Excel file. Errors in
    the fetch, bulk-write and Excel steps are logged rather than raised.
    """
    # Fetch data from MongoDB
    try:
        invoices = list(invoices_collection.find())
        contracts = list(contracts_collection.find())
        if not invoices:
            logger.info("No invoices to process.")
            return
        logger.info(f"Fetched {len(invoices)} invoices and {len(contracts)} contracts.")
    except Exception as e:
        logger.error(f"Error fetching data: {e}")
        return

    # Current date (timezone-aware so it compares with the UTC-parsed columns)
    today = datetime.now(timezone.utc)

    # Build contracts mapping
    contracts_mapping = build_contracts_mapping(contracts, today)

    # Create DataFrame from invoices
    df_invoices = pd.DataFrame(invoices)

    # Calculate overdue_days. The column may be absent from every invoice;
    # then nothing can be overdue, instead of failing with a KeyError.
    if 'due_date' in df_invoices.columns:
        df_invoices['due_date'] = pd.to_datetime(df_invoices['due_date'], errors='coerce', utc=True)
        df_invoices['overdue_days'] = df_invoices['due_date'].apply(
            lambda due_date: _elapsed_days(today, due_date)
        )
    else:
        df_invoices['overdue_days'] = 0

    # Calculate raised_not_sent_days. An unparseable invoice_date counts as 0
    # days rather than leaking NaN into the score and the stored document.
    df_invoices['raised_not_sent_days'] = df_invoices.apply(
        lambda row: _elapsed_days(today, pd.to_datetime(row['invoice_date'], errors='coerce', utc=True))
        if row['status'] == 'Raised' else 0,
        axis=1
    )

    # Calculate raised_not_sent_weights
    df_invoices['raised_not_sent_weights'] = df_invoices['raised_not_sent_days'].apply(calculate_raised_not_sent_weights)

    # Map contract details
    df_invoices['root_contract_id'] = df_invoices['contract_id'].map(lambda x: contracts_mapping.get(x, {}).get('root_contract_id'))
    df_invoices['actual_contract_value_today'] = df_invoices['contract_id'].map(
        lambda x: contracts_mapping.get(x, {}).get('actual_contract_value_today', 0)
    )

    # Calculate coverage_ratio (a zero contract value is replaced by 1 to avoid division by zero)
    df_invoices['coverage_ratio'] = df_invoices['total_amount'] / df_invoices['actual_contract_value_today'].replace(0, 1)

    # Calculate coverage_penalty
    df_invoices['coverage_penalty'] = df_invoices['coverage_ratio'].apply(
        lambda x: (1 - x) * config.COVERAGE_PENALTY_WEIGHT if 0 <= x < 1 else 0
    )

    # Calculate interference_score
    df_invoices['interference_score'] = df_invoices.apply(
        lambda row: calculate_interference(row, apply_coverage_ratio), axis=1
    )

    # Assign ratings based on interference_score
    df_invoices['rating'] = df_invoices['interference_score'].apply(assign_rating)

    # Remove timezone information from datetime columns for Excel compatibility
    datetime_cols = df_invoices.select_dtypes(include=['datetime64[ns, UTC]']).columns
    for col in datetime_cols:
        df_invoices[col] = df_invoices[col].dt.tz_localize(None)

    # Prepare data for MongoDB update
    try:
        operations = []
        for _, row in tqdm(df_invoices.iterrows(), total=df_invoices.shape[0], desc="Updating invoices"):
            interference_info = {
                "root_contract_id": row['root_contract_id'],
                # .get(): these columns are not created anywhere in this
                # function, so a plain lookup would raise KeyError and abort
                # the whole bulk update when the invoices do not carry them.
                "from_date": row.get('from_date'),
                "to_date": row.get('to_date'),
                "overdue_days": row['overdue_days'],
                "interference_score": row['interference_score'],
                "rating": row['rating'],
                "sum_of_invoices": row['total_amount'],
                "actual_contract_value": row['actual_contract_value_today'],
                "coverage_ratio": row['coverage_ratio'],
                "coverage_penalty": row['coverage_penalty'],
                "raised_not_sent_days": row['raised_not_sent_days'],
                "raised_not_sent_weights": row['raised_not_sent_weights']
            }

            operations.append(
                UpdateOne(
                    {"_id": row['_id']},
                    {"$set": {"interference_info": interference_info}},
                    upsert=True
                )
            )

        if operations:
            result = invoices_collection.bulk_write(operations)
            logger.info(f"Bulk update completed. Matched: {result.matched_count}, Modified: {result.modified_count}, Upserted: {result.upserted_count}.")
    except Exception as e:
        logger.error(f"Error during bulk update to MongoDB: {e}")

    # Save results to Excel
    try:
        output_file_excel = "processed_invoices_with_ratings.xlsx"
        df_invoices.to_excel(output_file_excel, index=False)
        logger.info(f"Processed data saved to {output_file_excel}")
    except Exception as e:
        logger.error(f"Error saving Excel file: {e}")


def process_invoices():
    """Interactively score all invoices and store the results.

    Asks on stdin whether the coverage-ratio penalty should be part of the
    interference score, logs the configuration, opens the MongoDB collections,
    and delegates the scoring, bulk update and Excel export. Failures are
    logged and end the run early; nothing is raised for them. The MongoDB
    client is always closed before returning.
    """
    # User choice: Apply coverage ratio impact
    try:
        apply_coverage_ratio = input(
            "Do you want to apply the impact of coverage ratio to the interference rating? (yes/no): "
        ).strip().lower() == 'yes'
    except Exception as e:
        logger.error(f"Error reading user input: {e}")
        return

    # Log configuration parameters
    log_configurations()

    # Connect to MongoDB
    client = None
    try:
        client = MongoClient(MONGO_URI)
        db = client[DATABASE_NAME]
        invoices_collection = db[INVOICES_COLLECTION_NAME]
        contracts_collection = db[CONTRACTS_COLLECTION_NAME]
        logger.info("Connected to MongoDB successfully.")
    except Exception as e:
        logger.error(f"Failed to connect to MongoDB: {e}")
        if client is not None:
            client.close()
        return

    # Release the client's sockets on every exit path, including early returns
    # and unexpected exceptions raised while scoring.
    try:
        _score_and_store_invoices(invoices_collection, contracts_collection, apply_coverage_ratio)
    finally:
        client.close()

# Run the interactive invoice-processing workflow only when this file is
# executed as a script, not when it is imported as a module.
if __name__ == "__main__":
    process_invoices()

2024-12-27 12:29:56,283 - INFO - Configuration Parameters:
2024-12-27 12:29:56,284 - INFO - COVERAGE_PENALTY_WEIGHT: 20
2024-12-27 12:29:56,286 - INFO - MILESTONE_DAYS_THRESHOLD_3: 3
2024-12-27 12:29:56,287 - INFO - MILESTONE_DAYS_THRESHOLD_5: 5
2024-12-27 12:29:56,289 - INFO - MILESTONE_DAYS_THRESHOLD_7: 7
2024-12-27 12:29:56,291 - INFO - MILESTONE_PENALTY_LESS_THAN_3_DAYS: 5
2024-12-27 12:29:56,293 - INFO - MILESTONE_PENALTY_LESS_THAN_5_DAYS: 10
2024-12-27 12:29:56,295 - INFO - MILESTONE_PENALTY_LESS_THAN_7_DAYS: 15
2024-12-27 12:29:56,296 - INFO - MILESTONE_PENALTY_MORE_THAN_7_DAYS: 20
2024-12-27 12:29:56,298 - INFO - OVERDUE_MULTIPLIER: 0.1
2024-12-27 12:29:56,301 - INFO - RAISED_NOT_SENT_WEIGHT_FIRST_ELIF: 3
2024-12-27 12:29:56,302 - INFO - RAISED_NOT_SENT_WEIGHT_FIRST_TOLERANCE_LEVEL: 2
2024-12-27 12:29:56,304 - INFO - RAISED_NOT_SENT_WEIGHT_SECOND_ELIF: 7
2024-12-27 12:29:56,305 - INFO - RAISED_NOT_SENT_WEIGHT_SECOND_TOLERANCE_LEVEL: 6
2024-12-27 12:29:56,307 - INFO - RAISED_NOT