In [3]:
# contracts_processing.py

import sys
import os

# Add the config directory to the Python path
CONFIG_DIR = r'C:\Users\ak012\Documents\Projects\config'
if CONFIG_DIR not in sys.path:
    sys.path.append(CONFIG_DIR)

import config  # Now you can import config from the specified directory

import pandas as pd
import numpy as np
from pymongo import MongoClient, UpdateOne
from datetime import datetime, timezone
import logging
from tqdm import tqdm
from dotenv import load_dotenv

# ==============================
# 1. Load Environment Variables
# ==============================
# Must run before MONGO_URI is read below so values supplied through a
# .env file are visible in the process environment.
load_dotenv()

# ==============================
# 2. Configure Logging
# ==============================
# INFO and above go both to the console and to contract_processing.log
# (created in the current working directory).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler("contract_processing.log")
    ]
)
# Root logger: the rest of the file logs through the module-level
# logging.* functions, which target the same logger.
logger = logging.getLogger()

# ==============================
# 3. MongoDB Connection Details
# ==============================
# Read the connection string from the environment rather than hard-coding it:
# a literal placeholder is always truthy, so the guard below could never fire,
# and credentials should not live in source.
MONGO_URI = os.getenv("MONGO_URI")
if not MONGO_URI:
    logger.error("MONGO_URI not found in environment variables.")
    sys.exit(1)

DATABASE_NAME = "legitt-prod"
CONTRACTS_COLLECTION = "company_contracts"

# ==============================
# 4. Helper Functions
# ==============================
def calculate_interference(contract_row, apply_coverage_ratio):
    """Compute the base interference score for a single contract.

    The score is ``overdue_days * multiplier + status_score`` where:

    * ``multiplier`` is 0 while ``overdue_days`` is below 61 and
      ``config.OVERDUE_MULTIPLIER`` from 61 days onwards;
    * ``status_score`` is 0 when the status equals "completed"
      (ignoring case and surrounding whitespace) and 20 otherwise.

    When ``apply_coverage_ratio`` is true and the row's ``coverage_ratio`` is
    below 1, ``(1 - coverage_ratio) * config.COVERAGE_PENALTY_WEIGHT`` is
    added on top.

    Args:
        contract_row: Object exposing ``.get(key, default)`` (a dict or a
            pandas Series row) with optional ``overdue_days``, ``status`` and
            ``coverage_ratio`` entries.
        apply_coverage_ratio: Whether to add the coverage penalty.

    Returns:
        The numeric interference score.
    """
    def _number(key, default):
        # Rows built from a DataFrame carry NaN (or None) for absent fields,
        # which would bypass the .get() default; fall back explicitly.
        value = contract_row.get(key, default)
        return default if pd.isnull(value) else value

    overdue_days = _number('overdue_days', 0)

    # A missing or non-text status cannot be "completed", so it is scored
    # like any other open status instead of crashing on .strip().
    status = contract_row.get('status', '')
    is_completed = isinstance(status, str) and status.strip().lower() == "completed"
    status_score = 0 if is_completed else 20

    overdue_multiplier = 0 if overdue_days < 61 else config.OVERDUE_MULTIPLIER
    base_score = overdue_days * overdue_multiplier + status_score

    if not apply_coverage_ratio:
        return base_score

    coverage_ratio = _number('coverage_ratio', 1.0)
    if coverage_ratio < 1:
        coverage_penalty = (1 - coverage_ratio) * config.COVERAGE_PENALTY_WEIGHT
    else:
        coverage_penalty = 0
    return base_score + coverage_penalty

def assign_rating(score):
    """Map an interference score to a traffic-light rating.

    Scores below ``config.SCORE_THRESHOLD_GREEN`` are "Green", scores from
    that threshold up to (but excluding) ``config.SCORE_THRESHOLD_YELLOW``
    are "Yellow", and everything else is "Red".
    """
    green_limit = config.SCORE_THRESHOLD_GREEN
    if score < green_limit:
        return "Green"

    in_yellow_band = green_limit <= score < config.SCORE_THRESHOLD_YELLOW
    return "Yellow" if in_yellow_band else "Red"

def calculate_actual_contract_value_today(row, today):
    """Return the contract value accrued as of ``today`` plus renewals.

    While the contract is still running (``today <= to_date``) the contract
    value is prorated as ``contract_value * days_since_start / 365``. Once it
    has ended, or when either date is missing, the full contract value is
    used. ``total_renewal_amount`` is always added in full.

    Args:
        row: Object exposing ``.get(key, default)`` (a dict or a pandas
            Series row) with optional ``contract_value``,
            ``total_renewal_amount``, ``from_date`` and ``to_date`` entries.
        today: Reference datetime; must be comparable with the row's dates
            (the caller passes a UTC-aware datetime).

    Returns:
        The numeric value of the contract as of ``today``.
    """
    # DataFrame rows carry NaN for absent fields, which skips the .get()
    # default and would poison the arithmetic; treat missing amounts as 0.
    contract_value = row.get('contract_value', 0)
    if pd.isnull(contract_value):
        contract_value = 0
    total_renewal_amount = row.get('total_renewal_amount', 0)
    if pd.isnull(total_renewal_amount):
        total_renewal_amount = 0

    from_date = row.get('from_date')
    to_date = row.get('to_date')

    if pd.isnull(from_date) or pd.isnull(to_date):
        return contract_value + total_renewal_amount

    if today <= to_date:
        # A contract that has not started yet has accrued nothing; without
        # the clamp a future from_date yields a negative value.
        days_since_start = max((today - from_date).days, 0)
        return (contract_value * days_since_start / 365) + total_renewal_amount

    return contract_value + total_renewal_amount

def calculate_total_renewals(contracts):
    """Sum the numeric renewal amounts of every contract.

    Args:
        contracts: Iterable of contract dicts. Each may carry a
            ``contract_id`` and a ``renewals`` list whose items are dicts
            with a ``renewal_amount`` entry.

    Returns:
        Dict mapping ``contract_id`` (``''`` when absent) to the sum of its
        int/float ``renewal_amount`` values. Contracts sharing an id
        overwrite one another; the last one wins.
    """
    renewal_mapping = {}
    for contract in contracts:
        contract_id = contract.get('contract_id', '')
        # The key may be present but null; treat that like "no renewals"
        # instead of failing on iteration.
        renewals = contract.get('renewals') or []

        total_renewal = 0
        for renewal in renewals:
            if not isinstance(renewal, dict):
                # Malformed entry: nothing to read an amount from.
                continue
            amount = renewal.get('renewal_amount', 0)
            # Non-numeric amounts (strings, None, ...) are ignored.
            if isinstance(amount, (int, float)):
                total_renewal += amount

        renewal_mapping[contract_id] = total_renewal
        logging.info(f"Contract '{contract_id}' has total renewals: {total_renewal}")
    return renewal_mapping

# ==============================
# 5. Helper Function for Milestones
# ==============================
def calculate_milestone_penalty(milestone_days_late):
    """Return the penalty tier for a milestone that is ``milestone_days_late`` days late.

    The tiers are bounded by ``config.MILESTONE_DAYS_THRESHOLD_3``,
    ``..._5`` and ``..._7``; each upper bound is exclusive, and anything at
    or beyond the last threshold gets the "more than 7 days" penalty.
    """
    days = milestone_days_late

    if days < config.MILESTONE_DAYS_THRESHOLD_3:
        return config.MILESTONE_PENALTY_LESS_THAN_3_DAYS

    if config.MILESTONE_DAYS_THRESHOLD_3 <= days < config.MILESTONE_DAYS_THRESHOLD_5:
        return config.MILESTONE_PENALTY_LESS_THAN_5_DAYS

    if config.MILESTONE_DAYS_THRESHOLD_5 <= days < config.MILESTONE_DAYS_THRESHOLD_7:
        return config.MILESTONE_PENALTY_LESS_THAN_7_DAYS

    return config.MILESTONE_PENALTY_MORE_THAN_7_DAYS

# ==============================
# 6. Main Processing Function
# ==============================
def _load_milestone_penalties(milestones_file_path):
    """Total the late-milestone penalties per contract name from an Excel file.

    Returns a dict mapping ``contract_name`` to its summed penalty, or
    ``None`` when the file cannot be read, is empty, or lacks a required
    column (the reason is logged).
    """
    try:
        df_milestones = pd.read_excel(milestones_file_path)
        logging.info(f"Milestones data loaded from {milestones_file_path}.")
    except Exception as e:
        logging.error(f"Error loading milestones Excel file: {e}")
        df_milestones = pd.DataFrame()  # Treated as "no data" below

    if df_milestones.empty:
        logging.info("No milestones data to process.")
        return None

    required_columns = ['contract_name', 'milestone_due_date', 'completion_date']
    if not all(col in df_milestones.columns for col in required_columns):
        logging.error(f"Milestones Excel file is missing required columns: {required_columns}")
        return None

    for col in ('milestone_due_date', 'completion_date'):
        df_milestones[col] = pd.to_datetime(df_milestones[col], errors='coerce', utc=True)

    # Days late per milestone; early, on-time and undated milestones count as 0.
    df_milestones['days_late'] = (df_milestones['completion_date'] - df_milestones['milestone_due_date']).dt.days
    df_milestones['days_late'] = df_milestones['days_late'].apply(lambda x: x if x > 0 else 0)

    df_milestones['milestone_penalty'] = df_milestones['days_late'].apply(calculate_milestone_penalty)

    return df_milestones.groupby('contract_name')['milestone_penalty'].sum().to_dict()


def _score_and_export_contracts(contracts, apply_coverage_ratio, apply_milestone_penalty):
    """Score the fetched contracts and write the result to an Excel file.

    Builds a DataFrame from ``contracts`` (a non-empty list of contract
    dicts), derives renewal totals, the contract value as of now, the
    coverage ratio, optional milestone penalties, the final interference
    score and its rating, then saves everything to ``processed_contracts.xlsx``.
    """
    df_contracts = pd.DataFrame(contracts)
    today = datetime.now(timezone.utc)

    # Rename date columns if necessary
    if 'start_date' in df_contracts.columns and 'end_date' in df_contracts.columns:
        df_contracts.rename(columns={"start_date": "from_date", "end_date": "to_date"}, inplace=True)
        logging.info("Renamed 'start_date' to 'from_date' and 'end_date' to 'to_date'.")

    # Convert date columns to UTC-aware datetimes so they compare with `today`
    for col in ('from_date', 'to_date'):
        if col in df_contracts.columns:
            df_contracts[col] = pd.to_datetime(df_contracts[col], errors='coerce', utc=True)

    # Calculate total renewals (guarded: no contract carries an id -> no column)
    if 'contract_id' in df_contracts.columns:
        renewal_mapping = calculate_total_renewals(contracts)
        df_contracts['total_renewal_amount'] = df_contracts['contract_id'].map(renewal_mapping).fillna(0.0)
    else:
        df_contracts['total_renewal_amount'] = 0.0
        logging.warning("'contract_id' column not found in contracts. 'total_renewal_amount' set to 0.0.")

    # Handle missing 'contract_value'
    if 'contract_value' not in df_contracts.columns:
        df_contracts['contract_value'] = 0.0
        logging.warning("'contract_value' column not found in contracts. Set to 0.0.")

    # Calculate actual contract value today
    df_contracts['actual_contract_value_today'] = df_contracts.apply(
        lambda row: calculate_actual_contract_value_today(row, today), axis=1
    )
    logging.info("Calculated 'actual_contract_value_today' for contracts.")

    # Coverage ratio is binary here: 1 when the contract has any value today, else 0
    df_contracts['coverage_ratio'] = df_contracts['actual_contract_value_today'].apply(
        lambda x: 1 if x > 0 else 0
    )

    # ==============================
    # 7. Milestones Processing
    # ==============================
    # Default to no penalty; only overwritten when penalties can be computed.
    df_contracts['milestone_penalty'] = 0.0
    if apply_milestone_penalty:
        milestones_file_path = r"C:\Users\ak012\Documents\Projects\Contracts\simulated_milestones.xlsx"
        penalty_mapping = _load_milestone_penalties(milestones_file_path)
        if penalty_mapping is not None:
            if 'contract_name' in df_contracts.columns:
                df_contracts['milestone_penalty'] = df_contracts['contract_name'].map(penalty_mapping).fillna(0.0)
                logging.info("Calculated milestone penalties for contracts.")
            else:
                logging.warning("'contract_name' column not found in contracts. 'milestone_penalty' set to 0.0.")
    else:
        logging.info("Milestone penalties are not applied as per user input.")

    # ==============================
    # 8. Calculate Interference Score
    # ==============================
    df_contracts['final_interference_score'] = df_contracts.apply(
        lambda row: calculate_interference(row, apply_coverage_ratio) + row.get('milestone_penalty', 0),
        axis=1
    )
    logging.info("Calculated 'final_interference_score' for contracts, including milestone penalties if applied.")

    # Assign ratings based on interference_score
    df_contracts['final_rating'] = df_contracts['final_interference_score'].apply(assign_rating)
    logging.info("Assigned 'final_rating' based on interference scores.")

    # ==============================
    # 9. Save to Excel
    # ==============================
    # NOTE(review): this raw string keeps the doubled backslashes literally;
    # Windows appears to tolerate repeated separators, but the path differs in
    # form from milestones_file_path above - confirm whether that is intended.
    output_excel = r"C:\\Users\\ak012\\Documents\\Projects\\Contracts\\processed_contracts.xlsx"

    if df_contracts.empty:
        logging.error("DataFrame 'df_contracts' is not defined or is empty.")
        return

    # Excel cannot store timezone-aware datetimes, so drop the UTC marker first
    datetime_cols = df_contracts.select_dtypes(include=['datetime64[ns, UTC]']).columns
    for col in datetime_cols:
        df_contracts[col] = df_contracts[col].dt.tz_localize(None)

    try:
        df_contracts.to_excel(output_excel, index=False)
        logging.info(f"Processed data saved to Excel: {output_excel}")
    except Exception as e:
        logging.error(f"Error saving Excel file: {e}")


def process_invoices_and_contracts():
    """Interactive entry point: fetch contracts from MongoDB, score them, export to Excel.

    Prompts on stdin for whether to apply the coverage-ratio penalty and the
    milestone penalties, loads every document of the contracts collection,
    and hands them to the scoring/export step. Failures to connect or fetch
    are logged and end the run. The MongoDB client is closed on every exit
    path once it has been created.
    """
    # User input to apply coverage ratio
    apply_coverage_ratio_input = input("Do you want to apply coverage ratio to the interference rating? (yes/no): ").strip().lower()
    apply_coverage_ratio = apply_coverage_ratio_input == 'yes'

    # User input to apply milestone penalties
    apply_milestone_penalty_input = input("Do you want to incorporate milestone penalties? (yes/no): ").strip().lower()
    apply_milestone_penalty = apply_milestone_penalty_input == 'yes'

    logging.info(f"Apply coverage ratio: {apply_coverage_ratio}")
    logging.info(f"Apply milestone penalties: {apply_milestone_penalty}")

    try:
        client = MongoClient(MONGO_URI)
        db = client[DATABASE_NAME]
        contracts_collection = db[CONTRACTS_COLLECTION]
        logging.info("Connected to MongoDB successfully.")
    except Exception as e:
        logging.error(f"Failed to connect to MongoDB: {e}")
        return

    try:
        try:
            contracts = list(contracts_collection.find())
            logging.info(f"Number of contracts fetched: {len(contracts)}")
        except Exception as e:
            logging.error(f"Error fetching contracts: {e}")
            return

        if not contracts:
            logging.info("No contracts to process.")
            return

        _score_and_export_contracts(contracts, apply_coverage_ratio, apply_milestone_penalty)
    finally:
        # ==============================
        # 10. Close MongoDB Connection
        # ==============================
        # In `finally` so early returns and processing errors do not leak
        # the client.
        try:
            client.close()
            logging.info("Closed MongoDB connection.")
        except Exception as e:
            logging.error(f"Error closing MongoDB connection: {e}")

if __name__ == "__main__":
    process_invoices_and_contracts()

2024-12-27 11:35:26,544 - INFO - Apply coverage ratio: True
2024-12-27 11:35:26,546 - INFO - Apply milestone penalties: False
2024-12-27 11:35:26,843 - INFO - Connected to MongoDB successfully.
2024-12-27 11:36:36,662 - INFO - Number of contracts fetched: 7652
2024-12-27 11:36:36,799 - INFO - Renamed 'start_date' to 'from_date' and 'end_date' to 'to_date'.
2024-12-27 11:36:36,846 - INFO - Contract '8002s000000cF4kAAE' has total renewals: 0
2024-12-27 11:36:36,848 - INFO - Contract '8002s000000cF3cAAE' has total renewals: 0
2024-12-27 11:36:36,850 - INFO - Contract '8002s000000cF7MAAU' has total renewals: 0
2024-12-27 11:36:36,851 - INFO - Contract '8006F000000Se44QAC' has total renewals: 4572
2024-12-27 11:36:36,852 - INFO - Contract '8002s000000lxrAAAQ' has total renewals: 442
2024-12-27 11:36:36,853 - INFO - Contract '8006F000000XlzDQAS' has total renewals: 0
2024-12-27 11:36:36,855 - INFO - Contract '8006F000000nU9AQAU' has total renewals: 0
2024-12-27 11:36:36,856 - INFO - Contract