In [3]:
import pandas as pd
from pymongo import MongoClient
import json
import logging
import boto3
from io import BytesIO
import random


# ---------- Setup Logger ----------
logger = logging.getLogger()
logger.setLevel(logging.INFO)  # Change to DEBUG for verbose logging
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(funcName)s - %(message)s")

if not logger.handlers:
    ch = logging.StreamHandler()
    ch.setFormatter(formatter)
    logger.addHandler(ch)

# ---------------- Rollup Function ----------------
def build_rollup_fact(transaction_data, dimensions, date_col, measure_col):
    logger.info("▶ Building rollup fact table")

    # Validate date and measure column exist
    if date_col not in transaction_data.columns:
        raise ValueError(f"❌ Date column '{date_col}' not found in transaction data")
    if measure_col not in transaction_data.columns:
        raise ValueError(f"❌ Measure column '{measure_col}' not found in transaction data")

    # Ensure posting_date is datetime
    transaction_data[date_col] = pd.to_datetime(transaction_data[date_col], errors="coerce")
    transaction_data["Year"] = transaction_data[date_col].dt.year
    transaction_data["Quarter"] = transaction_data[date_col].dt.to_period("Q").astype(str)
    transaction_data["Month"] = transaction_data[date_col].dt.strftime("%b-%Y")

    # Build parent-child maps for all dimensions
    parent_maps = {}
    for dim in dimensions:
        if not dim["id_col"]:
            raise ValueError(f"❌ Dimension {dim['dim_name']} is missing id_col")

        if dim["id_col"] not in dim["dim_df"].columns:
            raise ValueError(f"❌ Column {dim['id_col']} not found in {dim['dim_name']}")

        # If parent_col missing, use id_col as self-parent (flat hierarchy)
        parent_col = dim["parent_col"] or dim["id_col"]

        if parent_col not in dim["dim_df"].columns:
            # If parent_col is supposed to be id_col, it’s safe. Otherwise, error out.
            if parent_col != dim["id_col"]:
                raise ValueError(f"❌ Parent column {parent_col} not found in {dim['dim_name']}")

        parent_maps[dim["dim_name"]] = dict(
            zip(dim["dim_df"][dim["id_col"]], dim["dim_df"][parent_col])
        )
    logger.info("✅ Parent-child maps built for %d dimensions", len(dimensions))

    def get_ancestors(node, parent_map):
        ancestors = [node]
        while node in parent_map and parent_map[node] != node:
            node = parent_map[node]
            ancestors.append(node)
        return ancestors

    expanded_rows = []

    for _, row in transaction_data.iterrows():
        dim_ancestors_list = []
        for dim in dimensions:
            if dim["trans_id_col"] not in row:
                raise ValueError(f"❌ Transaction column {dim['trans_id_col']} not found in data")
            node = row[dim["trans_id_col"]]
            ancestors = get_ancestors(node, parent_maps[dim["dim_name"]])
            dim_ancestors_list.append((dim["dim_name"], ancestors))

        def cartesian_product(idx, current_combination):
            if idx == len(dim_ancestors_list):
                expanded_rows.append({
                    **{dim["trans_id_col"]: v for dim, v in zip(dimensions, current_combination)},
                    "Year": row["Year"],
                    "Quarter": row["Quarter"],
                    "Month": row["Month"],
                    measure_col: row[measure_col]
                })

                return
            dim_name, ancestors = dim_ancestors_list[idx]
            for ancestor in ancestors:
                cartesian_product(idx + 1, current_combination + [ancestor])

        cartesian_product(0, [])

    expanded_df = pd.DataFrame(expanded_rows)
    logger.info("✅ Expanded transactions into %d rows", len(expanded_df))

    group_cols = [dim["trans_id_col"] for dim in dimensions] + ["Year", "Quarter", "Month"]
    fact_table = expanded_df.groupby(group_cols, as_index=False)[measure_col].sum()

    logger.info("✅ Rollup fact table created with %d rows", len(fact_table))
    return fact_table

In [12]:
def fetch_forecast_data_from_s3(project_id: str, planning_scenario_id: str, new_filename_base: str,mapping_collection,user_id: str, s3_client=None, bucket_name="dev-ai-analytics-private") -> pd.DataFrame:
    """
    Fetch transaction data from S3 parquet file and keep only required columns.
    
    Args:
        project_id (str): Project identifier
        planning_scenario_id (str): Planning scenario identifier
        new_filename_base (str): Base name of parquet file
        mapping_collection: Mongo collection with mapping info
        user_id (str): Current user ID
        s3_client: Optional boto3 S3 client
        bucket_name (str): S3 bucket name (default: dev-ai-analytics-private)
    
    Returns:
        pd.DataFrame: Filtered transaction data
    """
    try:
        # --- Step 1: Build S3 key ---
        s3_key = f"fpa/forecast_data/{project_id}/{planning_scenario_id}/{new_filename_base}_forecast.parquet"
        logger.info("▶ Fetching from S3: s3://%s/%s", bucket_name, s3_key)

        if s3_client is None:
            s3_client = boto3.client("s3")

        # --- Step 2: Read parquet from S3 ---
        response = s3_client.get_object(Bucket=bucket_name, Key=s3_key)
        df = pd.read_parquet(BytesIO(response["Body"].read()))
        logger.info("✅ Loaded parquet with %d rows, %d columns", df.shape[0], df.shape[1])

        # --- Step 3: Get required columns from Mongo ---
        mapping_doc = mapping_collection.find_one(
            {"user_id": user_id, "project_id": project_id},
            {"_id": 0, f"planning_scenarios.{planning_scenario_id}.result": 1}
        )

        required_cols = []
        if mapping_doc:
            result_dict = (
                mapping_doc.get("planning_scenarios", {})
                           .get(planning_scenario_id, {})
                           .get("result", {})
            )
            if result_dict:
                required_cols = [col for col in result_dict.values() if col]

        # --- Step 4: Filter DataFrame (always include 'month') ---
        if required_cols:
            cols_to_keep = list(set(required_cols + ["month"]))  # add 'month'
            available_cols = [col for col in cols_to_keep if col in df.columns]

            missing_cols = [col for col in cols_to_keep if col not in df.columns]
            if missing_cols:
                logger.warning("⚠️ Missing required columns in file: %s", missing_cols)

            df = df[available_cols]
            logger.info("✅ Kept columns: %s", available_cols)
        else:
            logger.warning("⚠️ No required columns mapping found, keeping full dataset")
            if "month" not in df.columns:
                logger.warning("⚠️ Column 'month' not found in dataset!")

        return df

    except Exception as e:
        logger.error("❌ Error fetching transaction data: %s", str(e), exc_info=True)
        return pd.DataFrame()
    

    
project_id = "01K4CZ0YJW77DS3BHVGVQSJSAD"
planning_scenario_id= "01K1TAXXNV4GP7FAKYJSWCCEZP"
mongo_uri =  "mongodb://devfpauser:Ok74c3YE7GLN@13.202.247.111:27017/devfpadb"
db_name =  "devfpadb" 
client = MongoClient(mongo_uri)
db = client[db_name]
mapping_collection = db["mapping_results"]

fetch_forecast_data_from_s3(project_id, planning_scenario_id, transaction_filename , mapping_collection ,user_id = "18" , bucket_name="dev-ai-analytics-private")

2025-09-16 12:29:40,760 - INFO - fetch_forecast_data_from_s3 - ▶ Fetching from S3: s3://dev-ai-analytics-private/fpa/forecast_data/01K4CZ0YJW77DS3BHVGVQSJSAD/01K1TAXXNV4GP7FAKYJSWCCEZP/trans_forecast.parquet
2025-09-16 12:29:43,591 - INFO - fetch_forecast_data_from_s3 - ✅ Loaded parquet with 186180 rows, 7 columns


Unnamed: 0,month,gl_account_id,cost_center_id,company_code_id,profit_center_id,amount,scenario
0,2025-07-01,5000002,CCTR002,CCODE001,PCENT002,15355.317560,Forecast
1,2025-07-01,5000002,CCTR002,CCODE001,PCENT004,35891.736702,Forecast
2,2025-07-01,5000002,CCTR002,CCODE001,PCENT005,12263.348119,Forecast
3,2025-07-01,5000002,CCTR002,CCODE001,PCENT006,51920.695931,Forecast
4,2025-07-01,5000002,CCTR002,CCODE001,PCENT007,4686.953279,Forecast
...,...,...,...,...,...,...,...
186175,2026-06-01,5000072,CCTR008,CCODE007,PCENT004,41000.777911,Forecast
186176,2026-06-01,5000072,CCTR008,CCODE007,PCENT005,59426.177058,Forecast
186177,2026-06-01,5000072,CCTR008,CCODE007,PCENT006,1972.324213,Forecast
186178,2026-06-01,5000072,CCTR008,CCODE007,PCENT007,47397.674507,Forecast


In [5]:
def prepare_dimension_list(db, user_id, project_id, planning_scenario_id):
    logger.info("▶ Preparing dimension list")
    files_collection = db["fileuploaddata"]

    file_docs = list(files_collection.find({
        "user_id": user_id,
        "project_id": project_id,
        "planning_scenario_id": planning_scenario_id
    }))

    if not file_docs:
        raise FileNotFoundError("❌ Dimension files not found")

    # Normalize helper: lowercase + remove underscores
    def normalize(name: str) -> str:
        return name.replace("_", "").lower() if name else ""

    # Group uploaded dimension files
    grouped_files = {}
    for doc in file_docs:
        filename = doc.get("filename")
        if filename in ["date_dimension", "version_dimension"]:
            continue

        data_obj = doc.get("data")
        if isinstance(data_obj, dict):
            row_df = pd.DataFrame([data_obj])
        elif isinstance(data_obj, list):
            row_df = pd.DataFrame(data_obj)
        else:
            row_df = pd.DataFrame()

        grouped_files[filename] = pd.concat(
            [grouped_files.get(filename, pd.DataFrame()), row_df], ignore_index=True
        )

    if not grouped_files:
        raise FileNotFoundError("❌ No valid dimension files found after filtering")

    # Fetch mapping
    mapping_collection = db["mapping_results"]
    doc = mapping_collection.find_one({
        "user_id": user_id,
        "project_id": project_id,
        f"planning_scenarios.{planning_scenario_id}": {"$exists": True}
    })
    mapping = {}
    if doc:
        scenario_mapping = doc["planning_scenarios"].get(planning_scenario_id, {})
        mapping = scenario_mapping.get("result", {})

    if not mapping:
        raise ValueError("❌ Mapping is required but missing")

    col_names_collection = db["table_metadata"]
    dimensions = []

    for fname, df in grouped_files.items():
        transaction_col_name = None
        for k, v in mapping.items():
            if not v:  # skip nulls
                continue

            # normalize both sides
            key_base = k.replace("_dimension", "")
            if normalize(fname) == normalize(key_base):
                transaction_col_name = v
                break

        if not transaction_col_name:
            logger.warning("⚠️ Skipping dimension %s because no transaction mapping found", fname)
            continue

        # Fetch metadata for this dimension
        col_doc = col_names_collection.find_one({
            "user_id": user_id,
            "project_id": project_id,
            "scenarios.planning_scenario_id": planning_scenario_id
        }, {"_id": 0, "scenarios": 1})

        id_col = None
        hierarchy_col = None
        if col_doc:
            for scenario in col_doc.get("scenarios", []):
                if scenario.get("planning_scenario_id") == planning_scenario_id:
                    for table in scenario.get("tables", []):
                        if table.get("table_name") == fname:
                            id_col = (table.get("unique_id_columns") or [None])[0]
                            hierarchy_col = (table.get("hierarchy_columns") or [None])[0]
                            break

        if not id_col:
            logger.error("❌ Skipping dimension %s because id_col is missing", fname)
            continue

        # hierarchy_col can be None (flat hierarchy)
        dimensions.append({
            "dim_name": fname,
            "dim_df": df,
            "id_col": id_col,
            "parent_col": hierarchy_col,  # can be None
            "trans_id_col": transaction_col_name
        })

    logger.info("✅ Prepared %d dimensions", len(dimensions))
    return dimensions

In [6]:
def get_recommended_dimensions(db, project_id, planning_scenario_id, filename=None):
    logger.info("▶ Fetching recommended dimensions & measures for scenario=%s", planning_scenario_id)
    rec_collection = db["recommendeddimensions"]

    query = {
        "project_id": project_id,
        "planning_scenario_id": planning_scenario_id
    }
    if filename:
        query["filename"] = filename

    doc = rec_collection.find_one(query)

    if not doc:
        logger.warning("⚠️ No recommended data found")
        return {"dimensions": [], "measures": []}

    return {
        "dimensions": doc.get("rows", []),
        "measures": doc.get("columns", {})
    }


In [None]:
def insert_template_to_s3(df, user_id, project_id, planning_scenario_id,file_name, bucket_name="dev-ai-analytics-private"):
    logger.info("▶ Storing template to S3 for project_id=%s, planning_scenario_id=%s", project_id, planning_scenario_id)

    if df.empty:
        logger.warning("⚠️ DataFrame is empty. Nothing to store.")
        return None

    # ✅ Also force all *_id columns to string (safety net)
    for col in df.columns:
        if col.endswith("_id"):
            df[col] = df[col].astype(str)

    # Generate deterministic S3 path
    s3_key = f"fpa/forecast_template/{project_id}/{planning_scenario_id}/{file_name}_template.parquet"
    s3_path = f"s3://{bucket_name}/{s3_key}"

    try:
        s3 = boto3.client("s3")
        buffer = BytesIO()
        df.to_parquet(buffer, index=False, engine="pyarrow")  # ✅ safe now
        buffer.seek(0)

        s3.put_object(Bucket=bucket_name, Key=s3_key, Body=buffer.getvalue())
        logger.info("✅ Stored %d rows to S3 at %s (replaced if existed)", len(df), s3_path)
        return s3_path

    except Exception as e:
        logger.error("❌ Failed to store DataFrame to S3: %s", e, exc_info=True)
        return None

In [13]:
# ---------------- Lambda Handler ----------------
def lambda_handler(event, context):
    try:
        logger.info("🚀 Lambda triggered with event: %s", event)

        mongo_uri = event.get("mongo_uri")
        db_name = event.get("db_name", "devfpadb")
        user_id = int(event["user_id"])
        project_id = event["project_id"]
        planning_scenario_id = event["planning_scenario_id"]
        transaction_filename = event["transaction_filename"]
        

        client = MongoClient(mongo_uri)
        db = client[db_name]
        mapping_collection = db["mapping_results"]

        # Fetch transaction data
        
        transaction_df = fetch_forecast_data_from_s3(project_id, planning_scenario_id, transaction_filename, mapping_collection, user_id)
        logger.info("transaction_df : %s", transaction_df.head(2))
        

        if transaction_df is None or transaction_df.empty:
            raise ValueError(" No transaction data found")

        # Fetch all dimensions
        all_dimensions = prepare_dimension_list(db, user_id, project_id, planning_scenario_id)
        if not all_dimensions:
            raise ValueError(" No dimension definitions found")
        

        # Filter dimensions based on recommended list
        recommended_dims = get_recommended_dimensions(db, project_id, planning_scenario_id)
        print("Recommended_dims: ", recommended_dims)
        dimensions = [dim for dim in all_dimensions if dim["trans_id_col"] in recommended_dims.get("dimensions", [])]
        print("Dimensions: ", dimensions)
        if not dimensions:
            raise ValueError(" No dimensions selected after filtering by recommended list")

        # Fetch mappings
        print("recommendation_dims:", recommended_dims)
        date_col = "month"
        measure_col = recommended_dims["measures"].get("measure")


        if not date_col or not measure_col:
            raise ValueError(" Missing date or measure mapping in DB")

        # Build rollup fact table
        rollup_fact_df = build_rollup_fact(transaction_data=transaction_df, dimensions=dimensions, date_col=date_col, measure_col=measure_col)
        logger.info("rollup_fact :  %s", rollup_fact_df.head(2))

             
        s3_path_returned = insert_template_to_s3(rollup_fact_df, user_id, project_id, planning_scenario_id, transaction_filename)
        logger.info(" Rollup fact table built successfully")


        return {
            "statusCode": 200,
            "body": {
                "message": " Rollup fact table built successfully",
                "s3_path" : s3_path_returned
            }
        }

    except Exception as e:
        logger.error(" Lambda failed: %s", str(e), exc_info=True)
        return {
            "statusCode": 500,
            "body": {"error": str(e)}
        }


event = {
"mongo_uri": "mongodb://devfpauser:Ok74c3YE7GLN@13.202.247.111:27017/devfpadb",
"db_name": "devfpadb",
"user_id": 18,
"project_id": "01K4CZ0YJW77DS3BHVGVQSJSAD",
"planning_scenario_id": "01K1TAXXNV4GP7FAKYJSWCCEZP",
"transaction_filename": "trans",

}
response = lambda_handler(event, None)
print(response)


2025-09-16 12:29:56,180 - INFO - lambda_handler - 🚀 Lambda triggered with event: {'mongo_uri': 'mongodb://devfpauser:Ok74c3YE7GLN@13.202.247.111:27017/devfpadb', 'db_name': 'devfpadb', 'user_id': 18, 'project_id': '01K4CZ0YJW77DS3BHVGVQSJSAD', 'planning_scenario_id': '01K1TAXXNV4GP7FAKYJSWCCEZP', 'transaction_filename': 'trans'}
2025-09-16 12:29:56,182 - INFO - fetch_forecast_data_from_s3 - ▶ Fetching from S3: s3://dev-ai-analytics-private/fpa/forecast_data/01K4CZ0YJW77DS3BHVGVQSJSAD/01K1TAXXNV4GP7FAKYJSWCCEZP/trans_forecast.parquet
2025-09-16 12:29:57,926 - INFO - fetch_forecast_data_from_s3 - ✅ Loaded parquet with 186180 rows, 7 columns
2025-09-16 12:29:58,410 - INFO - fetch_forecast_data_from_s3 - ✅ Kept columns: ['cost_center_id', 'amount', 'profit_center_id', 'month', 'company_code_id', 'gl_account_id']
2025-09-16 12:29:58,413 - INFO - lambda_handler - transaction_df :   cost_center_id        amount profit_center_id      month company_code_id  \
0        CCTR002  15355.317560     

Recommended_dims:  {'dimensions': ['gl_account_id', 'cost_center_id', 'company_code_id', 'profit_center_id'], 'measures': {'measure': 'amount', 'date_dimension': 'posting_date'}}
Dimensions:  [{'dim_name': 'center_c', 'dim_df':         id       description hierarchy       cost_owner
0  CCTR001             Admin   CCTR001     Allison Hill
1  CCTR002        Production   CCTR001      Noah Rhodes
2  CCTR003                HR   CCTR001  Angie Henderson
3  CCTR004         Marketing   CCTR001    Daniel Wagner
4  CCTR005               R&D   CCTR001  Cristian Santos
5  CCTR006                IT   CCTR001  Connie Lawrence
6  CCTR007             Legal   CCTR001  Abigail Shaffer
7  CCTR008  Customer Support   CCTR001       Gina Moore, 'id_col': 'id', 'parent_col': 'hierarchy', 'trans_id_col': 'cost_center_id'}, {'dim_name': 'companycode', 'dim_df':          id       company_code
0  CCODE001  Hyd-head quarters
1  CCODE002               Hyd 
2  CCODE003          Bengaluru
3  CCODE004               P

2025-09-16 12:30:05,360 - INFO - build_rollup_fact - ✅ Parent-child maps built for 4 dimensions
2025-09-16 12:30:22,571 - INFO - build_rollup_fact - ✅ Expanded transactions into 1987440 rows
2025-09-16 12:30:23,063 - INFO - build_rollup_fact - ✅ Rollup fact table created with 350892 rows
2025-09-16 12:30:23,079 - INFO - lambda_handler - rollup_fact :    cost_center_id company_code_id gl_account_id profit_center_id  Year Quarter  \
0        CCTR001        CCODE001       5000001         PCENT001  2025  2025Q3   
1        CCTR001        CCODE001       5000001         PCENT001  2025  2025Q3   

      Month        amount  
0  Aug-2025  5.624555e+06  
1  Jul-2025  5.624555e+06  
2025-09-16 12:30:23,081 - INFO - insert_template_to_s3 - ▶ Storing template to S3 for project_id=01K4CZ0YJW77DS3BHVGVQSJSAD, planning_scenario_id=01K1TAXXNV4GP7FAKYJSWCCEZP
2025-09-16 12:30:25,537 - INFO - insert_template_to_s3 - ✅ Stored 350892 rows to S3 at s3://dev-ai-analytics-private/fpa/forecast_template/01K4CZ

{'statusCode': 200, 'body': {'message': ' Rollup fact table built successfully', 's3_path': 's3://dev-ai-analytics-private/fpa/forecast_template/01K4CZ0YJW77DS3BHVGVQSJSAD/01K1TAXXNV4GP7FAKYJSWCCEZP/trans_template.parquet'}}


In [None]:
import pickle

def store_pickle_to_s3(model, user_id, project_id, planning_scenario_id, file_name, bucket_name="dev-ai-analytics-private"):
    """
    Store the best model pickle file to S3.
    """
    logger.info("▶ Storing pickle file to S3 for project_id=%s, planning_scenario_id=%s", project_id, planning_scenario_id)

    try:
        s3 = boto3.client("s3")
        buffer = BytesIO()
        pickle.dump(model, buffer)
        buffer.seek(0)

        s3_key = f"fpa/forecast_model/{project_id}/{planning_scenario_id}/{file_name}_best_model.pkl"
        s3_path = f"s3://{bucket_name}/{s3_key}"

        s3.put_object(Bucket=bucket_name, Key=s3_key, Body=buffer.getvalue())
        logger.info("✅ Stored pickle to S3 at %s (replaced if existed)", s3_path)
        return s3_path

    except Exception as e:
        logger.error("❌ Failed to store pickle to S3: %s", e, exc_info=True)
        return None
    
transaction_filename = "trans"
model = 
store_pickle_to_s3(model, project_id, planning_scenario_id, file_name= transaction_filename)

In [1]:
from pymongo import MongoClient
from bson.objectid import ObjectId

def fetch_document(mongo_uri, db_name, collection_name, doc_id=None, planning_scenario_id=None):
    """
    Fetch a document from MongoDB collection based on _id or planning_scenario_id.
    
    Args:
        mongo_uri (str): MongoDB connection URI
        db_name (str): Database name
        collection_name (str): Collection name
        doc_id (str, optional): ObjectId of the document
        planning_scenario_id (str, optional): planning_scenario_id filter
    
    Returns:
        dict or None: Document if found, else None
    """
    client = MongoClient(mongo_uri)
    db = client[db_name]
    collection = db[collection_name]

    query = {}
    if doc_id:
        query["_id"] = ObjectId(doc_id)
    if planning_scenario_id:
        query["planning_scenario_id"] = planning_scenario_id

    document = collection.find_one(query)
    return document


# Example usage:

mongo_uri = "mongodb://devfpauser:Ok74c3YE7GLN@13.202.247.111:27017/devfpadb"
db_name = "devfpadb"
collection_name = "scenario_questions"

# Fetch by ObjectId
doc = fetch_document(mongo_uri, db_name, collection_name, doc_id="68a84b9cad06beaa751bc407")

# Or fetch by planning_scenario_id
# doc = fetch_document(mongo_uri, db_name, collection_name, planning_scenario_id="01KITA...")

print(doc)




{'_id': ObjectId('68a84b9cad06beaa751bc407'), 'planning_scenario_id': '01K1TAXXNV4GP7FAKYJSWCCEZP', 'questions': ['Why are utility costs increasing across retail stores compared to last year?', 'How can we identify which stores are driving the highest utility expenses?', 'What cost-control measures can we implement without affecting customer experience?', 'How should rising utility costs be factored into next year’s Opex forecast?', 'What are the cost differences between in-house logistics and outsourcing delivery?', 'How can we calculate the breakeven point for outsourcing logistics?', 'What risks do we face if we outsource logistics to third-party providers?', 'If outsourcing is cheaper but less reliable, how should we approach the decision?', 'How do we accurately forecast temporary staffing costs during peak seasons?', 'What historical data should be used to predict seasonal Opex?', 'How can staffing levels be aligned with sales forecasts to avoid over/understaffing?', 'What KPIs c

In [10]:
from pymongo import MongoClient
from datetime import datetime

def upload_document_to_mongo(mongo_uri, db_name, collection_name, document):
    """
    Uploads a document to MongoDB collection.

    Args:
        mongo_uri (str): MongoDB connection string.
        db_name (str): Database name.
        collection_name (str): Collection name.
        document (dict): Document to insert.

    Returns:
        Inserted document ID
    """
    try:
        client = MongoClient(mongo_uri)
        db = client[db_name]
        collection = db[collection_name]

        result = collection.insert_one(document)
        return result.inserted_id
    except Exception as e:
        print("Error inserting document:", e)
    finally:
        client.close()


# Example usage
if __name__ == "__main__":
    mongo_uri = "mongodb://devfpauser:Ok74c3YE7GLN@13.202.247.111:27017/devfpadb"
    db_name = "devfpadb"
    collection_name = "recommendeddimensions"

    document = {
        "filename": "transactions_generated",
        "planning_scenario_id": "01K1TAZPVGNR5KK6BBRHZFYWQ7",
        "project_id": "01K5C99CP1TYR39PXNB2PAX980",
        "columns": {
            "measure": "amount",
            "date_dimension": "date"
        },
        "created_at": datetime.utcnow(),
        "rows": [
            "companycode",
            "centerc",
            "glaccount",
            "profitcenter"
            
        ],
        "updated_at": datetime.utcnow()
    }

    inserted_id = upload_document_to_mongo(mongo_uri, db_name, collection_name, document)
    print("Inserted document ID:", inserted_id)



 

Inserted document ID: 68caecb854239d1504298d7f


In [1]:
from pymongo import MongoClient

def insert_scenario_questions(mongo_uri, db_name, collection_name, planning_scenario_id, questions):
    """
    Insert a single document with multiple questions for a planning_scenario_id.
    
    Args:
        mongo_uri (str): MongoDB connection URI
        db_name (str): Database name
        collection_name (str): Collection name
        planning_scenario_id (str): The scenario identifier
        questions (list of str): List of questions
    
    Returns:
        str: The ObjectId of the inserted document as a string
    """
    client = MongoClient(mongo_uri)
    db = client[db_name]
    collection = db[collection_name]

    document = {
        "planning_scenario_id": planning_scenario_id,
        "questions": questions
    }

    result = collection.insert_one(document)
    return str(result.inserted_id)
# Example usage:


mongo_uri = "mongodb://devfpauser:Ok74c3YE7GLN@13.202.247.111:27017/devfpadb"
db_name = "devfpadb"
collection_name = "scenario_questions"


questions_list = [
"What are the current and projected trends in commercial real estate prices (malls, high-street, warehouses) and retail store expansion costs?",
"How are construction material and store fit-out costs (cement, steel, HVAC, shelving, lighting) trending, and what impact do labor shortages have?",
"What is the outlook for technology CAPEX in retail (POS systems, self-checkout, warehouse automation, last-mile logistics, cybersecurity)?",
"How are logistics real estate and warehousing investments (cold storage, fulfillment centers) evolving, and what role is automation/robotics playing?",
"What ESG-driven CAPEX requirements (green buildings, solar rooftops, EV charging, energy-efficient infrastructure) are emerging for retailers?",
"How are interest rate changes and financing costs affecting CAPEX funding (WACC, retail real estate loans, long-term project viability)?",
"What are the trends in retail M&A, franchising, and partnerships, and how do they affect capital expenditure strategies (greenfield vs acquisition)?",
"How are regulatory and policy shifts (FDI caps, REIT rules, tax incentives, infrastructure fees) influencing retail expansion projects?",
"How are changing consumer behaviors (shift to Tier-2/Tier-3 cities, smaller experiential stores, pop-ups) impacting retail CAPEX allocation?",
"What global risks (geopolitical tensions, FX volatility, import delays for equipment/fixtures) are affecting retail CAPEX execution?"
]


inserted_id = insert_scenario_questions(
    mongo_uri,
    db_name,
    collection_name,
    planning_scenario_id="01K1TAZ82AQT719F7A3RBHM3BN",
    questions=questions_list
)

print(f"Inserted document with _id: {inserted_id}")


ServerSelectionTimeoutError: 13.202.247.111:27017: timed out (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms), Timeout: 30s, Topology Description: <TopologyDescription id: 68dcb4c6b47b5045d298a64e, topology_type: Unknown, servers: [<ServerDescription ('13.202.247.111', 27017) server_type: Unknown, rtt: None, error=NetworkTimeout('13.202.247.111:27017: timed out (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms)')>]>

In [None]:
import pandas as pd
from pymongo import MongoClient


def get_term_of_plan(collection, project_id, planning_scenario_id, file_name):
    result = collection.find_one(
        {
            "project_id": project_id,
            "planning_scenario_id": planning_scenario_id,
            "filename": file_name
        },
        {
            "term_of_plan": 1,  # project only the field you want
            "_id": 0
        }
    )
    return result.get("term_of_plan") if result else None

mongo_uri= "mongodb://devfpauser:Ok74c3YE7GLN@13.202.247.111:27017/devfpadb"
db_name= "devfpadb"
client = MongoClient(mongo_uri)
db = client[db_name]
transaction_collection =db["transactionaldata"]
term = get_term_of_plan(
    collection=transaction_collection,
    project_id="01K5DMJ4KE5WSK9BE3KM3M0931",
    planning_scenario_id="01K1TAZPVGNR5KK6BBRHZFYWQ7",
    file_name="trans"
)
print(term)  # 👉 "AOP"


AOP


In [1]:
import pandas as pd

df= pd.read_parquet("/home/sravanthi/Fincace_pa/Lambdas/forecasting/trans_variance.parquet")

In [2]:
df.head(2)

Unnamed: 0,gl_account_id,profit_center_id,company_code_id,cost_center_id,period,actual_amount,forecast_amount,variance,variance_pct,actual_year,budget_year
0,5000002,PCENT002,CCODE001,CCTR002,Sep-2024,107746.13,54370.25,53375.88,98%,2024,2025
1,5000002,PCENT002,CCODE001,CCTR003,Nov-2024,4782.09,2413.11,2368.98,98%,2024,2025
