In [1]:
import os
import sys
from pathlib import Path
from kedro.framework.project import configure_project
from kedro.framework.session import KedroSession

# Resolve the project root. The KEDRO_PROJECT_PATH environment variable, when
# set, overrides the location so the notebook is not tied to a single machine;
# the original local checkout remains the fallback.
project_path = Path(
    os.environ.get(
        "KEDRO_PROJECT_PATH",
        "/Users/ajaynehra/Desktop/projects/econometrics-modelling",
    )
)
# Later cells use paths relative to the project root, so switch to it now.
os.chdir(project_path)

# Make the project's src/ directory importable. The membership guard keeps
# sys.path free of duplicate entries when this cell is executed more than once.
src_dir = str(project_path / "src")
if src_dir not in sys.path:
    sys.path.append(src_dir)

# Configure Kedro for the project package
configure_project("econometrics_modelling")

# Create the session and load the project context from it
session = KedroSession.create(project_path=project_path)
context = session.load_context()

# Expose the data catalog and the parameters for the cells below
catalog = context.catalog
params = context.params

print("✅ Kedro context loaded.")
print("Loaded param keys:", list(params.keys()))


25/06/21 20:19:07 WARN Utils: Your hostname, Ajays-MacBook-Pro.local resolves to a loopback address: 127.0.0.1; using 192.168.1.4 instead (on interface en0)
25/06/21 20:19:07 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/06/21 20:19:07 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


✅ Kedro context loaded.
Loaded param keys: ['feature_engineering', 'preprocessing', 'mixed_modeling']


In [2]:
from econometrics_modelling.pipelines.mixed_modelling.nodes import prepare_formula_for_MM

# Pull the mixed-model specification out of the parameters loaded in the first cell.
mm_spec = params["mixed_modeling"]["model_specification"]
# The helper is handed a dict keyed by "model_specification", not the full params dict.
formula = prepare_formula_for_MM({"model_specification": mm_spec})

print("\n✅ Generated formula:\n", formula)





✅ Generated formula:
 log_total_volume ~ log_avg_price + log_promo_acv_tpr + trend + log_avg_price:ppg_id + log_promo_acv_tpr:retailer_id + (1|ppg_id) + (1|retailer_id) + (0+log_avg_price|ppg_id) + (0+log_promo_acv_tpr|retailer_id) + (1+trend|ppg_id)




In [3]:
import pandas as pd

# Model-input CSV, given relative to the current working directory.
data_path = Path("data/08_model_input/feature_data.csv")
file_is_present = data_path.exists()
print("✅ Data file exists:", file_is_present)

# Load the feature table and report its dimensions and column labels.
df = pd.read_csv(data_path)
frame_shape = df.shape
column_labels = list(df.columns)
print("✅ Data shape:", frame_shape)
print("✅ Sample columns:\n", column_labels)


✅ Data file exists: True
✅ Data shape: (624, 83)
✅ Sample columns:
 ['ppg_id', 'retailer_id', 'total_volume', 'promo_volume', 'total_sales', 'promo_sales', 'promo_acv_tpr', 'promo_acv_feature', 'promo_acv_display', 'promo_acv_feature_display', 'acv_weighted_distribution', 'brand', 'sub_brand', 'size', 'pack_count', 'avg_price', 'edlp_price', 'holiday_flag', 'cpi', 'xpi', 'opi', 'log_total_volume', 'log_avg_price', 'log_promo_acv_tpr', 'log_promo_acv_feature', 'log_promo_acv_display', 'log_promo_acv_feature_display', 'log_cpi', 'log_xpi', 'log_opi', 'trend', 'week_1', 'week_2', 'week_3', 'week_4', 'week_5', 'week_6', 'week_7', 'week_8', 'week_9', 'week_10', 'week_11', 'week_12', 'week_13', 'week_14', 'week_15', 'week_16', 'week_17', 'week_18', 'week_19', 'week_20', 'week_21', 'week_22', 'week_23', 'week_24', 'week_25', 'week_26', 'week_27', 'week_28', 'week_29', 'week_30', 'week_31', 'week_32', 'week_33', 'week_34', 'week_35', 'week_36', 'week_37', 'week_38', 'week_39', 'week_40', 'week

In [4]:
import re

# Collect every identifier-like token that appears in the formula string.
# Tokens starting with a digit (e.g. the 0 / 1 markers) do not match the pattern.
identifier_pattern = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
formula_cols = set(identifier_pattern.findall(formula))

# Keep only the tokens that have no matching column in the DataFrame.
available_cols = df.columns
missing_cols = [token for token in formula_cols if token not in available_cols]

print("✅ Columns used in formula:", formula_cols)
print("❌ Missing columns in DataFrame:", missing_cols)


✅ Columns used in formula: {'log_promo_acv_tpr', 'log_total_volume', 'trend', 'log_avg_price', 'retailer_id', 'ppg_id'}
❌ Missing columns in DataFrame: []


In [5]:
# Step 3: Extract grouping variables
def extract_grouping_vars(params_dict):
    """Collect the grouping (level) variables named in the mixed-model specification.

    The names are gathered, in this order, from:
      1. ``random_effects.uncorrelated.intercepts`` (a list of names),
      2. ``by_level`` of each entry in ``random_effects.uncorrelated.slopes``,
      3. ``by_level`` of each entry in ``random_effects.correlated``,
      4. ``with_level`` of each entry in ``fixed_effects.interactions``.

    Args:
        params_dict: Either the full parameters dict (containing a
            ``"mixed_modeling"`` key) or the mixed-modelling section itself
            (containing ``"model_specification"`` directly).

    Returns:
        A list of unique names in first-seen order, so the result is the same
        on every run (a plain ``list(set(...))`` of strings is not).

    Raises:
        KeyError: If ``"model_specification"`` is missing, or a slope /
            correlated / interaction entry lacks its level key.
    """
    # Accept both the full params dict and the already-unwrapped section.
    if "mixed_modeling" in params_dict:
        section = params_dict["mixed_modeling"]
    else:
        section = params_dict
    spec = section["model_specification"]

    # `or {}` / `or []` also cover sections that are present but empty (None),
    # which a plain .get(key, default) would pass through and then crash on.
    random_effects = spec.get("random_effects") or {}
    uncorrelated = random_effects.get("uncorrelated") or {}
    fixed_effects = spec.get("fixed_effects") or {}

    # A dict is used as an insertion-ordered set: keys are the unique names.
    ordered_names = {}

    for name in uncorrelated.get("intercepts") or []:
        ordered_names[name] = None

    for slope in uncorrelated.get("slopes") or []:
        ordered_names[slope["by_level"]] = None

    for corr in random_effects.get("correlated") or []:
        ordered_names[corr["by_level"]] = None

    for fx in fixed_effects.get("interactions") or []:
        ordered_names[fx["with_level"]] = None

    return list(ordered_names)


In [6]:

# Derive the grouping variables from the full params dict loaded in the first cell.
grouping_vars = extract_grouping_vars(params)
print("🔁 Grouping variables:", grouping_vars)

🔁 Grouping variables: ['ppg_id', 'retailer_id']


In [7]:
from julia import Main
# Load the Julia source file into the embedded Julia session; the path is
# relative, so it depends on the working directory set in the first cell.
Main.include("src/econometrics_modelling/pipelines/mixed_modelling/mixed_model.jl")

print("✅ Calling Julia model function...")
# The data location is passed as a string path, together with the formula
# string and the list of grouping variables built in the earlier cells.
# NOTE(review): presumably mixed_model_fn is defined by the included file — confirm.
results = Main.mixed_model_fn(str(data_path), formula,grouping_vars)
print("✅ Julia model function returned results")


✅ Calling Julia model function...
🔍 Reading data from: data/08_model_input/feature_data.csv
📊 Converting grouping variables to categorical: ["ppg_id", "retailer_id"]
🧮 Parsing formula string as raw formula expression
