In [None]:
!pip install dowhy networkx 


In [2]:
# --- Imports ---

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import dowhy
from dowhy import CausalModel
import networkx as nx
import matplotlib.pyplot as plt

In [None]:

# Resolve the dataset location. The RETAIL_INVENTORY_CSV environment variable
# overrides the original author's local path, so the notebook can run on other
# machines without editing this cell.
import os

DATA_PATH = os.environ.get(
    "RETAIL_INVENTORY_CSV",
    r"C:\Users\Raj Dhanush\Downloads\archive (6)\retail_store_inventory.csv",
)
df = pd.read_csv(DATA_PATH)
df.head()


In [None]:
# Distinct values found in the "Category" column
category_column = df["Category"]
category_column.unique()

In [None]:
# Number of rows recorded for each "Category" value
category_counts = df["Category"].value_counts()
category_counts

In [None]:
# Count how often each Category occurs inside every Seasonality group ...
season_category_counts = df.groupby("Seasonality")["Category"].value_counts()

# ... then pivot the categories into columns, using 0 for combinations that never occur
category_season_freq = season_category_counts.unstack().fillna(0)

# Show the Seasonality x Category table
category_season_freq


In [None]:
# Derive the post-discount price; "Discount" is divided by 100, i.e. treated as a percentage
discount_fraction = df["Discount"] / 100
df["Final Price"] = df["Price"] * (1 - discount_fraction)

# Preview the frame, which now carries the "Final Price" column
df.head()


In [None]:
# "Price" and "Discount" have been combined into "Final Price", so drop both
redundant_columns = ["Price", "Discount"]
df = df.drop(redundant_columns, axis="columns")

# Preview the frame to confirm the two columns are gone
df.head()

In [None]:
# Drop the date and the store/product identifier columns
identifier_columns = ["Date", "Store ID", "Product ID"]
df = df.drop(identifier_columns, axis="columns")

# Preview the frame to confirm the three columns are gone
df.head()

In [None]:
# List the columns that remain after the drops above
remaining_columns = df.columns
print(remaining_columns)

In [None]:
import dowhy
import statsmodels.api as sm
import pandas as pd
import numpy as np

# Work on a copy so the cleaned frame `df` is left untouched
df_encoded = df.copy()

# One-hot encode the categorical columns (the first level of each is dropped)
df_encoded = pd.get_dummies(
    df_encoded,
    columns=["Category", "Region", "Weather Condition", "Seasonality"],
    drop_first=True,
)

# Assumed causal structure: every listed driver points directly at "Units Sold".
# NOTE(review): after the one-hot encoding above, "Seasonality" and
# "Weather Condition" no longer exist as columns of df_encoded (only their
# dummy columns do), so these two graph nodes have no matching data column.
# Confirm how DoWhy treats such nodes, or rename them to the dummy columns.
causal_graph = """
digraph {
    "Final Price" -> "Units Sold";
    "Seasonality" -> "Units Sold";
    "Inventory Level" -> "Units Sold";
    "Weather Condition" -> "Units Sold";
    "Holiday/Promotion" -> "Units Sold";
    "Competitor Pricing" -> "Units Sold";
}
"""

# Build the DoWhy causal model: treatment = "Final Price", outcome = "Units Sold"
model = dowhy.CausalModel(
    data=df_encoded,
    treatment="Final Price",
    outcome="Units Sold",
    graph=causal_graph,
)

# Identify the causal estimand implied by the graph
identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)
print("\nIdentified Estimand:")
print(identified_estimand)

# Every remaining column (everything except treatment and outcome) is passed
# to the estimator as an effect modifier
effect_modifiers = [
    col for col in df_encoded.columns
    if col not in ["Final Price", "Units Sold"] and not col.startswith("__categorical__")
]
print("\nEffect Modifiers Used:", effect_modifiers)

# Estimate the causal effect with a Gaussian-family Generalized Linear Model
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.generalized_linear_model",
    method_params={"glm_family": sm.families.Gaussian()},
    effect_modifiers=effect_modifiers,
)
print("\nCausal Effect Estimate:", estimate.value)

# Refutation tests: sanity checks on the estimate
print("\nRunning Refutation Tests...")

# Test 1: placebo treatment refuter
refutation_placebo = model.refute_estimate(
    identified_estimand, estimate, method_name="placebo_treatment_refuter"
)
print("\nPlacebo Treatment Test Result:")
print(refutation_placebo)

# Test 2: data subset refuter
refutation_subset = model.refute_estimate(
    identified_estimand, estimate, method_name="data_subset_refuter"
)
print("\nData Subset Test Result:")
print(refutation_subset)

# Report only what this cell actually computed. The earlier version also
# printed a "predicted sales" and a "recommended price" that were drawn from
# np.random.uniform, i.e. fabricated numbers; the Bayesian Network prediction
# and the price recommendation are produced by later cells instead.
print("\nFinal Results:")
print("Causal Effect of Price on Sales:", round(float(estimate.value), 5))


Performance Metrics (R-squared, MAE, RMSE)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Step 1: Prepare the true and predicted values for evaluation.
# `predicted_sales_original` and `scaler` are created by the Bayesian Network
# cell, which sits further down the notebook. Fail with an actionable message
# instead of a bare NameError when that cell has not been run yet.
if "predicted_sales_original" not in globals() or "scaler" not in globals():
    raise NameError(
        "'predicted_sales_original' / 'scaler' are not defined yet: run the "
        "Bayesian Network cell before computing the performance metrics."
    )

# The Bayesian Network cell min-max scales df["Units Sold"], while the
# prediction is expressed in original units, so map the observed values back
# to original units before comparing.
# NOTE(review): assumes `df` is still the frame scaled by that cell.
true_values = scaler.inverse_transform(df[["Units Sold"]]).ravel()

# A single predicted value is available, so it is repeated for every row
predicted_values = np.full(len(true_values), predicted_sales_original, dtype=float)

# Step 2: Calculate R-squared, MAE, and RMSE
r_squared = r2_score(true_values, predicted_values)
mae = mean_absolute_error(true_values, predicted_values)
rmse = np.sqrt(mean_squared_error(true_values, predicted_values))

# Step 3: Print the performance metrics
print("\nPerformance Metrics:")
print(f"R-squared: {r_squared:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")


Causal Effect Analysis Visualization

In [None]:
import matplotlib.pyplot as plt

# Step 1: Visualize Causal Effect
def plot_causal_effect(effect):
    """Draw the estimated causal effect as a single horizontal bar.

    Parameters
    ----------
    effect : float
        Causal effect value to display.

    Notes
    -----
    The x-axis is symmetric around zero and spans at least [-1, 1]; it is
    widened when the effect is larger in magnitude so the bar is not clipped.
    """
    effect = float(effect)
    # Keep the original [-1, 1] window for small effects, grow it otherwise
    axis_limit = max(1.0, 1.1 * abs(effect))

    plt.figure(figsize=(8, 6))
    plt.barh(['Causal Effect'], [effect], color='lightblue')
    plt.xlabel("Causal Effect Value")
    plt.title("Causal Effect of Price on Units Sold")
    plt.xlim([-axis_limit, axis_limit])
    plt.show()

# Step 2: Call the function with the causal effect value
plot_causal_effect(estimate.value)


In [None]:
import pandas as pd
import numpy as np
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import HillClimbSearch, BicScore, MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# Step 1: Load the dataset
# The RETAIL_INVENTORY_CSV environment variable overrides the original
# author's local path, so the notebook can run on other machines.
import os

file_path = os.environ.get(
    "RETAIL_INVENTORY_CSV",
    r"C:\Users\Ditheswar\Downloads\retail_store_inventory.csv",
)
print("Loading dataset...")
df = pd.read_csv(file_path)
print("Dataset loaded successfully!")

# Step 2: Convert categorical columns to numerical values
categorical_columns = ["Category", "Region", "Weather Condition", "Seasonality", "Holiday/Promotion"]
print("Encoding categorical variables...")
encoders = {}
for col in categorical_columns:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])  # Convert categories to integer codes
    encoders[col] = encoder  # Kept so the codes can be mapped back to labels
print("Categorical variables encoded successfully!")

# Step 3: Scale 'Units Sold' using Min-Max Scaling
print("Scaling 'Units Sold' column...")
scaler = MinMaxScaler()
df["Units Sold"] = scaler.fit_transform(df[["Units Sold"]])
print("Scaling complete!")

# Step 4: Learn the Bayesian Network structure
# NOTE(review): the search runs over every column of the freshly loaded file,
# including the date/identifier and continuous columns; presumably each
# distinct value becomes a separate state. Consider dropping identifiers and
# binning continuous columns first.
print("Learning Bayesian Network structure...")
hc = HillClimbSearch(df)
best_model = hc.estimate(scoring_method=BicScore(df))
print("Structure learning complete!")

# Step 5: Display learned structure
print("\nLearned Bayesian Network Structure:")
if best_model.edges():
    for edge in best_model.edges():
        print(f"{edge[0]} → {edge[1]}")
else:
    print("No edges found! The model may need more data or different parameters.")

# Step 6: Ensure 'Competitor Pricing' is part of the network
if not any(edge[0] == "Competitor Pricing" or edge[1] == "Competitor Pricing" for edge in best_model.edges()):
    print("\nAdding 'Competitor Pricing' → 'Units Sold' manually...")
    best_model.add_edge("Competitor Pricing", "Units Sold")
    print("Edge added successfully!")

# Step 7: Train the Bayesian Network
print("Training Bayesian Network...")
model = BayesianNetwork(best_model.edges())
model.fit(df, estimator=MaximumLikelihoodEstimator)
print("Model training complete!")

# Step 8: Perform inference using the trained model
print("Setting up inference engine...")
inference = VariableElimination(model)
print("Inference engine ready!")

# Step 9: Check if 'Units Sold' is in the model before querying
if "Units Sold" not in model.nodes():
    raise ValueError("Error: 'Units Sold' is not found in the Bayesian Network!")

# Step 10: Predict 'Units Sold' based on 'Competitor Pricing'
print("Predicting 'Units Sold' based on 'Competitor Pricing'...")
# The median can be a value that never occurs in the data (e.g. the average of
# the two middle values), which cannot be used as evidence. Snap it to the
# closest value that was actually observed.
competitor_pricing_median = df["Competitor Pricing"].median()
observed_competitor_prices = df["Competitor Pricing"].dropna().unique()
nearest_index = np.abs(observed_competitor_prices - competitor_pricing_median).argmin()
competitor_pricing_value = observed_competitor_prices[nearest_index]
query_result = inference.query(variables=["Units Sold"], evidence={"Competitor Pricing": competitor_pricing_value})

# Step 11: Convert the prediction back to the original scale
# query_result.values holds one probability per 'Units Sold' state, so the
# point prediction is the probability-weighted mean of the states; taking
# values[0] would only return the probability of the first state.
units_sold_states = np.asarray(query_result.state_names["Units Sold"], dtype=float)
predicted_sales_scaled = float(np.dot(units_sold_states, query_result.values))
predicted_sales_original = scaler.inverse_transform([[predicted_sales_scaled]])[0][0]

# Step 12: Display final results
print("\nFinal Prediction Results:")
print(f"Competitor Pricing Used for Prediction: {competitor_pricing_value:.2f}")
print(f"Predicted Sales (Original Scale): {predicted_sales_original:.2f}")
print("Prediction complete!")

Bayesian Network Prediction Accuracy

In [None]:
import matplotlib.pyplot as plt

# Step 1: Compare predicted vs actual sales
def plot_bayesian_network_accuracy(predicted_sales, true_sales):
    """Overlay the true and the predicted sales series on one line chart.

    Both sequences are plotted against their positional index: the true
    values as a dashed blue line, the predictions as a dotted red line.
    """
    series_to_draw = (
        (true_sales, "True Sales", 'blue', '--'),
        (predicted_sales, "Predicted Sales", 'red', ':'),
    )

    plt.figure(figsize=(8, 6))
    for values, series_label, series_color, series_style in series_to_draw:
        plt.plot(values, label=series_label, color=series_color, linestyle=series_style)
    plt.xlabel("Index")
    plt.ylabel("Units Sold")
    plt.title("Bayesian Network Prediction vs Actual Sales")
    plt.legend()
    plt.show()

# Step 2: Visualize the results
# The single predicted value is repeated so it lines up with every true value
n_observations = len(true_values)
predicted_sales_values = np.array([predicted_sales_original for _ in range(n_observations)])
plot_bayesian_network_accuracy(predicted_sales_values, true_values)


In [None]:
import numpy as np

# Step 1: Retrieve Results from Previous Codes
# Causal effect estimated by the DoWhy model
causal_effect = estimate.value

# Posterior probabilities of the 'Units Sold' states from the Bayesian Network query
predicted_sales = query_result.values

# Step 2: Turn the posterior into a point prediction
# Averaging the probabilities themselves always gives 1 / number_of_states, so
# use the probability-weighted mean of the states instead and map it back from
# the min-max scale to original units.
units_sold_states = np.asarray(query_result.state_names["Units Sold"], dtype=float)
expected_scaled_sales = float(np.dot(units_sold_states, predicted_sales))
predicted_sales_mean = float(scaler.inverse_transform([[expected_scaled_sales]])[0][0])

# Step 3: Locate a price column (first column whose name contains "price")
price_column = [col for col in df.columns if "price" in col.lower()]

if not price_column:
    raise KeyError("Error: No column related to 'price' found in the DataFrame. Please check column names.")

price_column = price_column[0]  # Select the first matching column

# Step 4: Determine the Best Price Based on Causal Effect
average_price = df[price_column].mean()  # Calculate the mean price

# The former negative/positive branches were algebraically identical
# (1 - |e| equals 1 + e when e < 0), so a single expression is enough.
# NOTE(review): this heuristic treats the effect as a fractional price change; confirm that is intended.
optimal_price = average_price * (1 + causal_effect)

# Step 5: Format and Display Final Output
final_output = {
    "Causal Effect of Price on Sales": round(causal_effect, 4),
    "Predicted Sales (Bayesian Network)": round(predicted_sales_mean, 4),
    "Recommended Optimal Price": round(optimal_price, 2)
}

print("\nFinalized Optimal Pricing Decision:")
for key, value in final_output.items():
    print(f"{key}: {value}")


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Step 1: Price grid spanning the observed range of the selected price column
prices = np.linspace(df[price_column].min(), df[price_column].max(), 100)

# Step 2: Simulate Causal Effect on Sales (illustrative curve only, not a model output)
causal_sales = df[price_column].mean() * (1 + causal_effect * np.sin(prices))

# Step 3: Simulate Predicted Sales from Bayesian Network (illustrative noise only)
# A seeded generator keeps the figure identical across runs.
rng = np.random.default_rng(42)
predicted_sales_from_bn = rng.normal(predicted_sales_mean, 10, len(prices))

# Step 4: Plot Both Curves on one explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))

# Causal Effect Line
ax.plot(prices, causal_sales, label="Causal Effect on Sales", color='b', linewidth=2)

# Predicted Sales Line (Bayesian Network)
ax.plot(prices, predicted_sales_from_bn, label="Predicted Sales (Bayesian Network)", color='g', linestyle='--', linewidth=2)

# Step 5: Highlight Optimal Price (Recommendation)
ax.axvline(optimal_price, color='r', linestyle=':', label=f"Optimal Price: ${optimal_price:.2f}")

# Step 6: Graph Labels and Title
ax.set_title("Causal Effect vs. Predicted Sales for Optimal Pricing Decision", fontsize=14)
ax.set_xlabel("Price", fontsize=12)
ax.set_ylabel("Sales", fontsize=12)
ax.legend(loc="upper left")
ax.grid(True)

# Display the Graph
plt.show()


In [None]:
import numpy as np

# Step 1: Get user input for test values
# NOTE(review): only the test price enters the calculation below; the other
# inputs are merely echoed back in the results.
print("\nEnter test values for pricing analysis:")
test_price = float(input("Enter the test price: "))
test_inventory = float(input("Enter inventory level: "))
test_weather = int(input("Enter weather condition (0: Normal, 1: Rainy, 2: Snowy): "))
test_season = int(input("Enter seasonality (0: Spring, 1: Summer, 2: Winter): "))
test_promotion = int(input("Enter holiday/promotion status (0: No, 1: Yes): "))
test_competitor_price = float(input("Enter competitor pricing: "))

# Step 2: Retrieve Results from Previous Models
causal_effect = estimate.value  # Extracted from DoWhy model
predicted_sales = query_result.values  # Posterior probabilities of the 'Units Sold' states

# Step 3: Turn the posterior into a point prediction
# Averaging the probabilities themselves always gives 1 / number_of_states, so
# use the probability-weighted mean of the states and map it back from the
# min-max scale to original units.
units_sold_states = np.asarray(query_result.state_names["Units Sold"], dtype=float)
expected_scaled_sales = float(np.dot(units_sold_states, predicted_sales))
predicted_sales_mean = float(scaler.inverse_transform([[expected_scaled_sales]])[0][0])

# Step 4: Adjust the test price by the causal effect
adjusted_price = test_price * (1 + causal_effect)

# Step 5: Store results in a dictionary for structured output
results = {
    "Test Price": test_price,
    "Inventory Level": test_inventory,
    "Weather Condition": test_weather,
    "Seasonality": test_season,
    "Holiday/Promotion": test_promotion,
    "Competitor Pricing": test_competitor_price,
    "Predicted Sales (Bayesian Network)": round(predicted_sales_mean, 2),
    "Causal Effect of Price on Sales": round(causal_effect, 4),
    "Recommended Adjusted Price": round(adjusted_price, 2)
}

# Step 6: Print the formatted output
print("\n===== Model Analysis Results =====")
for key, value in results.items():
    print(f"{key}: {value}")
