<a href="https://colab.research.google.com/github/KARTHICK20-jan/ENEC-Karthick/blob/main/Datanetra.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from google.colab import files

# Seed NumPy's global generator so the synthetic dataset is reproducible.
np.random.seed(42)

# Number of synthetic MSME records to generate.
rows = 300

# Categorical pools; each record samples one value from each pool.
business_types = ["Grocery", "Pharmacy", "Clothing", "Electronics", "Restaurant"]
cities = ["Chennai", "Coimbatore", "Trichy", "Madurai", "Salem", "Bangalore"]

# Build the columns one by one. The order of the random draws matters: every
# call advances the seeded generator, so reordering them would change the data.
# Note that np.random.randint excludes its upper bound.
columns = {}
columns["MSME_ID"] = ["MSME_{:04d}".format(n) for n in range(1, rows + 1)]
columns["Business_Type"] = np.random.choice(business_types, rows)
columns["City"] = np.random.choice(cities, rows)
columns["Years_in_Operation"] = np.random.randint(1, 25, rows)
columns["Monthly_Sales_INR"] = np.random.randint(150000, 5000000, rows)
columns["Monthly_Demand_Units"] = np.random.randint(300, 20000, rows)
columns["Inventory_Turnover"] = np.random.uniform(1.5, 10.0, rows).round(2)
columns["Avg_Margin_Percent"] = np.random.uniform(5, 35, rows).round(2)
columns["Monthly_Operating_Cost_INR"] = np.random.randint(80000, 3000000, rows)
columns["Outstanding_Loan_INR"] = np.random.randint(0, 8000000, rows)
columns["On_Time_Payment_Rate"] = np.random.uniform(0.6, 1.0, rows).round(2)
columns["Vendor_Delivery_Reliability"] = np.random.uniform(0.5, 1.0, rows).round(2)
columns["Supplier_Count"] = np.random.randint(1, 15, rows)
columns["Employee_Count"] = np.random.randint(2, 120, rows)
columns["Digital_Ad_Spend_INR"] = np.random.randint(0, 300000, rows)

# One row per MSME, columns in the insertion order above.
df = pd.DataFrame(columns)

# --- Financial Risk Score (0 = Low Risk, 1 = High Risk) ---
# Weighted blend of three ratios, clipped into [0, 1]:
#   40% outstanding loan relative to annualised sales (monthly sales x 12),
#   30% share of payments that were not on time,
#   30% monthly operating cost relative to monthly sales.
annual_sales = df["Monthly_Sales_INR"] * 12
loan_to_sales = df["Outstanding_Loan_INR"] / annual_sales
late_payment_rate = 1 - df["On_Time_Payment_Rate"]
cost_to_sales = df["Monthly_Operating_Cost_INR"] / df["Monthly_Sales_INR"]
raw_risk = 0.4 * loan_to_sales + 0.3 * late_payment_rate + 0.3 * cost_to_sales
df["Financial_Risk_Score"] = raw_risk.clip(0, 1)

# --- Vendor Score (0–1) ---
# 60% delivery reliability plus 40% inventory turnover scaled by the largest
# turnover value in the dataset.
turnover_vs_best = df["Inventory_Turnover"] / df["Inventory_Turnover"].max()
df["Vendor_Score"] = 0.6 * df["Vendor_Delivery_Reliability"] + 0.4 * turnover_vs_best

# --- Growth Potential Score (0–1) ---
# 50% margin, 30% demand, 20% digital ad spend. Demand and ad spend are scaled
# by their dataset maxima; margin is divided by the fixed value 35, which is
# the upper bound used when Avg_Margin_Percent is sampled earlier in this cell.
margin_share = df["Avg_Margin_Percent"] / 35
demand_vs_best = df["Monthly_Demand_Units"] / df["Monthly_Demand_Units"].max()
ad_spend_vs_best = df["Digital_Ad_Spend_INR"] / df["Digital_Ad_Spend_INR"].max()
df["Growth_Potential_Score"] = (
    0.5 * margin_share + 0.3 * demand_vs_best + 0.2 * ad_spend_vs_best
)

# Write the dataset to an Excel workbook (without the DataFrame index) and
# pass the resulting file to Colab's download helper.
file_name = "MSME_Retail_Dataset.xlsx"
df.to_excel(excel_writer=file_name, index=False)
files.download(file_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [2]:
# ==============================
# DataNetra MVP – AI Dashboard PoC (Colab)
# Upload Excel → Predict → Visualize → PDF Report
# ==============================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from google.colab import files

# Upload dataset
# files.upload() opens Colab's file picker and returns a mapping keyed by the
# uploaded file names. Guard against an empty result (e.g. the dialog was
# dismissed) so the failure is explicit instead of a bare IndexError.
uploaded = files.upload()
if not uploaded:
    raise ValueError(
        "No file was uploaded. Re-run this cell and select the MSME Excel dataset."
    )

# Only the first uploaded file is analysed; any additional files are ignored.
file_name = next(iter(uploaded))

df = pd.read_excel(file_name)

# Fail fast with a readable message if the workbook lacks the columns that the
# forecast and report steps further down in this cell read.
required_columns = [
    "Monthly_Sales_INR",
    "Monthly_Demand_Units",
    "Financial_Risk_Score",
    "Vendor_Score",
]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise ValueError(
        f"'{file_name}' is missing required column(s): {', '.join(missing_columns)}"
    )

# ---------- SIMPLE FORECAST LOGIC ----------
# Forecast horizon; also used as the x-axis of the forecast charts.
years = [2026, 2027, 2028]

# Fixed year-over-year multipliers, aligned position-by-position with `years`.
sales_growth = [1.08, 1.10, 1.12]
demand_growth = [1.07, 1.09, 1.11]

# Sales: start from annualised sales (monthly x 12) and compound each year's
# multiplier on top of the previous year's forecast column.
base = df["Monthly_Sales_INR"] * 12
for year, factor in zip(years, sales_growth):
    df[f"Sales_{year}"] = base * factor
    base = df[f"Sales_{year}"]

# Demand: same compounding, starting from the monthly unit figure as-is.
# NOTE(review): unlike sales, demand is not multiplied by 12 — confirm whether
# the demand forecast is meant to be monthly or annual.
base = df["Monthly_Demand_Units"]
for year, factor in zip(years, demand_growth):
    df[f"Demand_{year}"] = base * factor
    base = df[f"Demand_{year}"]

# ---------- AGGREGATED METRICS ----------
# Portfolio-wide totals per forecast year (summed over all rows).
sales_forecast = [df[f"Sales_{year}"].sum() for year in years]
demand_forecast = [df[f"Demand_{year}"].sum() for year in years]

# Mean scores across all rows, shown on the summary page of the report.
avg_risk, avg_vendor = (
    df[score_column].mean()
    for score_column in ("Financial_Risk_Score", "Vendor_Score")
)

# ---------- PDF DASHBOARD ----------
# Builds a five-page PDF (two forecast line charts, two score histograms and a
# text summary), then triggers a browser download of the file.
pdf_file = "DataNetra_MSME_AI_Report.pdf"


def _save_forecast_page(pdf, forecast_years, totals, title, y_label):
    """Append a line chart of yearly totals to the open PDF as a new page.

    Args:
        pdf: Open ``PdfPages`` object that receives the page.
        forecast_years: X-axis values, one per forecast year.
        totals: Y-axis values, aligned with ``forecast_years``.
        title: Chart title.
        y_label: Label for the y-axis.
    """
    fig, ax = plt.subplots()
    ax.plot(forecast_years, totals, marker="o")
    # Pin the ticks to the forecast years; the automatic locator would
    # otherwise label the axis with fractional values such as 2026.5.
    ax.set_xticks(forecast_years)
    ax.set_title(title)
    ax.set_xlabel("Year")
    ax.set_ylabel(y_label)
    pdf.savefig(fig)
    # Close explicitly so figures do not accumulate in the kernel.
    plt.close(fig)


def _save_histogram_page(pdf, values, title, x_label, y_label):
    """Append a 10-bin histogram of ``values`` to the open PDF as a new page.

    Args:
        pdf: Open ``PdfPages`` object that receives the page.
        values: Numeric values to bin.
        title: Chart title.
        x_label: Label for the x-axis.
        y_label: Label for the y-axis.
    """
    fig, ax = plt.subplots()
    ax.hist(values, bins=10)
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    pdf.savefig(fig)
    plt.close(fig)


with PdfPages(pdf_file) as pdf:

    # Sales Forecast
    _save_forecast_page(
        pdf, years, sales_forecast, "3-Year Sales Forecast (INR)", "Total Sales"
    )

    # Demand Forecast
    _save_forecast_page(
        pdf, years, demand_forecast, "3-Year Demand Forecast (Units)", "Total Demand"
    )

    # Financial Risk Distribution
    _save_histogram_page(
        pdf,
        df["Financial_Risk_Score"],
        "Financial Risk Score Distribution",
        "Risk Score",
        "MSME Count",
    )

    # Vendor Score Distribution
    _save_histogram_page(
        pdf,
        df["Vendor_Score"],
        "Vendor / Supplier Score Distribution",
        "Vendor Score",
        "Count",
    )

    # Executive Summary Page: a text-only page drawn on an axes with the
    # frame, ticks and labels switched off.
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.axis("off")
    summary = f"""
    DataNetra – MSME AI Decision Support Summary

    Total MSMEs Analysed : {len(df)}
    Average Financial Risk Score : {avg_risk:.2f}
    Average Vendor Score : {avg_vendor:.2f}

    Insights:
    • Sales and demand show steady growth over 3 years
    • MSMEs with high risk scores require monitoring
    • Vendor scoring enables intelligent ecosystem mapping
    """
    ax.text(0.01, 0.5, summary, fontsize=12)
    pdf.savefig(fig)
    plt.close(fig)

# Download report (runs after the PdfPages context has closed the file).
files.download(pdf_file)

print("✅ MVP AI Dashboard Report Generated Successfully")


Saving MSME_Retail_Dataset.xlsx to MSME_Retail_Dataset (1).xlsx


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ MVP AI Dashboard Report Generated Successfully
