In [None]:
import getpass
import os
from datetime import datetime

import pandas as pd

In [None]:
# --- Notebook configuration -------------------------------------------------
# Daily master snapshots, listed oldest first; each consecutive pair is diffed.
snapshots = [
    "/content/MCA-Insight-Engine/Data/Simulated Snapshots/day1_master.csv",
    "/content/MCA-Insight-Engine/Data/Simulated Snapshots/day2_master.csv",
    "/content/MCA-Insight-Engine/Data/Simulated Snapshots/day3_master.csv",
]

# Columns compared for companies that appear in both snapshots of a pair.
tracked_fields = [
    "COMPANY_STATUS",
    "AUTHORIZED_CAP",
    "PAIDUP_CAPITAL",
    "COMPANY_NAME",
]

# Directory that receives one "<Day>_changes.csv" file per snapshot pair.
output_folder = '/content/MCA-Insight-Engine/Data/Change logs'

In [None]:
def detect_changes(prev_df, curr_df, date_str, fields=None):
    """Compare two company snapshots and list what changed between them.

    Three kinds of change are reported, in this order:

    1. ``"New Incorporation"`` - CIN present only in ``curr_df``.
    2. ``"Deregistered"``      - CIN present only in ``prev_df``.
    3. ``"Field Update"``      - CIN present in both, with a tracked field
       whose value differs.

    Within each group the records are ordered by CIN so that repeated runs
    over the same snapshots produce identical change logs.

    Parameters
    ----------
    prev_df, curr_df : pandas.DataFrame
        Earlier and later snapshot. Both must contain a
        ``CORPORATE_IDENTIFICATION_NUMBER`` column and every tracked field.
        Neither frame is modified.
    date_str : str
        Label copied verbatim into the ``"Date"`` key of every record.
    fields : iterable of str, optional
        Columns to compare for companies present in both snapshots. When
        omitted, the notebook-level ``tracked_fields`` list is used.

    Returns
    -------
    list of dict
        One dict per change with the keys ``"CIN"``, ``"Change_Type"``,
        ``"Field_Changed"``, ``"Old_Value"``, ``"New_Value"`` and ``"Date"``.
        For new/removed companies the value is the full row as a dict and
        ``"Field_Changed"`` is ``"ALL"``.

    Raises
    ------
    KeyError
        If the CIN column is missing, or a tracked field is missing while
        there are companies common to both snapshots.
    """
    cin_col = "CORPORATE_IDENTIFICATION_NUMBER"
    fields = tracked_fields if fields is None else list(fields)

    def _index_by_cin(frame):
        # Rows without a CIN cannot be matched across snapshots, so they are
        # ignored. When a CIN is repeated, only its first row is used (the
        # same row the previous mask + .iloc[0] lookup selected).
        # drop=False keeps the CIN column inside the row dicts.
        return (
            frame.dropna(subset=[cin_col])
            .drop_duplicates(subset=cin_col, keep="first")
            .set_index(cin_col, drop=False)
        )

    # Index once so every per-company lookup is a hash lookup instead of a
    # full boolean scan of the frame.
    prev_rows = _index_by_cin(prev_df)
    curr_rows = _index_by_cin(curr_df)
    prev_keys = set(prev_rows.index)
    curr_keys = set(curr_rows.index)

    changes = []

    # 1) New incorporations: present today, absent yesterday.
    # key=str keeps sorting safe even if the CIN column holds mixed types.
    for cin in sorted(curr_keys - prev_keys, key=str):
        changes.append({
            "CIN": cin,
            "Change_Type": "New Incorporation",
            "Field_Changed": "ALL",
            "Old_Value": None,
            "New_Value": curr_rows.loc[cin].to_dict(),
            "Date": date_str
        })

    # 2) Deregistered companies: present yesterday, absent today.
    for cin in sorted(prev_keys - curr_keys, key=str):
        changes.append({
            "CIN": cin,
            "Change_Type": "Deregistered",
            "Field_Changed": "ALL",
            "Old_Value": prev_rows.loc[cin].to_dict(),
            "New_Value": None,
            "Date": date_str
        })

    # 3) Field updates for companies present in both snapshots.
    for cin in sorted(prev_keys & curr_keys, key=str):
        for field in fields:
            old_value = prev_rows.at[cin, field]
            new_value = curr_rows.at[cin, field]

            # NaN != NaN evaluates to True, so a value missing in both
            # snapshots would otherwise be logged as a change.
            if pd.isna(old_value) and pd.isna(new_value):
                continue

            if old_value != new_value:
                changes.append({
                    "CIN": cin,
                    "Change_Type": "Field Update",
                    "Field_Changed": field,
                    "Old_Value": old_value,
                    "New_Value": new_value,
                    "Date": date_str
                })

    return changes

# ----------------------------
# Loop through snapshots to generate change logs
# ----------------------------
# Explicit column order: without it, a day with zero changes yields a
# column-less DataFrame and a header-less CSV that pd.read_csv cannot load.
CHANGE_LOG_COLUMNS = ["CIN", "Change_Type", "Field_Changed", "Old_Value", "New_Value", "Date"]

# DataFrame.to_csv does not create missing directories.
os.makedirs(output_folder, exist_ok=True)

# Each snapshot is parsed once: the "current" frame of one iteration becomes
# the "previous" frame of the next (detect_changes does not modify its inputs).
curr_df = pd.read_csv(snapshots[0]) if len(snapshots) > 1 else None

for i in range(1, len(snapshots)):
    prev_df = curr_df
    curr_df = pd.read_csv(snapshots[i])
    date_str = f"Day{i+1}"  # Day2, Day3, etc.

    changes = detect_changes(prev_df, curr_df, date_str)

    # Save changes even if empty (the header row is still written)
    changes_df = pd.DataFrame(changes, columns=CHANGE_LOG_COLUMNS)
    output_file = os.path.join(output_folder, f"{date_str}_changes.csv")
    changes_df.to_csv(output_file, index=False)

    print(f"✅ Change log created: {output_file} ({len(changes)} changes detected)")

✅ Change log created: /content/MCA-Insight-Engine/Data/Change logs/Day2_changes.csv (4 changes detected)
✅ Change log created: /content/MCA-Insight-Engine/Data/Change logs/Day3_changes.csv (3 changes detected)


In [None]:
# Colab-only cell: imports the `auth` helper from google.colab and invokes its
# authenticate_user() entry point.
# NOTE(review): nothing visible in this notebook appears to consume the result
# (the git cell below authenticates to GitHub with its own token) - confirm
# whether this cell is still needed.
from google.colab import auth
auth.authenticate_user()


In [None]:
!git config --global user.email "dixitabhayraj2603@gmail.com"
!git config --global user.name "ARD-droid"
!git clone https://github.com/ARD-droid/MCA-Insight-Engine
!git remote set-url origin https://ARD-droid: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@github.com/ARD-droid/MCA-Insight-Engine.git
!pwd
%cd /content/MCA-Insight-Engine
!git lfs install
!git lfs track "*.csv"
!git add .gitattributes
!git add '/content/MCA-Insight-Engine/Data/Change logs'
!git commit -m "Change Detection Outputs"
!git pull origin main --rebase
!git push origin main

Cloning into 'MCA-Insight-Engine'...
remote: Enumerating objects: 78, done.[K
remote: Counting objects: 100% (78/78), done.[K
remote: Compressing objects: 100% (61/61), done.[K
remote: Total 78 (delta 8), reused 61 (delta 4), pack-reused 0 (from 0)[K
Receiving objects: 100% (78/78), 10.33 KiB | 10.33 MiB/s, done.
Resolving deltas: 100% (8/8), done.
Filtering content: 100% (14/14), 373.27 MiB | 43.50 MiB/s, done.
/content/MCA-Insight-Engine
/content/MCA-Insight-Engine
Updated git hooks.
Git LFS initialized.
"*.csv" already supported
[main 1226bdd] Change Detection Outputs
 2 files changed, 6 insertions(+)
 create mode 100644 Data/Change logs/Day2_changes.csv
 create mode 100644 Data/Change logs/Day3_changes.csv
From https://github.com/ARD-droid/MCA-Insight-Engine
 * branch            main       -> FETCH_HEAD
Current branch main is up to date.
Uploading LFS objects: 100% (2/2), 2.6 KB | 0 B/s, done.
Enumerating objects: 9, done.
Counting objects: 100% (9/9), done.
Delta compression u