## Clone github repo

In [1]:
!git clone https://github.com/23f2003589/iitm-mlops-w5.git

Cloning into 'iitm-mlops-w5'...
remote: Enumerating objects: 86, done.[K
remote: Counting objects: 100% (86/86), done.[K
remote: Compressing objects: 100% (57/57), done.[K
remote: Total 86 (delta 27), reused 6 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (86/86), 24.43 KiB | 6.11 MiB/s, done.
Resolving deltas: 100% (27/27), done.


In [7]:
import os
print(os.getcwd())

/home/jupyter/week8


In [14]:
import numpy as np
import joblib
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import json
import argparse

def load_original_model():
    model = joblib.load('iitm-mlops-w5/model.joblib')
    
def poison_data(X, y, poison_percent=0.05):
    n_samples = len(X)
    n_poison = int(n_samples * poison_percent)    
    X_poisoned = X.copy()
    y_poisoned = y.copy()
    poison_indices = np.random.choice(n_samples, n_poison, replace=False)
    for idx in poison_indices:
        original_label = y_poisoned[idx]
        available_labels = [l for l in np.unique(y) if l != original_label]
        y_poisoned[idx] = np.random.choice(available_labels)
    return X_poisoned, y_poisoned

def train_and_evaluate(poison_percent=0.0, save_metrics=True):
    iris = load_iris()
    X, y = iris.data[:160], iris.target[:160]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    if poison_percent > 0:
        X_train_poisoned, y_train_poisoned = poison_data(X_train, y_train, poison_percent)
    else:
        X_train_poisoned, y_train_poisoned = X_train, y_train
    
    model = DecisionTreeClassifier(random_state=42)
    model.fit(X_train_poisoned, y_train_poisoned)
    
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(classification_report(y_test, y_pred, target_names=iris.target_names))
    
    if save_metrics:
        metrics = {
            "poison_percent": poison_percent,
            "accuracy": float(accuracy),
            "train_size": len(X_train),
            "test_size": len(X_test),
            "n_poisoned_samples": int(len(X_train) * poison_percent)
        }
        
        with open('metrics.json', 'w') as f:
            json.dump(metrics, f, indent=2)
    
    cm = confusion_matrix(y_test, y_pred)
    np.save('confusion_matrix.npy', cm)
    
    joblib.dump(model, 'model_poisoned.joblib')    
    return accuracy, model

def compare_models(poison_percent=0.05):
    original_model = load_original_model()
    iris = load_iris()
    X, y = iris.data[:160], iris.target[:160]
    _, X_test, _, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    if original_model:
        y_pred_original = original_model.predict(X_test)
        original_accuracy = accuracy_score(y_test, y_pred_original)
        print(f"Original Model Accuracy: {original_accuracy:.4f}")
    poisoned_accuracy, _ = train_and_evaluate(poison_percent, save_metrics=True)
    if original_model:
        accuracy_drop = original_accuracy - poisoned_accuracy
        print(f"Accuracy Drop: {accuracy_drop:.4f} ({accuracy_drop*100:.2f}%)")

compare_models(poison_percent=0.15)

              precision    recall  f1-score   support

      setosa       0.88      0.70      0.78        10
  versicolor       0.67      0.67      0.67         9
   virginica       0.69      0.82      0.75        11

    accuracy                           0.73        30
   macro avg       0.74      0.73      0.73        30
weighted avg       0.75      0.73      0.73        30



In [17]:
!git add iitm-mlops-w5/poison_demo.py

In [18]:
!git checkout -b week8

Switched to a new branch 'week8'


In [19]:
!git add .

[33mhint: You've added another git repository inside your current repository.[m
[33mhint: Clones of the outer repository will not contain the contents of[m
[33mhint: the embedded repository and will not know how to obtain it.[m
[33mhint: If you meant to add a submodule, use:[m
[33mhint: [m
[33mhint: 	git submodule add <url> week8/iitm-mlops-w5[m
[33mhint: [m
[33mhint: If you added this path by mistake, you can remove it from the[m
[33mhint: index with:[m
[33mhint: [m
[33mhint: 	git rm --cached week8/iitm-mlops-w5[m
[33mhint: [m
[33mhint: See "git help submodule" for more information.[m


In [20]:
!git commit -m "Poison Demo 5%"

[week8 e3a143b] Poison Demo 5%
 4 files changed, 8 insertions(+)
 create mode 100644 week8/confusion_matrix.npy
 create mode 160000 week8/iitm-mlops-w5
 create mode 100644 week8/metrics.json
 create mode 100644 week8/model_poisoned.joblib


In [21]:
!git remote -v

origin	https://23F2003589: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@github.com/23F2003589/23F2003589_IITMBS_MLOPS_OPPE1.git (fetch)
origin	https://23F2003589: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@github.com/23F2003589/23F2003589_IITMBS_MLOPS_OPPE1.git (push)


In [22]:
!git remote set-url origin https://github.com/23f2003589/iitm-mlops-w5.git


In [23]:
!git config user.name
!git config user.email


23f2003589@ds.study.iitm.ac.in
23f2003589


In [24]:
!git remote set-url origin https://23f2003589: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@github.com/23f2003589/iitm-mlops-w5.git

In [25]:
!git push origin week8

Enumerating objects: 2401, done.
Counting objects: 100% (2401/2401), done.
Delta compression using up to 2 threads
Compressing objects: 100% (1590/1590), done.
Writing objects: 100% (2401/2401), 360.67 MiB | 9.65 MiB/s, done.
Total 2401 (delta 573), reused 0 (delta 0), pack-reused 0
remote: Resolving deltas: 100% (573/573), done.[K
remote: 
remote: Create a pull request for 'week8' on GitHub by visiting:[K
remote:      https://github.com/23f2003589/iitm-mlops-w5/pull/new/week8[K
remote: 
To https://github.com/23f2003589/iitm-mlops-w5.git
 * [new branch]      week8 -> week8


In [58]:
%%bash
mkdir -p .github/workflows && cat > .github/workflows/data_poisoning.yml <<'EOF'
git config --global user.name "23f2003589"
git config --global user.email "23f2003589@ds.study.iitm.ac.in"
name: Data Poisoning CML Report

on:
  push:
    branches: [week-8]
  pull_request:
    branches: [main]

jobs:
  poison-experiment:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      
      - uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      - run: pip install scikit-learn joblib numpy cml

      - name: Run poisoning demo
        run: python poison_demo.py

      - name: Generate and post CML report
        env:
          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          python - <<'PYCODE'
          import json
          d = json.load(open('metrics.json'))
          with open('report.md', 'w') as f:
              f.write(f"### Data Poisoning Report\n\n")
              f.write(f"- Poison Percent: {d['poison_percent']*100:.1f}%\n")
              f.write(f"- Accuracy: {d['accuracy']:.4f}\n")
              f.write(f"- Poisoned Samples: {d['n_poisoned_samples']}\n")
          PYCODE
          cml comment create report.md
EOF


In [41]:
!cat .github/workflows/data_poisoning.yml

name: Data Poisoning CML Report

on:
  push:
    branches: [week-8]
  pull_request:
    branches: [main]

jobs:
  poison-experiment:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      
      - uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      - run: pip install scikit-learn joblib numpy cml

      - name: Run poisoning demo
        run: python poison_demo.py

      - name: Generate and post CML report
        env:
          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          python - <<'PYCODE'
          import json
          d = json.load(open('metrics.json'))
          with open('report.md', 'w') as f:
              f.write(f"### Data Poisoning Report\n\n")
              f.write(f"- Poison Percent: {d['poison_percent']*100:.1f}%\n")
              f.write(f"- Accuracy: {d['accuracy']:.4f}\n")
              f.write(f"- Poisoned Samples: {d['n_poisoned_samples']}\n")
          PYCODE
          cml comment create r

In [31]:
!git add .github
!git commit -m "Poison Demo 5% with cml report"

[week8 c9244ef] Poison Demo 5% with cml report
 1 file changed, 38 insertions(+)
 create mode 100644 week8/.github/workflows/data_poisoning.yml


In [32]:
!git push origin week8

Enumerating objects: 8, done.
Counting objects: 100% (8/8), done.
Delta compression using up to 2 threads
Compressing objects: 100% (3/3), done.
Writing objects: 100% (6/6), 926 bytes | 463.00 KiB/s, done.
Total 6 (delta 1), reused 0 (delta 0), pack-reused 0
remote: Resolving deltas: 100% (1/1), completed with 1 local object.[K
To https://github.com/23f2003589/iitm-mlops-w5.git
   e3a143b..c9244ef  week8 -> week8


In [43]:
%%bash
cd iitm-mlops-w5
ls -a
git rm -rf --cached .
git branch week8
git add poison_demo.py .github/workflows/data_poisoning.yml
git commit -m "Add poison demo and workflow"
git push origin week8 --force

.
..
.git
.github
.ipynb_checkpoints
.requirements.5txt.swp
Dockerfile
README.md
Untitled.ipynb
iris_fastapi.py
k8s
model.joblib
poison_demo.py
requirements.txt


fatal: pathspec '.' did not match any files
fatal: pathspec '.github/workflows/data_poisoning.yml' did not match any files


On branch main
Your branch is ahead of 'origin/main' by 1 commit.
  (use "git push" to publish your local commits)

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	.github/
	.ipynb_checkpoints/
	.requirements.5txt.swp
	Dockerfile
	README.md
	Untitled.ipynb
	iris_fastapi.py
	k8s/
	model.joblib
	poison_demo.py
	requirements.txt

nothing added to commit but untracked files present (use "git add" to track)


To https://github.com/23f2003589/iitm-mlops-w5.git
 + c9244ef...142e7b5 week8 -> week8 (forced update)


In [82]:
%%bash
cd iitm-mlops-w5

# Create the workflow file
mkdir -p .github/workflows
git config --global user.name "23f2003589"
git config --global user.email "23f2003589@ds.study.iitm.ac.in"
git remote set-url origin https://23f2003589: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@github.com/23f2003589/iitm-mlops-w5.git

# Commit and push to week8 branch
git switch -c week8 || git switch week8
git add .
git commit -m "15 percent poison demo"
git push origin week8


fatal: A branch named 'week8' already exists.
Already on 'week8'


On branch week8
nothing to commit, working tree clean


To https://github.com/23f2003589/iitm-mlops-w5.git
   d7dc967..21174f5  week8 -> week8


In [63]:
!git remote -v
!git config --global --unset credential.helper
!git config --global credential.helper store


origin	https://23f2003589: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@github.com/23f2003589/iitm-mlops-w5.git (fetch)
origin	https://23f2003589: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@github.com/23f2003589/iitm-mlops-w5.git (push)


In [56]:
!git remote set-url origin https://23f2003589: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@github.com/23f2003589/iitm-mlops-w5.git

In [54]:
%%bash
cd iitm-mlops-w5

# Create workflow folder and YAML file
mkdir -p .github/workflows

cat > .github/workflows/data_poisoning.yml <<'EOF'
name: Data Poisoning CML Report

on:
  push:
    branches: [week8]
  pull_request:
    branches: [main]

jobs:
  poison-experiment:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      - name: Install dependencies
        run: |
          pip install --upgrade pip setuptools wheel
          pip install -U numpy==1.23.5 joblib cml
          pip install scikit-learn==1.2.2 --only-binary=:all:

      - name: Run poisoning demo
        run: python poison_demo.py

      - name: Generate and post CML report
        env:
          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          python - <<'PYCODE'
          import json
          d = json.load(open('metrics.json'))
          with open('report.md', 'w') as f:
              f.write(f"### Data Poisoning Report\n\n")
              f.write(f"- Poison Percent: {d['poison_percent']*100:.1f}%\n")
              f.write(f"- Accuracy: {d['accuracy']:.4f}\n")
              f.write(f"- Poisoned Samples: {d['n_poisoned_samples']}\n")
          PYCODE
          cml comment create report.md
EOF

# Commit and push to week8 branch
git switch -c week8 || git switch week8
git add .
git commit -m "Add fixed CML workflow for poisoning demo"
git push origin week8 --force


fatal: A branch named 'week8' already exists.
Already on 'week8'


On branch week8
nothing to commit, working tree clean


remote: Invalid username or token. Password authentication is not supported for Git operations.
fatal: Authentication failed for 'https://github.com/23f2003589/iitm-mlops-w5.git/'


CalledProcessError: Command 'b'cd iitm-mlops-w5\n\n# Create workflow folder and YAML file\nmkdir -p .github/workflows\n\ncat > .github/workflows/data_poisoning.yml <<\'EOF\'\nname: Data Poisoning CML Report\n\non:\n  push:\n    branches: [week8]\n  pull_request:\n    branches: [main]\n\njobs:\n  poison-experiment:\n    runs-on: ubuntu-latest\n\n    steps:\n      - uses: actions/checkout@v3\n      - uses: actions/setup-python@v4\n        with:\n          python-version: \'3.9\'\n\n      - name: Install dependencies\n        run: |\n          pip install --upgrade pip setuptools wheel\n          pip install -U numpy==1.23.5 joblib cml\n          pip install scikit-learn==1.2.2 --only-binary=:all:\n\n      - name: Run poisoning demo\n        run: python poison_demo.py\n\n      - name: Generate and post CML report\n        env:\n          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        run: |\n          python - <<\'PYCODE\'\n          import json\n          d = json.load(open(\'metrics.json\'))\n          with open(\'report.md\', \'w\') as f:\n              f.write(f"### Data Poisoning Report\\n\\n")\n              f.write(f"- Poison Percent: {d[\'poison_percent\']*100:.1f}%\\n")\n              f.write(f"- Accuracy: {d[\'accuracy\']:.4f}\\n")\n              f.write(f"- Poisoned Samples: {d[\'n_poisoned_samples\']}\\n")\n          PYCODE\n          cml comment create report.md\nEOF\n\n# Commit and push to week8 branch\ngit switch -c week8 || git switch week8\ngit add .\ngit commit -m "Add fixed CML workflow for poisoning demo"\ngit push origin week8 --force\n'' returned non-zero exit status 128.