In [2]:
# Vertex SDK for Python
! pip3 install --upgrade --quiet  google-cloud-aiplatform pandas google-cloud-storage scikit-learn joblib mlflow dvc dvc-gs

### Set Google Cloud Project Information

In [3]:
# Google Cloud project and region passed to aiplatform.init() below.
PROJECT_ID = "iitmbs-mlops"
LOCATION = "us-central1"

### Set GCS Information

In [4]:
# GCS bucket used as the Vertex AI staging bucket, the DVC remote and the model upload target.
# Plain string literal: there is nothing to interpolate, so no f-prefix is needed.
BUCKET_URI = "gs://iitmbs-mlops-21f1000344"

### Initialize Vertex AI SDK for Python

In [5]:
from google.cloud import aiplatform

# Bind the SDK to the project, region and staging bucket configured in the cells above.
aiplatform.init(
    project=PROJECT_ID,
    location=LOCATION,
    staging_bucket=BUCKET_URI,
)

### Import the required libraries

In [6]:
import os
import sys
import pandas as pd
import numpy as np

### Set Up Git Repository

In [7]:
# Create a Git repository in the current working directory; the DVC metadata
# generated later in this notebook is versioned in it.
! git init

[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /home/jupyter/.git/


In [8]:
# Author e-mail recorded on the commits made below; --global writes it to the
# user-level Git configuration rather than to this repository only.
! git config --global user.email "chandrakarsatvik@gmail.com"

In [9]:
# Author name recorded on the commits made below (also stored in the user-level config).
!git config --global user.name "Satvik Chandrakar"

In [10]:
# Create and switch to a branch named "main"; all commits below are made on it.
!git checkout -b main

Switched to a new branch 'main'


### Install & Configure DVC

In [11]:
# Initialise DVC in this repository; this creates the .dvc/ metadata and .dvcignore
# that are staged and committed further down.
! dvc init

Initialized DVC repository.

You can now commit the changes to git.

[31m+---------------------------------------------------------------------+
[0m[31m|[0m                                                                     [31m|[0m
[31m|[0m        DVC has enabled anonymous aggregate usage analytics.         [31m|[0m
[31m|[0m     Read the analytics documentation (and how to opt-out) here:     [31m|[0m
[31m|[0m             <[36mhttps://dvc.org/doc/user-guide/analytics[39m>              [31m|[0m
[31m|[0m                                                                     [31m|[0m
[31m+---------------------------------------------------------------------+
[0m
[33mWhat's next?[39m
[33m------------[39m
- Check out the documentation: <[36mhttps://dvc.org/doc[39m>
- Get help and share ideas: <[36mhttps://dvc.org/chat[39m>
- Star us on GitHub: <[36mhttps://github.com/iterative/dvc[39m>
[0m

In [12]:
# Stage the DVC metadata directory so it becomes part of the first commit.
! git add .dvc

### Configure GCS as Remote Storage

In [13]:
# Register the bucket held in BUCKET_URI as a DVC remote called "myremote";
# -d marks it as the default remote.
! dvc remote add -d myremote {BUCKET_URI}

Setting 'myremote' as a default remote.
[0m

In [14]:
! dvc remote modify myremote credentialpath iitmbs-mlops-bb20ce0da3db.json

[0m

In [15]:
# Stage the DVC config so the remote definition is committed with the repository.
! git add .dvc/config

### Load Data : Original Version

In [16]:
# Load the original Iris CSV from the project bucket. The path is built from
# BUCKET_URI so the bucket name is defined in exactly one place.
df = pd.read_csv(f"{BUCKET_URI}/data/iris.csv")
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [17]:
# Row and column count of the frame loaded above.
df.shape

(150, 5)

In [18]:
# Write a local working copy of the dataset for DVC to track. The target
# directory is created first because DataFrame.to_csv does not create missing
# parent directories and would fail on a fresh checkout.
os.makedirs("data", exist_ok=True)
df.to_csv("data/iris.csv", index=False)

### Track Data with DVC

In [19]:
# Put data/iris.csv under DVC control; this produces data/iris.csv.dvc, the
# small pointer file that Git tracks in place of the data itself.
! dvc add data/iris.csv

 [?25l[32m‚†ã[0m Checking graph
Adding...                                                                       
![A
Collecting files and computing hashes in data/iris.csv |0.00 [00:00,     ?file/s[A
                                                                                [A
![A
  0% Checking cache in '/home/jupyter/.dvc/cache/files/md5'| |0/? [00:00<?,    ?[A
                                                                                [A
![A
  0%|          |Adding data/iris.csv to cache         0/1 [00:00<?,     ?file/s][A
                                                                                [A
![A
  0%|          |Checking out /home/jupyter/data/iris.c0/1 [00:00<?,    ?files/s][A
100% Adding...|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà|1/1 [00:00, 17.83file/s][A

To track the changes with git, run:

	git add data/iris.csv.dvc

To enable auto staging, run:

	dvc config core

In [20]:
# Stage the DVC pointer file together with data/.gitignore.
! git add data/.gitignore data/iris.csv.dvc

In [21]:
# Upload the DVC-cached data to the default remote configured earlier.
! dvc push

Collecting                                            |1.00 [00:00,  143entry/s]
Pushing
![A
  0% Checking cache in 'iitmbs-mlops-21f1000344/files/md5'| |0/? [00:00<?,    ?f[A
                                                                                [A
![A
  0% Checking cache in '/home/jupyter/.dvc/cache/files/md5'| |0/? [00:00<?,    ?[A
                                                                                [A
![A
  0%|          |Pushing to gs                         0/1 [00:00<?,     ?file/s][A

![A[A

  0%|          |/home/jupyter/.dvc/cache/files/0.00/3.77k [00:00<?,        ?B/s][A[A

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà|/home/jupyter/.dvc/cache/f3.77k/3.77k [00:00<00:00,    29.4kB/s][A[A

                                                                                [A[A
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà|Pushing to gs                     1/1 [00:00<00:00,  5.02file/s][A
Pushing                                                                        

In [22]:
# Review what is staged before creating the first commit.
!git status

On branch main

No commits yet

Changes to be committed:
  (use "git rm --cached <file>..." to unstage)
	[32mnew file:   .dvc/.gitignore[m
	[32mnew file:   .dvc/config[m
	[32mnew file:   .dvcignore[m
	[32mnew file:   data/.gitignore[m
	[32mnew file:   data/iris.csv.dvc[m

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	[31m.bashrc[m
	[31m.cache/[m
	[31m.config/[m
	[31m.docker/[m
	[31m.gitconfig[m
	[31m.gsutil/[m
	[31m.ipynb_checkpoints/[m
	[31m.ipython/[m
	[31m.jupyter/[m
	[31m.local/[m
	[31m.npm/[m
	[31mWeek8_GA_Setup.ipynb.ipynb[m
	[31miitmbs-mlops-bb20ce0da3db.json[m
	[31msrc/[m



In [23]:
# First commit: records the DVC setup and the pointer to the original dataset.
!git commit -m "added the original version of iris dataset to dvc"

[main (root-commit) b5f83f3] added the original version of iris dataset to dvc
 5 files changed, 17 insertions(+)
 create mode 100644 .dvc/.gitignore
 create mode 100644 .dvc/config
 create mode 100644 .dvcignore
 create mode 100644 data/.gitignore
 create mode 100644 data/iris.csv.dvc


In [24]:
# Annotated tag "v1" names this commit so the dataset version can be checked out by tag later.
! git tag -a "v1" -m "original 150 rows of data"

### Data Poisoning Function

In [25]:
def poison_data(df, poison_percentage, random_state=None):
    """Return a copy of ``df`` with uniform noise added to a random subset of rows.

    A share of the rows (``poison_percentage`` percent, rounded down) is picked
    without replacement. For each of the four Iris feature columns, every picked
    row receives noise drawn uniformly from ``[-2 * std, 2 * std]``, where
    ``std`` is that column's standard deviation. The input frame is not modified.

    Args:
        df: DataFrame containing the columns ``sepal_length``, ``sepal_width``,
            ``petal_length`` and ``petal_width``. Other columns are left untouched.
        poison_percentage: Percentage of rows to perturb, between 0 and 100.
        random_state: Optional seed for a reproducible result. When ``None``
            (the default) NumPy's global random state is used, so existing
            callers that rely on ``np.random.seed`` see the same draws as before.

    Returns:
        A new DataFrame with the same index and columns as ``df``.

    Raises:
        ValueError: If ``poison_percentage`` is outside ``[0, 100]``.
        KeyError: If one of the feature columns is missing from ``df``.
    """
    if not 0 <= poison_percentage <= 100:
        raise ValueError(
            f"poison_percentage must be between 0 and 100, got {poison_percentage!r}"
        )

    # The np.random module and RandomState expose the same choice/uniform API.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    poisoned_df = df.copy()
    n_samples = len(df)
    n_poison = int(n_samples * poison_percentage / 100)

    # Row *positions* (0 .. n_samples - 1) to poison, chosen without repetition.
    poison_positions = rng.choice(n_samples, n_poison, replace=False)
    if n_poison == 0:
        # Nothing to perturb (0% requested, or too few rows for the percentage).
        return poisoned_df

    numeric_cols = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

    for col in numeric_cols:
        # Noise magnitude is proportional to the spread of the feature.
        noise_magnitude = poisoned_df[col].std() * 2.0
        noise = rng.uniform(-noise_magnitude, noise_magnitude, n_poison)

        # Update by position on the underlying array: the drawn values are
        # positions, not index labels, so label-based .loc would hit the wrong
        # rows (or raise) whenever the frame does not have a default RangeIndex.
        values = poisoned_df[col].to_numpy(dtype=float, copy=True)
        values[poison_positions] += noise
        poisoned_df[col] = values

    return poisoned_df

### Create Version 2 of Iris Data with 5% poisoning

In [26]:
# Re-read the CSV stored in the bucket (the same object the original version was
# loaded from), perturb 5% of its rows and preview the result. The path is built
# from BUCKET_URI so the bucket name is defined in exactly one place.
df = pd.read_csv(f"{BUCKET_URI}/data/iris.csv")
df_poisoned = poison_data(df, 5)
df_poisoned.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [27]:
# Overwrite the local working copy with the 5%-poisoned frame
# (index=False: the row index is not written to the file).
df_poisoned.to_csv("data/iris.csv", index=False)

### Track Data Version 2 with DVC

In [28]:
# Re-add the modified file so data/iris.csv.dvc references the new contents.
! dvc add data/iris.csv

 [?25l[32m‚†ã[0m Checking graph
Adding...                                                                       
![A
Collecting files and computing hashes in data/iris.csv |0.00 [00:00,     ?file/s[A
                                                                                [A
![A
  0% Checking cache in '/home/jupyter/.dvc/cache/files/md5'| |0/? [00:00<?,    ?[A
                                                                                [A
![A
  0%|          |Adding data/iris.csv to cache         0/1 [00:00<?,     ?file/s][A
                                                                                [A
![A
  0%|          |Checking out /home/jupyter/data/iris.c0/1 [00:00<?,    ?files/s][A
100% Adding...|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà|1/1 [00:00, 26.09file/s][A

To track the changes with git, run:

	git add data/iris.csv.dvc

To enable auto staging, run:

	dvc config core

In [29]:
# Stage the updated DVC pointer file (and data/.gitignore) for the version 2 commit.
! git add data/.gitignore data/iris.csv.dvc

In [30]:
# Upload the new data version to the default DVC remote.
! dvc push

Collecting                                            |1.00 [00:00,  144entry/s]
Pushing
![A
  0% Checking cache in 'iitmbs-mlops-21f1000344/files/md5'| |0/? [00:00<?,    ?f[A
                                                                                [A
![A
  0% Checking cache in '/home/jupyter/.dvc/cache/files/md5'| |0/? [00:00<?,    ?[A
                                                                                [A
![A
  0%|          |Pushing to gs                         0/1 [00:00<?,     ?file/s][A

![A[A

  0%|          |/home/jupyter/.dvc/cache/files/0.00/4.16k [00:00<?,        ?B/s][A[A

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà|/home/jupyter/.dvc/cache/f4.16k/4.16k [00:00<00:00,    35.3kB/s][A[A

                                                                                [A[A
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà|Pushing to gs                     1/1 [00:00<00:00,  6.21file/s][A
Pushing                                                                        

In [31]:
# Commit the updated pointer file; this commit represents dataset version 2.
!git commit -m "added the version 2 of iris dataset with 5% Poisoning to dvc"

[main ff650ed] added the version 2 of iris dataset with 5% Poisoning to dvc
 1 file changed, 2 insertions(+), 2 deletions(-)


In [32]:
# Annotated tag "v2" names the commit that holds the 5%-poisoned dataset.
! git tag -a "v2" -m "150 rows of data with 5% poisoning"

### Create Version 3 of Iris Data with 10% poisoning

In [33]:
# Re-read the CSV stored in the bucket (the same object the original version was
# loaded from), perturb 10% of its rows and preview the result. The path is built
# from BUCKET_URI so the bucket name is defined in exactly one place.
df = pd.read_csv(f"{BUCKET_URI}/data/iris.csv")
df_poisoned = poison_data(df, 10)
df_poisoned.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [34]:
# Overwrite the local working copy with the 10%-poisoned frame
# (index=False: the row index is not written to the file).
df_poisoned.to_csv("data/iris.csv", index=False)

### Track Data Version 3 with DVC

In [35]:
# Re-add the modified file so data/iris.csv.dvc references the new contents.
! dvc add data/iris.csv

 [?25l[32m‚†ã[0m Checking graph
Adding...                                                                       
![A
Collecting files and computing hashes in data/iris.csv |0.00 [00:00,     ?file/s[A
                                                                                [A
![A
  0% Checking cache in '/home/jupyter/.dvc/cache/files/md5'| |0/? [00:00<?,    ?[A
                                                                                [A
![A
  0%|          |Adding data/iris.csv to cache         0/1 [00:00<?,     ?file/s][A
                                                                                [A
![A
  0%|          |Checking out /home/jupyter/data/iris.c0/1 [00:00<?,    ?files/s][A
100% Adding...|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà|1/1 [00:00, 27.18file/s][A

To track the changes with git, run:

	git add data/iris.csv.dvc

To enable auto staging, run:

	dvc config core

In [36]:
# Stage the updated DVC pointer file (and data/.gitignore) for the version 3 commit.
! git add data/.gitignore data/iris.csv.dvc

In [37]:
# Upload the new data version to the default DVC remote.
! dvc push

Collecting                                            |1.00 [00:00,  168entry/s]
Pushing
![A
  0% Checking cache in 'iitmbs-mlops-21f1000344/files/md5'| |0/? [00:00<?,    ?f[A
                                                                                [A
![A
  0% Checking cache in '/home/jupyter/.dvc/cache/files/md5'| |0/? [00:00<?,    ?[A
                                                                                [A
![A
  0%|          |Pushing to gs                         0/1 [00:00<?,     ?file/s][A

![A[A

  0%|          |/home/jupyter/.dvc/cache/files/0.00/4.62k [00:00<?,        ?B/s][A[A

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà|/home/jupyter/.dvc/cache/f4.62k/4.62k [00:00<00:00,    41.8kB/s][A[A

                                                                                [A[A
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà|Pushing to gs                     1/1 [00:00<00:00,  6.42file/s][A
Pushing                                                                        

In [38]:
# Commit the updated pointer file; this commit represents dataset version 3.
!git commit -m "added the version 3 of iris dataset with 10% Poisoning to dvc"

[main 104f67d] added the version 3 of iris dataset with 10% Poisoning to dvc
 1 file changed, 2 insertions(+), 2 deletions(-)


In [39]:
# Annotated tag "v3" names the commit that holds the 10%-poisoned dataset.
! git tag -a "v3" -m "150 rows of data with 10% poisoning"

### Create Version 4 of Iris Data with 50% poisoning

In [40]:
# Re-read the CSV stored in the bucket (the same object the original version was
# loaded from), perturb 50% of its rows and preview the result. The path is built
# from BUCKET_URI so the bucket name is defined in exactly one place.
df = pd.read_csv(f"{BUCKET_URI}/data/iris.csv")
df_poisoned = poison_data(df, 50)
df_poisoned.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.56005,3.642148,-1.716287,0.721653,setosa
1,5.196659,2.256479,0.974982,1.038133,setosa
2,4.7,3.2,1.3,0.2,setosa
3,5.823272,3.450448,2.989237,-0.008611,setosa
4,4.988983,3.907461,2.470418,-0.122632,setosa


In [41]:
# Overwrite the local working copy with the 50%-poisoned frame
# (index=False: the row index is not written to the file).
df_poisoned.to_csv("data/iris.csv", index=False)

### Track Data Version 4 with DVC

In [42]:
# Re-add the modified file so data/iris.csv.dvc references the new contents.
! dvc add data/iris.csv

 [?25l[32m‚†ã[0m Checking graph
Adding...                                                                       
![A
Collecting files and computing hashes in data/iris.csv |0.00 [00:00,     ?file/s[A
                                                                                [A
![A
  0% Checking cache in '/home/jupyter/.dvc/cache/files/md5'| |0/? [00:00<?,    ?[A
                                                                                [A
![A
  0%|          |Adding data/iris.csv to cache         0/1 [00:00<?,     ?file/s][A
                                                                                [A
![A
  0%|          |Checking out /home/jupyter/data/iris.c0/1 [00:00<?,    ?files/s][A
100% Adding...|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà|1/1 [00:00, 25.84file/s][A

To track the changes with git, run:

	git add data/iris.csv.dvc

To enable auto staging, run:

	dvc config core

In [43]:
# Stage the updated DVC pointer file (and data/.gitignore) for the version 4 commit.
! git add data/.gitignore data/iris.csv.dvc

In [44]:
# Upload the new data version to the default DVC remote.
! dvc push

Collecting                                            |1.00 [00:00,  163entry/s]
Pushing
![A
  0% Checking cache in 'iitmbs-mlops-21f1000344/files/md5'| |0/? [00:00<?,    ?f[A
                                                                                [A
![A
  0% Checking cache in '/home/jupyter/.dvc/cache/files/md5'| |0/? [00:00<?,    ?[A
                                                                                [A
![A
  0%|          |Pushing to gs                         0/1 [00:00<?,     ?file/s][A

![A[A

  0%|          |/home/jupyter/.dvc/cache/files/0.00/7.99k [00:00<?,        ?B/s][A[A

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà|/home/jupyter/.dvc/cache/f7.99k/7.99k [00:00<00:00,    73.8kB/s][A[A

                                                                                [A[A
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà|Pushing to gs                     1/1 [00:00<00:00,  6.88file/s][A
Pushing                                                                        

In [45]:
# Commit the updated pointer file; this commit represents dataset version 4.
!git commit -m "added the version 4 of iris dataset with 50% Poisoning to dvc"

[main fa72c33] added the version 4 of iris dataset with 50% Poisoning to dvc
 1 file changed, 2 insertions(+), 2 deletions(-)


In [46]:
# Annotated tag "v4" names the commit that holds the 50%-poisoned dataset.
! git tag -a "v4" -m "150 rows of data with 50% poisoning"

### Set Up MLflow Server

In [None]:
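# SSH into the VM and run the following commands to start the MLflow tracking server:
# pip install mlflow
# mlflow server --host 0.0.0.0 --port 8100 --allowed-hosts '*'  --cors-allowed-origins '*'
# Caution: binding to 0.0.0.0 with wildcard allowed hosts and CORS origins accepts
# requests from anywhere; restrict these values outside a throwaway lab setup.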

### Data Version Traversal with `dvc checkout` and Model Training

In [47]:
# Select the dataset version by its Git tag, then let DVC restore the matching
# data/iris.csv. Checking out a tag leaves Git in a detached-HEAD state.
version = "v1"
!git checkout {version}
!dvc checkout

Note: switching to 'v1'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by switching back to a branch.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -c with the switch command. Example:

  git switch -c <new-branch-name>

Or undo this operation with:

  git switch -

Turn off this advice by setting config variable advice.detachedHead to false

HEAD is now at b5f83f3 added the original version of iris dataset to dvc
Building workspace index                              |2.00 [00:00, 7.30entry/s]
Comparing indexes                                     |3.00 [00:00,  933entry/s]
Applying changes                                      |1.00 [00:00,   243file/s]
[33mM[0m       data/iris.csv
[0m

In [48]:
# Train on the checked-out data; the version tag and poisoning level are passed
# through to src/train.py (not shown in this notebook), then the saved model is
# copied to the bucket.
# NOTE(review): every version uploads to the same object name under models/, so a
# later run presumably overwrites this model in the bucket — confirm that keeping
# only the most recent one is intended.
! python src/train.py --version {version} --poison_percentage 0
! gsutil cp artifacts/model.joblib {BUCKET_URI}/models/

2025/11/14 13:15:59 INFO mlflow.tracking.fluent: Experiment with name 'Iris_DT_Classification_Poisoning_Exp' does not exist. Creating a new experiment.
Loading local data from data/iris.csv...
Training Decision Tree model...
Accuracy: 1.000
Precision: 1.000
Recall: 1.000
F1 Score: 1.000
Model saved to artifacts/model.joblib
Successfully registered model 'IRIS-Classifier-dt'.
2025/11/14 13:16:04 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: IRIS-Classifier-dt, version 1
Created version '1' of model 'IRIS-Classifier-dt'.
Training and logging complete!
üèÉ View run masked-pug-493 at: http://127.0.0.1:8100/#/experiments/428268995267881077/runs/c6b32ad0f086433c8c6581a537ac7bd6
üß™ View experiment at: http://127.0.0.1:8100/#/experiments/428268995267881077
Copying file://artifacts/model.joblib [Content-Type=application/octet-stream]...
/ [1 files][  2.8 KiB/  2.8 KiB]                                              

In [49]:
# Switch the pointer file to tag v2 and let DVC restore the 5%-poisoned data/iris.csv.
version = "v2"
!git checkout {version}
!dvc checkout

Previous HEAD position was b5f83f3 added the original version of iris dataset to dvc
HEAD is now at ff650ed added the version 2 of iris dataset with 5% Poisoning to dvc
Building workspace index                              |2.00 [00:00, 7.13entry/s]
Comparing indexes                                    |3.00 [00:00, 1.00kentry/s]
Applying changes                                      |1.00 [00:00,   236file/s]
[33mM[0m       data/iris.csv
[0m

In [50]:
# Train on the v2 data and copy the saved model to the bucket.
# NOTE(review): the upload uses the same object name as the other versions, so it
# presumably replaces the previously uploaded model — confirm this is intended.
! python src/train.py --version {version} --poison_percentage 5
! gsutil cp artifacts/model.joblib {BUCKET_URI}/models/

Loading local data from data/iris.csv...
Training Decision Tree model...
Accuracy: 1.000
Precision: 1.000
Recall: 1.000
F1 Score: 1.000
Model saved to artifacts/model.joblib
Registered model 'IRIS-Classifier-dt' already exists. Creating a new version of this model...
2025/11/14 13:16:46 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: IRIS-Classifier-dt, version 2
Created version '2' of model 'IRIS-Classifier-dt'.
Training and logging complete!
üèÉ View run blushing-smelt-860 at: http://127.0.0.1:8100/#/experiments/428268995267881077/runs/136ba18847f347578214d00ce2abe483
üß™ View experiment at: http://127.0.0.1:8100/#/experiments/428268995267881077
Copying file://artifacts/model.joblib [Content-Type=application/octet-stream]...
/ [1 files][  3.2 KiB/  3.2 KiB]                                                
Operation completed over 1 objects/3.2 KiB.                                      


In [51]:
# Switch the pointer file to tag v3 and let DVC restore the 10%-poisoned data/iris.csv.
version = "v3"
!git checkout {version}
!dvc checkout

Previous HEAD position was ff650ed added the version 2 of iris dataset with 5% Poisoning to dvc
HEAD is now at 104f67d added the version 3 of iris dataset with 10% Poisoning to dvc
Building workspace index                              |2.00 [00:00, 7.55entry/s]
Comparing indexes                                    |3.00 [00:00, 1.15kentry/s]
Applying changes                                      |1.00 [00:00,   295file/s]
[33mM[0m       data/iris.csv
[0m

In [52]:
# Train on the v3 data and copy the saved model to the bucket.
# NOTE(review): the upload uses the same object name as the other versions, so it
# presumably replaces the previously uploaded model — confirm this is intended.
! python src/train.py --version {version} --poison_percentage 10
! gsutil cp artifacts/model.joblib {BUCKET_URI}/models/

Loading local data from data/iris.csv...
Training Decision Tree model...
Accuracy: 0.967
Precision: 0.969
Recall: 0.967
F1 Score: 0.966
Model saved to artifacts/model.joblib
Registered model 'IRIS-Classifier-dt' already exists. Creating a new version of this model...
2025/11/14 13:17:10 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: IRIS-Classifier-dt, version 3
Created version '3' of model 'IRIS-Classifier-dt'.
Training and logging complete!
üèÉ View run bemused-stag-277 at: http://127.0.0.1:8100/#/experiments/428268995267881077/runs/067e08bcd6194764834794501d69f2d3
üß™ View experiment at: http://127.0.0.1:8100/#/experiments/428268995267881077
Copying file://artifacts/model.joblib [Content-Type=application/octet-stream]...
/ [1 files][  3.0 KiB/  3.0 KiB]                                                
Operation completed over 1 objects/3.0 KiB.                                      


In [53]:
# Switch the pointer file to tag v4 and let DVC restore the 50%-poisoned data/iris.csv.
version = "v4"
!git checkout {version}
!dvc checkout

Previous HEAD position was 104f67d added the version 3 of iris dataset with 10% Poisoning to dvc
HEAD is now at fa72c33 added the version 4 of iris dataset with 50% Poisoning to dvc
Building workspace index                              |2.00 [00:00, 7.51entry/s]
Comparing indexes                                     |3.00 [00:00,  974entry/s]
Applying changes                                      |1.00 [00:00,   187file/s]
[33mM[0m       data/iris.csv
[0m

In [54]:
# Train on the v4 data and copy the saved model to the bucket.
# NOTE(review): the upload uses the same object name as the other versions, so it
# presumably replaces the previously uploaded model — confirm this is intended.
! python src/train.py --version {version} --poison_percentage 50
! gsutil cp artifacts/model.joblib {BUCKET_URI}/models/

Loading local data from data/iris.csv...
Training Decision Tree model...
Accuracy: 0.800
Precision: 0.817
Recall: 0.800
F1 Score: 0.805
Model saved to artifacts/model.joblib
Registered model 'IRIS-Classifier-dt' already exists. Creating a new version of this model...
2025/11/14 13:17:29 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: IRIS-Classifier-dt, version 4
Created version '4' of model 'IRIS-Classifier-dt'.
Training and logging complete!
üèÉ View run aged-snake-578 at: http://127.0.0.1:8100/#/experiments/428268995267881077/runs/82228d1edcf34a13ae7b9e0e61c7454c
üß™ View experiment at: http://127.0.0.1:8100/#/experiments/428268995267881077
Copying file://artifacts/model.joblib [Content-Type=application/octet-stream]...
/ [1 files][  4.0 KiB/  4.0 KiB]                                                
Operation completed over 1 objects/4.0 KiB.                                      


### Add to Git and Commit

In [55]:
# Inspect the working tree after the training runs; HEAD is still detached at the
# last tag that was checked out.
!git status

[31mHEAD detached at [mv4
Untracked files:
  (use "git add <file>..." to include in what will be committed)
	[31m.bashrc[m
	[31m.cache/[m
	[31m.config/[m
	[31m.docker/[m
	[31m.gitconfig[m
	[31m.gsutil/[m
	[31m.ipynb_checkpoints/[m
	[31m.ipython/[m
	[31m.jupyter/[m
	[31m.local/[m
	[31m.npm/[m
	[31mWeek8_GA_Setup.ipynb.ipynb[m
	[31martifacts/[m
	[31miitmbs-mlops-bb20ce0da3db.json[m
	[31msrc/[m

nothing added to commit but untracked files present (use "git add" to track)


In [56]:
# Return to the main branch before committing the training code and artifacts.
! git checkout main

Switched to branch 'main'


In [57]:
# Review the tagged dataset commits on main.
!git log

[33mcommit fa72c33a2422f528ddd3d4ce24f2ddc789263bf6[m[33m ([m[1;36mHEAD -> [m[1;32mmain[m[33m, [m[1;33mtag: v4[m[33m)[m
Author: Satvik Chandrakar <chandrakarsatvik@gmail.com>
Date:   Fri Nov 14 13:13:32 2025 +0000

    added the version 4 of iris dataset with 50% Poisoning to dvc

[33mcommit 104f67dd06b605627e4fe6bebcf768b848a077dc[m[33m ([m[1;33mtag: v3[m[33m)[m
Author: Satvik Chandrakar <chandrakarsatvik@gmail.com>
Date:   Fri Nov 14 13:13:05 2025 +0000

    added the version 3 of iris dataset with 10% Poisoning to dvc

[33mcommit ff650ed73df511efd1ca6831c377de73a748cde7[m[33m ([m[1;33mtag: v2[m[33m)[m
Author: Satvik Chandrakar <chandrakarsatvik@gmail.com>
Date:   Fri Nov 14 13:12:38 2025 +0000

    added the version 2 of iris dataset with 5% Poisoning to dvc

[33mcommit b5f83f3aa3706849dffc79b4a04ce86b319ed5b5[m[33m ([m[1;33mtag: v1[m[33m)[m
Author: Satvik Chandrakar <chandrakarsatvik@gmail.com>
Date:   Fri Nov 14 13:11:39 2025 +0000

    added 

In [58]:
!git add artifacts/ data/ src/ .dvc/ .gitconfig .dvcignore

In [59]:
# Confirm what the previous cell staged.
!git status

On branch main
Changes to be committed:
  (use "git restore --staged <file>..." to unstage)
	[32mnew file:   .gitconfig[m
	[32mnew file:   artifacts/model.joblib[m
	[32mnew file:   src/.ipynb_checkpoints/train-checkpoint.py[m
	[32mnew file:   src/train.py[m

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	[31m.bashrc[m
	[31m.cache/[m
	[31m.config/[m
	[31m.docker/[m
	[31m.gsutil/[m
	[31m.ipynb_checkpoints/[m
	[31m.ipython/[m
	[31m.jupyter/[m
	[31m.local/[m
	[31m.npm/[m
	[31mWeek8_GA_Setup.ipynb.ipynb[m
	[31miitmbs-mlops-bb20ce0da3db.json[m



In [60]:
# Commit the files staged above.
! git commit -m "Model trained using various level of data poisoning"

[main bca116b] Model trained using various level of data poisoning
 4 files changed, 211 insertions(+)
 create mode 100644 .gitconfig
 create mode 100644 artifacts/model.joblib
 create mode 100644 src/.ipynb_checkpoints/train-checkpoint.py
 create mode 100644 src/train.py


In [61]:
# Verify that the new commit sits on top of the tagged dataset commits.
!git log

[33mcommit bca116b3c7e66ca554f7859b4d980a4a203c2322[m[33m ([m[1;36mHEAD -> [m[1;32mmain[m[33m)[m
Author: Satvik Chandrakar <chandrakarsatvik@gmail.com>
Date:   Fri Nov 14 13:19:15 2025 +0000

    Model trained using various level of data poisoning

[33mcommit fa72c33a2422f528ddd3d4ce24f2ddc789263bf6[m[33m ([m[1;33mtag: v4[m[33m)[m
Author: Satvik Chandrakar <chandrakarsatvik@gmail.com>
Date:   Fri Nov 14 13:13:32 2025 +0000

    added the version 4 of iris dataset with 50% Poisoning to dvc

[33mcommit 104f67dd06b605627e4fe6bebcf768b848a077dc[m[33m ([m[1;33mtag: v3[m[33m)[m
Author: Satvik Chandrakar <chandrakarsatvik@gmail.com>
Date:   Fri Nov 14 13:13:05 2025 +0000

    added the version 3 of iris dataset with 10% Poisoning to dvc

[33mcommit ff650ed73df511efd1ca6831c377de73a748cde7[m[33m ([m[1;33mtag: v2[m[33m)[m
Author: Satvik Chandrakar <chandrakarsatvik@gmail.com>
Date:   Fri Nov 14 13:12:38 2025 +0000

    added the version 2 of iris dataset with 5

In [62]:
!git remote add origin https://Satvik-ai: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@github.com/Satvik-ai/mlops-assignment-8.git

In [63]:
# Publish main to the "origin" remote and set it as the upstream branch (-u).
!git push -u origin main

Enumerating objects: 29, done.
Counting objects: 100% (29/29), done.
Delta compression using up to 4 threads
Compressing objects: 100% (25/25), done.
Writing objects: 100% (29/29), 5.36 KiB | 914.00 KiB/s, done.
Total 29 (delta 4), reused 0 (delta 0), pack-reused 0
remote: Resolving deltas: 100% (4/4), done.[K
To https://github.com/Satvik-ai/mlops-assignment-8.git
 * [new branch]      main -> main
Branch 'main' set up to track remote branch 'main' from 'origin'.
