Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .dvc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/config.local
/tmp
/cache
4 changes: 4 additions & 0 deletions .dvc/config
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Repository-level DVC settings.
# `remote` under [core] names the remote section defined below ("mygd").
[core]
remote = mygd
# Remote "mygd": storage addressed through a gdrive:// URL.
# NOTE(review): the URL path looks like a Google Drive folder ID; confirm it
# is meant to be committed and that CI has credentials to reach it.
['remote "mygd"']
url = gdrive://1Kk68L-RBcmvF5xtufnEOc80VIVvsFXof
3 changes: 3 additions & 0 deletions .dvcignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore
49 changes: 31 additions & 18 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,38 @@
-name: model-wine-quality
-on: [push]
+name: CML Report
+on: push
 jobs:
   run:
     runs-on: [ubuntu-latest]
-    container: docker://dvcorg/cml-py3:latest
     steps:
-      - uses: actions/checkout@v2
-      - name: cml_run
+      - uses: iterative/setup-cml@v1
+      - uses: iterative/setup-dvc@v1
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+      - name: Generate metrics report
         env:
-          repo_token: ${{ secrets.GITHUB_TOKEN }}
+          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
+          if [ $GITHUB_REF = refs/heads/main ]; then
+            PREVIOUS_REF=HEAD~1
+          else
+            PREVIOUS_REF=main
+          fi

-          # Your ML workflow goes here
-          pip install -r requirements.txt
-          python train.py
-
-          echo "## Model metrics" > report.md
-          cat metrics.txt >> report.md
-
-          echo "## Data viz" >> report.md
-          cml-publish feature_importance.png --md >> report.md
-          cml-publish residuals.png --md >> report.md
-
-          cml-send-comment report.md
+          echo "# CML Report" > report.md
+          echo "## Plots" >> report.md
+          dvc plots diff $PREVIOUS_REF workspace \
+            --show-vega --targets evaluation/plots/precision_recall.json > vega.json
+          vl2svg vega.json prc.svg
+          cml publish prc.svg --title "Precision & Recall" --md >> report.md
+
+          dvc plots diff $PREVIOUS_REF workspace \
+            --show-vega --targets evaluation/plots/confusion_matrix.json > vega.json
+          vl2svg vega.json confusion.svg
+          cml publish confusion.svg --title "Confusion Matrix" --md >> report.md
+
+          echo "## Metrics and Params" >> report.md
+          echo "### $PREVIOUS_REF → workspace" >> report.md
+          dvc exp diff $PREVIOUS_REF --show-md >> report.md
+
+          cml send-comment report.md
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/model.pkl
3 changes: 3 additions & 0 deletions data/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/data.xml
/prepared
/features
12 changes: 12 additions & 0 deletions data/data.xml.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Tracking file for data.xml. The single dependency is data.xml taken from an
# external Git repository at tag v0.1; rev_lock records the exact commit.
# NOTE(review): `frozen: true` presumably stops DVC from re-checking the
# upstream repo until the import is explicitly updated — confirm in DVC docs.
md5: 8ad921bd158a9b447da32d275f72eb98
frozen: true
deps:
  - path: data.xml
    repo:
      url: https://github.com/NerdToMars/wine_data.git
      rev: v0.1
      rev_lock: e6ed3c40ac4f03d9387791da1ced5acced35ae9b
outs:
  - md5: 079fbd15fa2c32c539c4c4e3675b514a
    size: 28890194
    path: data.xml
93 changes: 93 additions & 0 deletions dvc.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Recorded state of the four pipeline stages: the command of each stage plus
# the hash/size of every dependency and output and the parameter values used.
# NOTE(review): presumably machine-written by DVC — regenerate it by running
# the pipeline rather than editing values by hand.
schema: '2.0'
stages:
  prepare:
    cmd: python src/prepare.py data/data.xml
    deps:
      - path: data/data.xml
        md5: 079fbd15fa2c32c539c4c4e3675b514a
        size: 28890194
      - path: src/prepare.py
        md5: f09ea0c15980b43010257ccb9f0055e2
        size: 1576
    params:
      params.yaml:
        prepare.seed: 20170428
        prepare.split: 0.2
    outs:
      - path: data/prepared
        md5: 2fe72f304d64c28867d884e798568460.dir
        size: 16874726
        nfiles: 2
  featurize:
    cmd: python src/featurization.py data/prepared data/features
    deps:
      - path: data/prepared
        md5: 2fe72f304d64c28867d884e798568460.dir
        size: 16874726
        nfiles: 2
      - path: src/featurization.py
        md5: e0265fc22f056a4b86d85c3056bc2894
        size: 2490
    params:
      params.yaml:
        featurize.max_features: 100
        featurize.ngrams: 2
    outs:
      - path: data/features
        md5: 9202bf271b063e36efe33700e553fcef.dir
        size: 3295678
        nfiles: 2
  train:
    cmd: python src/train.py data/features model.pkl
    deps:
      - path: data/features
        md5: 9202bf271b063e36efe33700e553fcef.dir
        size: 3295678
        nfiles: 2
      - path: src/train.py
        md5: c3961d777cfbd7727f9fde4851896006
        size: 967
    params:
      params.yaml:
        train.min_split: 0.01
        train.n_est: 50
        train.seed: 20170428
    outs:
      - path: model.pkl
        md5: 71fa4c5d016f2fc1c00a6fd3d2385e57
        size: 1874902
        isexec: true
  evaluate:
    cmd: python src/evaluate.py model.pkl data/features
    deps:
      - path: data/features
        md5: 9202bf271b063e36efe33700e553fcef.dir
        size: 3295678
        nfiles: 2
      - path: model.pkl
        md5: 71fa4c5d016f2fc1c00a6fd3d2385e57
        size: 1874902
      - path: src/evaluate.py
        md5: 44e714021a65edf881b1716e791d7f59
        size: 2346
    outs:
      - path: evaluation.json
        md5: b07f807e73ad5e67347a6956c671c3aa
        size: 72
        isexec: true
      - path: evaluation/importance.png
        md5: 629d6f80e9fc5cfa2b3d872a4a5cbd1c
        size: 32683
        isexec: true
      - path: evaluation/plots/confusion_matrix.json
        md5: b99494fb5d522a16def52938bef636c2
        size: 243331
        isexec: true
      - path: evaluation/plots/precision_recall.json
        md5: 1a8e2a78fa5608d46c985ea850276598
        size: 133445
        isexec: true
      - path: evaluation/plots/roc.json
        md5: e88b258d360279efa3eab6b68716a874
        size: 159030
        isexec: true
56 changes: 56 additions & 0 deletions dvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# DVC pipeline definition. Each stage lists the command to run (`cmd`), the
# files it reads (`deps`), the params.yaml keys it depends on (`params`) and
# the files it produces (`outs` / `metrics` / `plots`). Stages chain through
# their files: prepare -> featurize -> train -> evaluate.
stages:
  # Reads the raw data/data.xml and writes the data/prepared directory.
  prepare:
    cmd: python src/prepare.py data/data.xml
    deps:
      - data/data.xml
      - src/prepare.py
    params:
      - prepare.seed
      - prepare.split
    outs:
      - data/prepared

  # Turns data/prepared into data/features.
  featurize:
    cmd: python src/featurization.py data/prepared data/features
    deps:
      - data/prepared
      - src/featurization.py
    params:
      - featurize.max_features
      - featurize.ngrams
    outs:
      - data/features

  # Fits the model on data/features and writes it to model.pkl.
  train:
    cmd: python src/train.py data/features model.pkl
    deps:
      - data/features
      - src/train.py
    params:
      - train.min_split
      - train.n_est
      - train.seed
    outs:
      - model.pkl

  # Scores model.pkl against data/features and emits metrics and plot data.
  evaluate:
    cmd: python src/evaluate.py model.pkl data/features
    deps:
      - data/features
      - model.pkl
      - src/evaluate.py
    metrics:
      # cache: false keeps this file out of the DVC cache; it is committed to
      # Git directly instead.
      - evaluation.json:
          cache: false
    plots:
      # No options given, so this image uses the default (DVC-cached) handling.
      - evaluation/importance.png
      # Rendered with the `confusion` template: actual on x, predicted on y.
      - evaluation/plots/confusion_matrix.json:
          cache: false
          template: confusion
          x: actual
          y: predicted
      - evaluation/plots/precision_recall.json:
          cache: false
          x: recall
          y: precision
      - evaluation/plots/roc.json:
          cache: false
          x: fpr
          y: tpr
4 changes: 4 additions & 0 deletions evaluation.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"avg_prec": 0.916698900628581,
"roc_auc": 0.9508481917506465
}
1 change: 1 addition & 0 deletions evaluation/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/importance.png
Loading