<a href="https://colab.research.google.com/github/Chirag314/Wandb/blob/main/WnB_practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Weight & Biases is a free Python library that allows you to track, compare, and visualize ML experiments. It is also integrated with many popular libraries such as TensorFlow, PyTorch, Kera, Scikit, Hugging Face, and XGBoost.

In [16]:
# Install W & B
!pip install wandb



In [17]:
import wandb
wandb.login()



True

In [18]:
# Work on IRIS dataset
from sklearn.model_selection import cross_val_score
from sklearn import datasets
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.linear_model import LogisticRegression


In [19]:
#Load iris dataset
df=datasets.load_iris()
X=df.data
y=df.target

#split into train and test set
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

model=LogisticRegression(solver='liblinear',multi_class='ovr')

kfold=StratifiedKFold(n_splits=10,random_state=42,shuffle=True)

#metrics
accuracy=cross_val_score(model,X_train,y_train,cv=kfold,scoring='accuracy').mean()
f1_macro=cross_val_score(model,X_train,y_train,cv=kfold,scoring='f1_macro').mean()
neg_log_loss=cross_val_score(model,X_train,y_train,cv=kfold,scoring='neg_log_loss').mean()

In [20]:
#Initilize wandb project so that all outputs for this experiment will be saved under IRIS project
wandb.init(project='iris')

Log all metrics

In [21]:
wandb.log({'accuracy':accuracy,
           'f1_macro':f1_macro,
           'neg_log_loss':neg_log_loss})


This is just one experiment. We can add many experiments and check results.

In [22]:
def main(name_model,model):
  wandb.init(project='iris',
             group=name_model,
             reinit=True)
  # Load dataset
  df = datasets.load_iris()
  X = df.data
  y = df.target
  # Split into train and test set
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1)

  kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
  accuracy = cross_val_score(model, X_train, y_train, cv=kfold, scoring='accuracy').mean()
  f1_macro = cross_val_score(model, X_train, y_train, cv=kfold, scoring='f1_macro').mean()
  neg_log_loss = cross_val_score(model, X_train, y_train, cv=kfold, scoring='neg_log_loss').mean()

  wandb.log({'accuracy': accuracy,
                'f1_macro': f1_macro,
                'neg_log_loss': neg_log_loss})




In [23]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB

In [24]:
if __name__=='__main__':
  models={"LogisticRegression":LogisticRegression(solver='liblinear',multi_class='ovr'),
          'LinearDiscriminantAnalysis':LinearDiscriminantAnalysis(),
          'KneighborsClassifier':KNeighborsClassifier(),
          'DecisionTreeClassifier':DecisionTreeClassifier(),
          'GaussianNB':GaussianNB()}

  for name,model in models.items():
    main(name,model)

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁
f1_macro,▁
neg_log_loss,▁

0,1
accuracy,0.95833
f1_macro,0.95794
neg_log_loss,-0.33454


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.105731…

0,1
accuracy,▁
f1_macro,▁
neg_log_loss,▁

0,1
accuracy,0.94167
f1_macro,0.93782
neg_log_loss,-0.32999


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113281944461859, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.105757…

0,1
accuracy,▁
f1_macro,▁
neg_log_loss,▁

0,1
accuracy,0.975
f1_macro,0.97337
neg_log_loss,-0.08502


VBox(children=(Label(value='0.010 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁
f1_macro,▁
neg_log_loss,▁

0,1
accuracy,0.95833
f1_macro,0.95644
neg_log_loss,-0.37916


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.105741…

0,1
accuracy,▁
f1_macro,▁
neg_log_loss,▁

0,1
accuracy,0.94167
f1_macro,0.93951
neg_log_loss,-1.50182


Visualize Sklearn Models
wandb also allows us to create common plots to evaluate Sklearn models with built-in functions.

To visualize all classification plots,

In [25]:
# Load libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import wandb

def main(name_model, model):

    wandb.init(project='iris',
                group=name_model, # Group experiments by model
    )

    # Load dataset
    df = datasets.load_iris()
    X = df.data
    y = df.target
    features = df.feature_names

    # Split into train and test set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1)

    # Train model, get predictions
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_probas = model.predict_proba(X_test)

    # Visualize all classification plots
    wandb.sklearn.plot_classifier(model,X_train,X_test,y_train,y_test,y_pred,y_probas,features,model_name=name_model)

if __name__=='__main__':
  main('LinearDiscriminantAnalysis',LinearDiscriminantAnalysis())

VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.145600…

0,1
accuracy,▁
f1_macro,▁
neg_log_loss,▁

0,1
accuracy,0.95
f1_macro,0.94392
neg_log_loss,-0.14124


[34m[1mwandb[0m: 
[34m[1mwandb[0m: Plotting LinearDiscriminantAnalysis.
[34m[1mwandb[0m: Logged feature importances.
[34m[1mwandb[0m: Logged confusion matrix.
[34m[1mwandb[0m: Logged summary metrics.
[34m[1mwandb[0m: Logged class proportions.
[34m[1mwandb[0m: Logged calibration curve.
[34m[1mwandb[0m: Logged roc curve.
[34m[1mwandb[0m: Logged precision-recall curve.


In [26]:
wandb.finish()

VBox(children=(Label(value='0.010 MB of 0.023 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.431572…