In [2]:
import wandb
import pendulum
import random
import pandas as pd

In [3]:
# add your wandb API key here
WANDB_API_KEY = "776c36afb66bd3bbde7b0ba51d66a93fd46f9893"

In [4]:
# read and preprocess data
data = pd.read_csv("intro_ml_workshop_dataset.csv")
data.set_index('ds', inplace=True)
data.index = pd.to_datetime(data.index)
resampled_5m_data = data.resample('5min').mean().reset_index()
resampled_5m_data['unix_timestamp'] = resampled_5m_data['ds'].apply(lambda x: pendulum.parse(str(x)).timestamp())
resampled_5m_data = resampled_5m_data.fillna(method='ffill')

resampled_5m_data.head()

Unnamed: 0,ds,power,wetbulb_temperature,humidity,unix_timestamp
0,2023-06-27 17:05:00,2089.42477,88.4225,68.306249,1687886000.0
1,2023-06-27 17:10:00,2088.770633,87.902,68.366667,1687886000.0
2,2023-06-27 17:15:00,2088.951108,87.503,68.720832,1687886000.0
3,2023-06-27 17:20:00,2084.58676,87.08,69.434999,1687886000.0
4,2023-06-27 17:25:00,2070.732146,86.744,70.12,1687887000.0


## 1. Setup project

In [None]:
import wandb
import pendulum

# login to wandb
wandb.login(key=WANDB_API_KEY)

# setup project
dt_string = pendulum.now(tz='Asia/Bangkok').to_atom_string()
wandb.init(project="alto-academy-demo", name=dt_string)

# update config
config = wandb.config
config.learning_rate = 0.01

# log 1-step metrics
wandb.log({"loss": 0.1})

# [optional] finish the wandb run, necessary in notebooks
wandb.finish()

## 2. Log experiment Metrics

In [6]:
import wandb
import random
import pendulum

# login to wandb
wandb.login(key=WANDB_API_KEY)

# training config
epochs=10
lr=0.01

# setup project
dt_string = pendulum.now(tz='Asia/Bangkok').to_atom_string()
run = wandb.init(project="alto-academy-demo", 
                 name=dt_string,
                 config={
                     "learning_rate": lr,
                     "epochs": epochs,
                 })

# simulating a training run
offset = random.random() / 5
print(f"lr: {lr}")
for epoch in range(2, epochs):
    acc = 1 - 2 ** -epoch - random.random() / epoch - offset
    loss = 2 ** -epoch + random.random() / epoch + offset
    print(f"epoch={epoch}, accuracy={acc}, loss={loss}")
    
    # log metrics to wandb
    wandb.log({"accuracy": acc, "loss": loss})

# [optional] finish the wandb run, necessary in notebooks
wandb.finish()

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/thakorns/.netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011167490733333176, max=1.0…

lr: 0.01
epoch=2, accuracy=0.2797586927830533, loss=0.8650953695419994
epoch=3, accuracy=0.5150342303431279, loss=0.3417202876847673
epoch=4, accuracy=0.6047508943370596, loss=0.29953125612357867
epoch=5, accuracy=0.6411853821125453, loss=0.27633385835832697
epoch=6, accuracy=0.7162251525429684, loss=0.23982086372588932
epoch=7, accuracy=0.7568122521469542, loss=0.3046787021295831
epoch=8, accuracy=0.7912187189946888, loss=0.2744709133229515
epoch=9, accuracy=0.75546195800488, loss=0.19053354808239492




0,1
accuracy,▁▄▅▆▇███
loss,█▃▂▂▂▂▂▁

0,1
accuracy,0.75546
loss,0.19053


## 3. Log experiment Plots and Tables

In [18]:
# login to wandb
wandb.login(key=WANDB_API_KEY)

# training config
epochs=10
lr=0.01

# setup project
dt_string = pendulum.now(tz='Asia/Bangkok').to_atom_string()
run = wandb.init(project="alto-academy-demo", 
                 name=dt_string,
                 config={
                     "learning_rate": lr,
                     "epochs": epochs,
                 })

# create table
table = wandb.Table(data=resampled_5m_data, columns = ["unix_timestamp", "power"])

# log plot
wandb.log({"my_custom_plot_id" : wandb.plot.line(table, "unix_timestamp", "power", title="Power Plot")})

wandb.finish()

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/thakorns/.netrc




## 4. Log experiment Artifacts

In [20]:
# login to wandb
wandb.login(key=WANDB_API_KEY)

# training config
epochs=10
lr=0.01

# setup project
dt_string = pendulum.now(tz='Asia/Bangkok').to_atom_string()
run = wandb.init(project="alto-academy-demo", 
                 name=dt_string,
                 config={
                     "learning_rate": lr,
                     "epochs": epochs,
                 })

# create table
table = wandb.Table(data=resampled_5m_data)

# setup artifact
table_artifact = wandb.Artifact("csv_artifact", type="dataset")
table_artifact.add(table, "table")
table_artifact.add_file("intro_ml_workshop_dataset.csv")

# log the table to visualize with a run
run.log({"csv_table": table})

# log CSV file as an Artifact
run.log_artifact(table_artifact)

wandb.finish()

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/thakorns/.netrc




In [24]:
import wandb

run = wandb.init()

# download artifact file to local
artifact = run.use_artifact('thakorneyp11/alto-academy-demo/csv_artifact:v0', type='dataset')
artifact_dir = artifact.download(root="./")



VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011167712500001573, max=1.0…

start


[34m[1mwandb[0m:   2 of 2 files downloaded.  


## 5. Callback function (with Scikit-learn)

### 5.1 normal training script

In [25]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import plotly.graph_objects as go

# Define features and target
X = resampled_5m_data[['wetbulb_temperature','humidity']]
y = resampled_5m_data['power']

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize Linear Regression model
lr_model = LinearRegression()

# Train the model
lr_model.fit(X_train, y_train)

# Predict on test set
y_pred = lr_model.predict(X_test)

# Calculate the mean squared error
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error for Linear Regression: {mse:.2f}")

fig = go.Figure()
fig.add_trace(go.Scatter(x=y_test, y=y_pred, mode='markers', name='Predicted Values'))
fig.add_trace(go.Scatter(x=[y_test.min(), y_test.max()], y=[y_test.min(), y_test.max()], mode='lines', name='Perfect Fit'))
fig.update_layout(title='Linear Regression: True vs Predicted', xaxis_title='True Values', yaxis_title='Predicted Values', legend_title='Legend')
fig.show()

Mean Squared Error for Linear Regression: 196514.88


### 5.2 normal tracking in training script

In [27]:
import wandb
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import plotly.graph_objects as go

# login to wandb
wandb.login(key=WANDB_API_KEY)

# setup project
dt_string = pendulum.now(tz='Asia/Bangkok').to_atom_string()
run = wandb.init(project="alto-academy-demo", 
                 name=dt_string)

# Define features and target
X = resampled_5m_data[['wetbulb_temperature','humidity']]
y = resampled_5m_data['power']

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize Linear Regression model
lr_model = LinearRegression()

# Train the model
lr_model.fit(X_train, y_train)

# Predict on test set
y_pred = lr_model.predict(X_test)

# Calculate the mean squared error
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error for Linear Regression: {mse:.2f}")

# Log metrics to W&B
wandb.log({'Mean Squared Error': mse})

# Optionally log model coefficients
wandb.log({'Coefficient': lr_model.coef_, 'Intercept': lr_model.intercept_})

fig = go.Figure()
fig.add_trace(go.Scatter(x=y_test, y=y_pred, mode='markers', name='Predicted Values'))
fig.add_trace(go.Scatter(x=[y_test.min(), y_test.max()], y=[y_test.min(), y_test.max()], mode='lines', name='Perfect Fit'))
fig.update_layout(title='Linear Regression: True vs Predicted', xaxis_title='True Values', yaxis_title='Predicted Values', legend_title='Legend')
fig.show()

wandb.finish()





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Intercept,▁
Mean Squared Error,▁

0,1
Intercept,-5004.69892
Mean Squared Error,196514.88191


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011167527777777448, max=1.0…

Mean Squared Error for Linear Regression: 196514.88




0,1
Intercept,▁
Mean Squared Error,▁

0,1
Intercept,-5004.69892
Mean Squared Error,196514.88191


### 5.3 tracking in training script with callback function for wandb

In [4]:
import wandb
from wandb.keras import WandbCallback
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# login to wandb
wandb.login(key=WANDB_API_KEY)

# setup project
dt_string = pendulum.now(tz='Asia/Bangkok').to_atom_string()
run = wandb.init(project="alto-academy-demo", 
                 name=dt_string)

# Define features and target
X = resampled_5m_data[['wetbulb_temperature','humidity']]
y = resampled_5m_data['power']

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define a simple neural network model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(32, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model, logging metrics to W&B at the end of each epoch
model.fit(X_train_scaled, y_train, validation_data=(X_test_scaled, y_test), epochs=10, callbacks=[WandbCallback()])

wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mthakorneyp11[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/thakorns/.netrc




Epoch 1/10


  saving_api.save_model(
INFO:tensorflow:Assets written to: /Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best)... Done. 0.0s


Epoch 2/10


  saving_api.save_model(
INFO:tensorflow:Assets written to: /Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best)... Done. 0.0s


Epoch 3/10


  saving_api.save_model(
INFO:tensorflow:Assets written to: /Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best)... Done. 0.0s


Epoch 4/10


  saving_api.save_model(
INFO:tensorflow:Assets written to: /Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best)... Done. 0.0s


Epoch 5/10


  saving_api.save_model(
INFO:tensorflow:Assets written to: /Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best)... Done. 0.0s


Epoch 6/10


  saving_api.save_model(
INFO:tensorflow:Assets written to: /Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best)... Done. 0.0s


Epoch 7/10


  saving_api.save_model(
INFO:tensorflow:Assets written to: /Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best)... Done. 0.0s


Epoch 8/10

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best)... Done. 0.0s


Epoch 9/10


  saving_api.save_model(
INFO:tensorflow:Assets written to: /Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best)... Done. 0.0s


Epoch 10/10


  saving_api.save_model(
INFO:tensorflow:Assets written to: /Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/thakorns/Desktop/AltoTech/codebases/alto-academy-part3-class6-automated-ml/wandb/run-20231028_055351-7132urj1/files/model-best)... Done. 0.0s




<keras.src.callbacks.History at 0x17581e0d0>

## 6. Hyperparameter tuning with Sweep

In [8]:
import wandb
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error

# login to wandb
wandb.login(key=WANDB_API_KEY)

# setup Sweep config
dt_string = pendulum.now(tz='Asia/Bangkok').to_atom_string()
sweep_configuration = {
    "name": f"sweep-{dt_string}",
    "metric": {"name": "Mean Squared Error", "goal": "minimize"},
    "method": "grid",
    "parameters": {
        "alpha": {"values": [0.1, 1.0, 10.0, 100.0]}
    },
}
sweep_id = wandb.sweep(sweep_configuration, project="alto-academy-demo")

def model_training():
    with wandb.init():
        # retrieve alpha value from sweep config
        alpha = wandb.config.alpha
        
        # Define features and target
        X = resampled_5m_data[['wetbulb_temperature','humidity']]
        y = resampled_5m_data['power']

        # Split data into training and test sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Initialize Ridge Regression model with alpha from config
        ridge_model = Ridge(alpha=alpha)

        # Train the model
        ridge_model.fit(X_train, y_train)

        # Predict on test set
        y_pred = ridge_model.predict(X_test)

        # Calculate the mean squared error
        mse = mean_squared_error(y_test, y_pred)

        # Log metrics to W&B
        wandb.log({'Mean Squared Error': mse})

# run the sweep
wandb.agent(sweep_id, function=model_training)

wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/thakorns/.netrc


Create sweep with ID: dcvqlt2v
Sweep URL: https://wandb.ai/thakorneyp11/alto-academy-demo/sweeps/dcvqlt2v


[34m[1mwandb[0m: Agent Starting Run: kqarw3ct with config:
[34m[1mwandb[0m: 	alpha: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.237890…

0,1
Mean Squared Error,▁

0,1
Mean Squared Error,196514.8963


[34m[1mwandb[0m: Agent Starting Run: m7gmr7ca with config:
[34m[1mwandb[0m: 	alpha: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011167809255555501, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.238146…

0,1
Mean Squared Error,▁

0,1
Mean Squared Error,196515.02581


[34m[1mwandb[0m: Agent Starting Run: 1sojftg9 with config:
[34m[1mwandb[0m: 	alpha: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.237992…

0,1
Mean Squared Error,▁

0,1
Mean Squared Error,196516.32331


[34m[1mwandb[0m: Agent Starting Run: yz11lxa5 with config:
[34m[1mwandb[0m: 	alpha: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.245610…

0,1
Mean Squared Error,▁

0,1
Mean Squared Error,196529.53601


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.
