# 1. Getting Started
## a) Connexion à Weights and Biases

In [None]:
# 1. Log in to your W&B account.
# wandb.login() is called without arguments, so no API key is passed
# explicitly from this notebook.
import wandb

wandb.login()

## b) Première run W&B

In [None]:
# 2. Start a W&B run in the project and keep the returned handle in `run`
run = wandb.init(
    project="classification-car-accidents",
    tags=["baseline", "random-forest"],
    name="My first run",
)

In [None]:
# 3. Capture a dictionary of hyperparameters
params = {"n_estimators": 2, "criterion": "gini", "max_depth": 2}

# Record the hyperparameters on the active run. Assigning `wandb.config = params`
# would only rebind the module attribute to a plain dict and leave the run's own
# config untouched, so the values would never be attached to the run.
run.config.update(params)

In [None]:
# 4. Train the model
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# Feature tables are loaded from the preprocessed CSV files.
X_train = pd.read_csv('../data/preprocessed/X_train.csv')
X_test = pd.read_csv('../data/preprocessed/X_test.csv')

# Targets are loaded from CSV and flattened to 1-D arrays with np.ravel
# before being handed to the classifier.
y_train = np.ravel(pd.read_csv('../data/preprocessed/y_train.csv'))
y_test = np.ravel(pd.read_csv('../data/preprocessed/y_test.csv'))

# Build the forest from the hyperparameters in `params`, then fit it on the
# training split.
rf_classifier = RandomForestClassifier(**params)
rf_classifier.fit(X_train, y_train)

In [None]:
# 5. Score the fitted forest on both splits and log the two values to W&B
train_accuracy = rf_classifier.score(X_train, y_train)
test_accuracy = rf_classifier.score(X_test, y_test)
wandb.log(
    {
        "train_accuracy": train_accuracy,
        "test_accuracy": test_accuracy,
    }
)

In [None]:
# 6. Track model artifact
from pathlib import Path

import joblib

# Save the trained model to a file. The parent directory is created first so
# that joblib.dump does not fail when '../models' does not exist yet.
model_filename = '../models/trained_model.joblib'
Path(model_filename).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(rf_classifier, model_filename)

# Track the saved file as an artifact of the current run
wandb.log_artifact(model_filename)

In [None]:
# 7. Finish the run: mark the run opened in step 2 as complete
wandb.finish()

# 2. Visualisation des métriques

## c) Seconde Run W&B

In [None]:
# 1. Log in to your W&B account
wandb.login()

# 2. Start a W&B Run
run = wandb.init(
    project="classification-car-accidents",
    name='My second run',
    tags=["baseline", "Decision Tree"],
)

# 3. Capture a dictionary of hyperparameters
params = {"criterion": 'gini', "max_depth": 10}

# Record the hyperparameters on the run that was just started. Assigning
# `wandb.config = params` would only rebind the module attribute to a plain
# dict, so the values would never be attached to the run.
run.config.update(params)

# 4. Train the model
from sklearn.tree import DecisionTreeClassifier

# The tree is built from the same `params` that were recorded on the run.
dt_classifier = DecisionTreeClassifier(**params)
dt_classifier.fit(X_train, y_train)

In [None]:
# 5. Score the fitted decision tree on both splits and log the two values to W&B
train_accuracy = dt_classifier.score(X_train, y_train)
test_accuracy = dt_classifier.score(X_test, y_test)
wandb.log(
    {
        "train_accuracy": train_accuracy,
        "test_accuracy": test_accuracy,
    }
)

In [None]:
# 6. Track plots and log artifacts with sklearn.plot_classifier
# Class predictions and class probabilities are both computed on the test split.
y_pred = dt_classifier.predict(X_test)
y_probas = dt_classifier.predict_proba(X_test)
# Display names for the classes; presumably listed in the same order as the
# encoded target values -- TODO confirm against the preprocessing step.
labels = ['non-prioritary accident', 'prioritary accident']

# The first eight arguments are passed positionally, so their order must be
# kept exactly as written here.
wandb.sklearn.plot_classifier(
    dt_classifier,
    X_train,
    X_test,
    y_train,
    y_test,
    y_pred,
    y_probas,
    labels,
    model_name="Decision Tree",
    feature_names=X_train.columns,
)

# 7. Finish the run
wandb.finish()

## b) Comparer les runs entre elles

In [None]:
# 1. Log in to your W&B account
wandb.login()

# 2. Start a W&B Run
run = wandb.init(
    project="classification-car-accidents",
    name='My third run',
    tags=["baseline", "Decision Tree"],
)

# 3. Capture a dictionary of hyperparameters
params = {"criterion": 'entropy', "max_depth": 20}

# Record the hyperparameters on the run that was just started. Assigning
# `wandb.config = params` would only rebind the module attribute to a plain
# dict, so the values would never be attached to the run.
run.config.update(params)

# 4. Train the model
from sklearn.tree import DecisionTreeClassifier

# The tree is built from the same `params` that were recorded on the run.
dt_classifier = DecisionTreeClassifier(**params)
dt_classifier.fit(X_train, y_train)

# 5. Capture a dictionary of metrics
train_accuracy = dt_classifier.score(X_train, y_train)
test_accuracy = dt_classifier.score(X_test, y_test)
wandb.log({"train_accuracy": train_accuracy, "test_accuracy": test_accuracy})

# 6. Track plots and log artifacts with sklearn.plot_classifier
# Class predictions and class probabilities are both computed on the test split.
y_pred = dt_classifier.predict(X_test)
y_probas = dt_classifier.predict_proba(X_test)
labels = ['non-prioritary accident', 'prioritary accident']

# The first eight arguments are passed positionally, so their order must be
# kept exactly as written here.
wandb.sklearn.plot_classifier(
    dt_classifier,
    X_train,
    X_test,
    y_train,
    y_test,
    y_pred,
    y_probas,
    labels,
    model_name="Decision Tree",
    feature_names=X_train.columns,
)

# 7. Finish the run
wandb.finish()

# 3. Sweeps
## b) Méthodes et hyperparamètres

In [None]:
# 1. Pick a method: the sweep configuration starts with the 'random' method
sweep_config = {"method": "random"}

In [None]:
# 2. Name hyperparameters
# Each hyperparameter maps to {'values': [...]}, the list of candidate values
# for that hyperparameter.
parameters_dict = {
    name: {"values": candidates}
    for name, candidates in {
        "criterion": ["gini", "entropy", "log_loss"],
        "splitter": ["best", "random"],
        "max_depth": [None, 10, 20, 50, 100, 200, 500],
        "random_state": [42],
    }.items()
}

# Attach the search space to the sweep configuration under 'parameters'.
sweep_config["parameters"] = parameters_dict

## c) Lancement du Sweep

In [None]:
from sklearn.tree import DecisionTreeClassifier

# 3. Initialize the sweep: register the configuration with the project and
# keep the returned identifier for the agent.
sweep_id = wandb.sweep(
    sweep=sweep_config,
    project="classification-car-accidents",
)

# 4. Define the training function
def train(parameters=None):
    """Train and score one decision tree inside its own W&B run.

    Args:
        parameters: Optional configuration forwarded to ``wandb.init`` as
            ``config``. Defaults to ``None``.

    Returns:
        None. The train and test scores are logged to the run under
        ``train_accuracy`` and ``test_accuracy``.
    """
    # Using the run as a context manager finishes it even when fitting or
    # scoring raises, so a failed trial does not leave a run open for the
    # next call.
    with wandb.init(
        project="classification-car-accidents",
        tags=["sweep", "Decision Tree"],
        config=parameters,
    ) as run:
        # Hyperparameters are read back from the run's config rather than
        # from the argument; under a sweep agent this is where the values
        # selected for the trial are expected to be found.
        hyperparameters = dict(run.config)

        dt_classifier = DecisionTreeClassifier(**hyperparameters)
        dt_classifier.fit(X_train, y_train)

        train_accuracy = dt_classifier.score(X_train, y_train)
        test_accuracy = dt_classifier.score(X_test, y_test)
        run.log({"train_accuracy": train_accuracy, "test_accuracy": test_accuracy})

# 5. Run the sweep agent: call `train` for 5 trials of the sweep
wandb.agent(sweep_id, function=train, count=5)