# Step 0: Install dependencies

In [None]:
# ! pip3 install --upgrade google-cloud-aiplatform \
#                             google-cloud-storage

# Step 1: Authenticate your Google Cloud account

In [None]:
## Ejecutar en terminal (https://cloud.google.com/docs/authentication/provide-credentials-adc#how-to)
# gcloud auth login
# gcloud init
# gcloud auth application-default login

# Step 2: Upload the Dataset to GCS

In [1]:
from google.cloud import storage

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the specified GCS bucket."""
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_filename(source_file_name)

    print(f"File {source_file_name} uploaded to {destination_blob_name}.")

# Replace these variables with your information
source_file_name = 'train_data/iris_dataset.csv'
bucket_name = 'bucket-dmc-jose'
destination_blob_name = 'automl_demo/iris_dataset.csv'

upload_blob(bucket_name, source_file_name, destination_blob_name)


File train_data/iris_dataset.csv uploaded to automl_demo/iris_dataset.csv.


# Step 3: Create a Tabular Dataset in Vertex AI

In [3]:
from google.cloud import aiplatform

aiplatform.init(project='astral-reef-391421', 
                location='us-central1')

dataset = aiplatform.TabularDataset.create(
    display_name="custom_jose_dataset",
    gcs_source=f'gs://{bucket_name}/{destination_blob_name}'
)

print(dataset.resource_name)

Creating TabularDataset
Create TabularDataset backing LRO: projects/608375853532/locations/us-central1/datasets/5168827251919159296/operations/5978883681949843456
TabularDataset created. Resource name: projects/608375853532/locations/us-central1/datasets/5168827251919159296
To use this TabularDataset in another session:
ds = aiplatform.TabularDataset('projects/608375853532/locations/us-central1/datasets/5168827251919159296')
projects/608375853532/locations/us-central1/datasets/5168827251919159296


# Step 4: Train the AutoML Model

In [4]:
from google.cloud import aiplatform

# Define your transformations and label_column as before

job = aiplatform.AutoMLTabularTrainingJob(
    display_name                 = "custom_jose_job",
    optimization_prediction_type = "classification",
    optimization_objective       = "minimize-log-loss"
)

model = job.run(
    dataset                   = dataset,
    model_display_name        = "custom_jose_model",
    training_fraction_split   = 0.6,
    validation_fraction_split = 0.2,
    test_fraction_split       = 0.2,
    budget_milli_node_hours   = 1000,
    disable_early_stopping    = False,
    target_column             = "target",
)


No column transformations provided, so now retrieving columns from dataset in order to set default column transformations.
The column transformation of type 'auto' was set for the following columns: ['sepal_width_cm', 'sepal_length_cm', 'petal_length_cm', 'petal_width_cm'].
View Training:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/2952426249449897984?project=608375853532
AutoMLTabularTrainingJob projects/608375853532/locations/us-central1/trainingPipelines/2952426249449897984 current state:
PipelineState.PIPELINE_STATE_PENDING
AutoMLTabularTrainingJob projects/608375853532/locations/us-central1/trainingPipelines/2952426249449897984 current state:
PipelineState.PIPELINE_STATE_RUNNING
AutoMLTabularTrainingJob projects/608375853532/locations/us-central1/trainingPipelines/2952426249449897984 current state:
PipelineState.PIPELINE_STATE_RUNNING
AutoMLTabularTrainingJob projects/608375853532/locations/us-central1/trainingPipelines/2952426249449897984 current s

RuntimeError: Training failed with:
code: 3
message: "Too few rows: 150. Minimum number is: 1000"
