In [None]:
import sys
sys.path.append('../../../config.py')
from config import YOUR_NAME

pip install google-cloud-aiplatform

In [43]:

# Google Cloud project ID; passed to `gcloud config set project` in the cells below.
PROJECT_ID = "training1emakia"  # @param {type:"string"}

In [None]:

# Point the gcloud CLI at PROJECT_ID.
# IPython expands $PROJECT_ID from the Python namespace before running the shell command.
! gcloud config set project $PROJECT_ID

In [None]:
# Set the active gcloud project, using {PROJECT_ID} interpolation.
# NOTE(review): this issues the same command as the preceding cell; one of the two is probably redundant.
! gcloud config set project {PROJECT_ID}

In [None]:
# Print the active gcloud configuration to confirm the project setting took effect.
! gcloud config list

In [31]:
# Google Cloud region for this notebook's resources — TODO confirm every cell honors it.
REGION = "us-central1"  # @param {type: "string"}

In [48]:
# Cloud Storage bucket settings.
# The bucket stores intermediate artifacts such as datasets.
BUCKET_NAME = f"{YOUR_NAME}tweetsdatatraining1"  # @param {type:"string"}
BUCKET_URI = "gs://" + BUCKET_NAME

In [None]:
! gsutil mb -l us-central1 gs://{BUCKET_NAME}

In [None]:
SERVICE_ACCOUNT = !gcloud config list --format='value(core.account)' 
SERVICE_ACCOUNT = SERVICE_ACCOUNT[0]
SERVICE_ACCOUNT

In [None]:
# Grant the captured account the storage.objectCreator role on the bucket.
# NOTE(review): the `serviceAccount:` member prefix assumes the active gcloud account is a service account — confirm.
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_URI

In [52]:
#Initialize Vertex AI
#Initialize the Vertex AI SDK for Python for the project.
from google.cloud import aiplatform

# Bind the SDK to the project and region configured earlier, so later
# aiplatform calls do not depend on ambient environment defaults.
aiplatform.init(project=PROJECT_ID, location=REGION)

In [53]:

from datetime import datetime

# Second-resolution stamp (YYYYMMDDHHMMSS) taken when this cell runs.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"

In [54]:
# Display name for the dataset: user prefix plus the timestamp.
display_name = f"{YOUR_NAME}-english-tweetsdata-{TIMESTAMP}"

In [None]:
# Create a Dataset resource and import the data.
# TextDataset.create() creates the dataset and imports the CSV in one call.
# Creating and importing data is a long-running operation.
src_uris = f"gs://data_{YOUR_NAME}/tweets-labels.csv"
single_label_schema = aiplatform.schema.dataset.ioformat.text.single_label_classification

text_dataset = aiplatform.TextDataset.create(
    display_name=display_name,
    gcs_source=src_uris,
    import_schema_uri=single_label_schema,
    sync=True,
)
text_dataset.wait()

In [None]:
#Train the text classification model
#Training the model is a two-part process:

#Define the training job. You must provide a display name and the type of training you want when you define the training job.
#Run the training job. 
#When running the training job, you need to supply a reference to the dataset to use for training. 
#At this step, you can also configure the data split percentages.
#Specifying data splits is optional.
#If you omit them, the training job defaults to 80% training / 10% validation / 10% testing.

#To train your model, you call AutoMLTextTrainingJob.run().
#The method returns a reference to the new Model object.

#As with importing data into the dataset, training your model can take a substantial amount of time. 
#The client library prints out operation status messages while the training pipeline operation processes. 
#We must wait for the training process to complete before we can get the resource name and ID of the new model, 
#which is required for model evaluation and model deployment.


In [57]:
# Training job definition: single-label text classification.
training_job_display_name = f"{YOUR_NAME}-english-tweet-text-training-job-{TIMESTAMP}"
job = aiplatform.AutoMLTextTrainingJob(
    prediction_type="classification",
    multi_label=False,
    display_name=training_job_display_name,
)

In [None]:
# Display the dataset object for inspection.
text_dataset

In [59]:
# Rebind text_dataset to a dataset looked up by a hard-coded ID.
# NOTE(review): this replaces whatever text_dataset held before this cell ran — confirm the ID is the intended dataset.
text_dataset = aiplatform.TextDataset("5913324167272333312")

In [None]:
# Display the dataset that will be passed to the training job.
text_dataset

In [None]:
# Run the training job with an explicit 70/20/10 train/validation/test split.
model_display_name = f"{YOUR_NAME}-tweet-text-classification-model-{TIMESTAMP}"

split_fractions = {
    "training_fraction_split": 0.7,
    "validation_fraction_split": 0.2,
    "test_fraction_split": 0.1,
}
model = job.run(
    dataset=text_dataset,
    model_display_name=model_display_name,
    sync=True,
    **split_fractions,
)

In [None]:
# Display the model's display name.
model_display_name

In [63]:
# Look up models by display name.
# Note: `model` is rebound to the result of Model.list(), which later cells index as model[0].
model = aiplatform.Model.list(filter=f'display_name={model_display_name}')

In [None]:
# Display the first model returned by the display-name lookup.
model[0]

In [None]:
# Review model evaluation scores
# After the model training has finished, we can review the evaluation scores for it using the list_model_evaluations() method. 
# This method will return an iterator for each evaluation slice.

In [None]:
# Deploy the model and keep the returned endpoint; sync=True blocks until
# deploy() returns.
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
# Use YOUR_NAME (imported from config) as in every other resource name in this
# notebook; MY_NAME is not defined in the visible cells and would raise a
# NameError on a fresh kernel.
deployed_model_display_name = f"{YOUR_NAME}-deployed-tweets-text-classification-model-{timestamp}"
endpoint = model[0].deploy(
    deployed_model_display_name=deployed_model_display_name, sync=True
)

In [None]:
# Fetch the evaluations of the first listed model and print each one as a dict.
model_evaluations = model[0].list_model_evaluations()

for model_evaluation in model_evaluations:
    evaluation_dict = model_evaluation.to_dict()
    print(evaluation_dict)