## 1. Dependencias y variables

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
shell_output = !gcloud auth list 2>/dev/null
SERVICE_ACCOUNT = shell_output[2].replace("*", "").strip()
print("Service Account:", SERVICE_ACCOUNT)

Service Account: 330930586045-compute@developer.gserviceaccount.com


In [None]:
USER_FLAG = "--user"
!pip3 install {USER_FLAG} google-cloud-aiplatform --upgrade
!pip3 install {USER_FLAG} kfp==2.4.0 google-cloud-pipeline-components
!pip3 install {USER_FLAG} gcsfs
!pip install -U google-cloud-aiplatform "shapely>2"

In [None]:
import os

if not os.getenv("IS_TESTING"):
    # Automatically restart kernel after installs
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)


In [None]:
!python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"
!python3 -c "import google_cloud_pipeline_components; print('google_cloud_pipeline_components version: {}'.format(google_cloud_pipeline_components.__version__))"

In [None]:
import os
PROJECT_ID = ""
if not os.getenv("IS_TESTING"):
    shell_output=!gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID: ", PROJECT_ID)


## 2. Librerias

In [3]:
import os
import pprint as pp
import sys

import pickle
import os
import argparse

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error,r2_score,mean_squared_error,mean_absolute_percentage_error
import numpy as np
import pandas as pd

from google.cloud import storage
from google.cloud import aiplatform 

## 3. Modelo

In [4]:
BUCKET_NAME="gs://bucket2025nahumfg/inputs/trabajo_final_tercer_estadio"
url = BUCKET_NAME+"/CreditScoring.csv"
data = pd.read_csv(url)
data.head()

Unnamed: 0,ID,SeriousDlqin2yrs,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents
0,1,1,0.766127,45,2,0.802982,9120.0,13,0,6,0,2.0
1,2,0,0.957151,40,0,0.121876,2600.0,4,0,0,0,1.0
2,3,0,0.65818,38,1,0.085113,3042.0,2,1,0,0,0.0
3,4,0,0.23381,30,0,0.03605,3300.0,5,0,0,0,0.0
4,5,0,0.907239,49,1,0.024926,63588.0,7,0,1,0,0.0


In [5]:
modelling_columns = [
                      'SeriousDlqin2yrs'
                    , 'RevolvingUtilizationOfUnsecuredLines'
                    , 'age'
                    , 'NumberOfTime30-59DaysPastDueNotWorse'
                    , 'DebtRatio'
                    , 'MonthlyIncome'
                    , 'NumberOfOpenCreditLinesAndLoans'
                    , 'NumberOfTimes90DaysLate'
                    ,'NumberRealEstateLoansOrLines'
                    , 'NumberOfTime60-89DaysPastDueNotWorse'
                    ,'NumberOfDependents'
                    ]
data = data[modelling_columns]

In [6]:
train_size = 0.8
test_size = 0.1
valid_size = 0.1

train_ds, valid_ds, test_ds = np.split(data.sample(frac=1, random_state=42), [int((train_size)*len(data)), int((1-test_size)*len(data))])

In [7]:
target = "SeriousDlqin2yrs"

x_train = train_ds.drop(columns=target, axis=1)
y_train = train_ds[target]

x_valid = valid_ds.drop(columns=target, axis=1)
y_valid = valid_ds[target]

x_test = test_ds.drop(columns=target, axis=1)
y_test = test_ds[target]


In [8]:
model = RandomForestRegressor()
model.fit(x_train , y_train)

In [9]:
y_pred = model.predict(x_valid)

#evaluate Model
adj_r2 = r2_score(y_true=y_valid, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_valid, y_pred=y_pred)
mse = mean_squared_error(y_true=y_valid, y_pred=y_pred)
mape = mean_absolute_percentage_error(y_true=y_valid, y_pred=y_pred)
rmse = np.sqrt(mse)
print(f"Adjusted R2 : {adj_r2}")
print(f"Mean Absolute Error : {mae}")
print(f"Mean Absolute Percentage Error : {round(mape,4)*100}%")
print(f"Mean Squared Error : {mse}")
print(f"Root Mean Squared Error : {rmse}")

Adjusted R2 : 0.1753158620137517
Mean Absolute Error : 0.10277432922223403
Mean Absolute Percentage Error : 2.412625609660365e+16%
Mean Squared Error : 0.05278725830204145
Root Mean Squared Error : 0.2297547786272169


## 4. Primera forma de desplegar

In [10]:
MODEL_PATH=BUCKET_NAME+"/models/"
model_path = "./" + "model.pkl"
with open(model_path, 'wb') as file:  
    pickle.dump(model, file) 
    
#copy model artifacts to GCS storage
!gsutil cp "model.pkl" $MODEL_PATH

Copying file://model.pkl [Content-Type=application/octet-stream]...
\ [1 files][ 81.7 MiB/ 81.7 MiB]                                                
Operation completed over 1 objects/81.7 MiB.                                     


In [11]:
#Prediction containers list available at : https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers
serving_container_uri = "us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-3:latest"

#define GCS location for model artifacts
artifact_uri = MODEL_PATH

#Upload Model to Vertex AI Model Registry using Python SDK
model = aiplatform.Model.upload(display_name= "MLOps0-model" ,
                                    artifact_uri=artifact_uri,
                                    serving_container_image_uri=serving_container_uri)


Creating Model
Create Model backing LRO: projects/330930586045/locations/us-central1/models/5677153467672559616/operations/1965766158686617600
Model created. Resource name: projects/330930586045/locations/us-central1/models/5677153467672559616@1
To use this Model in another session:
model = aiplatform.Model('projects/330930586045/locations/us-central1/models/5677153467672559616@1')


In [12]:
#Create the model endpoint using Python SDK
endpoint = model.deploy(machine_type="n1-standard-4",
                        min_replica_count=1,
                        max_replica_count=1)

Creating Endpoint
Create Endpoint backing LRO: projects/330930586045/locations/us-central1/endpoints/6830475194511917056/operations/2094224301182943232
Endpoint created. Resource name: projects/330930586045/locations/us-central1/endpoints/6830475194511917056
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/330930586045/locations/us-central1/endpoints/6830475194511917056')
Deploying model to Endpoint : projects/330930586045/locations/us-central1/endpoints/6830475194511917056
Deploy Endpoint model backing LRO: projects/330930586045/locations/us-central1/endpoints/6830475194511917056/operations/679988465072340992
Endpoint model deployed. Resource name: projects/330930586045/locations/us-central1/endpoints/6830475194511917056


In [14]:
#Test the model endpoint using Python SDK

#create list to hold request data
instances = [
    [0.766127, 45, 2, 0.802982, 9120.0, 13, 0, 6, 0, 2.0],
  ]

prediction = endpoint.predict(instances=instances)

print(prediction)


Prediction(predictions=[0.75], deployed_model_id='2572906687019089920', metadata=None, model_version_id='1', model_resource_name='projects/330930586045/locations/us-central1/models/5677153467672559616', explanations=None)


In [16]:
# Undeploy the model and delete the endpoint
endpoint.undeploy_all()
endpoint.delete()
model.delete()

Undeploying Endpoint model: projects/330930586045/locations/us-central1/endpoints/6830475194511917056
Undeploy Endpoint model backing LRO: projects/330930586045/locations/us-central1/endpoints/6830475194511917056/operations/2526569865410510848
Endpoint model undeployed. Resource name: projects/330930586045/locations/us-central1/endpoints/6830475194511917056
Deleting Endpoint : projects/330930586045/locations/us-central1/endpoints/6830475194511917056
Endpoint deleted. . Resource name: projects/330930586045/locations/us-central1/endpoints/6830475194511917056
Deleting Endpoint resource: projects/330930586045/locations/us-central1/endpoints/6830475194511917056
Delete Endpoint backing LRO: projects/330930586045/locations/us-central1/operations/7138255883837898752
Endpoint resource projects/330930586045/locations/us-central1/endpoints/6830475194511917056 deleted.
Deleting Model : projects/330930586045/locations/us-central1/models/5677153467672559616
Model deleted. . Resource name: projects/3