In [2]:
import os
import pickle

import numpy as np
import pandas as pd
from xgboost import XGBClassifier

from google.cloud import aiplatform
from google.cloud import bigquery
from google.cloud import storage

In [3]:
# Two hand-written example rows with the ten model input columns, in order.
feature_columns = [
    'age', 'job', 'balance', 'housing', 'loan',
    'contact', 'month', 'campaign', 'pdays', 'poutcome',
]
sample_rows = [
    (30, 'management', 20000, 'yes', 'no', 'cellular', 'apr', 1, 999, 'success'),
    (25, 'student', 5000, 'no', 'no', 'unknown', 'may', 3, -1, 'unknown'),
]
df_test_model = pd.DataFrame(sample_rows, columns=feature_columns)
display(df_test_model)

Unnamed: 0,age,job,balance,housing,loan,contact,month,campaign,pdays,poutcome
0,30,management,20000,yes,no,cellular,apr,1,999,success
1,25,student,5000,no,no,unknown,may,3,-1,unknown


In [4]:
## Authentication and project configuration
# Point Application Default Credentials at the local service-account key
# file, but only when the environment has not already selected one:
# overwriting the variable unconditionally would silently discard
# credentials configured outside the notebook. The key file itself must
# stay out of version control.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', "sa-development.json")

# Google Cloud project, plus the BigQuery dataset/table that is queried
project_id = 'dti-ds'
dataset_id = 'helvi_dataset_007'
table_id = 'campaign_test'
region = 'us-central1'

# Cloud Storage bucket and object path
bucket_name = 'helvi_gcs_007'
blob_name = 'data/campaign_bank.csv'

# File name of the pickled model (fetched from `model/<model_name>` in the bucket)
model_name = 'Capstone_3_helvila.pkl'

In [5]:
# Retrieve the pickled model from Google Cloud Storage and save it locally
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    blob_model = bucket.blob(f'model/{model_name}')
    # Saved under the file name that the model-loading cell opens
    blob_model.download_to_filename('Capstone_3_helvila.pkl')
except Exception as exc:
    # Catch Exception (not a bare except, which would also trap
    # KeyboardInterrupt) and chain the original error so the real cause
    # (credentials, missing bucket/blob, network) stays in the traceback.
    raise RuntimeError(
        f"Could not download 'model/{model_name}' from bucket '{bucket_name}'"
    ) from exc
else:
    print("Read model succeeded")

Read model succeeded


In [7]:
# Load the rows to score from BigQuery.
# `bigquery` is imported once in the import cell at the top of the notebook.
client = bigquery.Client(project=project_id)

# Pull the whole table into a DataFrame
query_job = client.query(f"""select * from {dataset_id}.{table_id}""")
df = query_job.result().to_dataframe()

# Cleansing: drop `int64_field_0` and keep every other column.
# NOTE(review): presumably an auto-named index column from the table load - confirm.
result = df.drop(columns=['int64_field_0'])



In [8]:
# Deserialize the downloaded model.
# NOTE: unpickling can execute code embedded in the file, so this file must
# only ever come from a trusted source.
with open('Capstone_3_helvila.pkl', 'rb') as model_file:
    loaded_model = pickle.loads(model_file.read())

# Predict on the hand-written example rows; the bare name displays the array
y_pred_cloud_new_data = loaded_model.predict(df_test_model)
y_pred_cloud_new_data

array([1, 0])

In [9]:
# Attach the predictions to a copy of the example rows (the original frame
# is left unchanged) and display the combined table
new_test_cloud = df_test_model.assign(bank_campaign=y_pred_cloud_new_data)
new_test_cloud

Unnamed: 0,age,job,balance,housing,loan,contact,month,campaign,pdays,poutcome,bank_campaign
0,30,management,20000,yes,no,cellular,apr,1,999,success,1
1,25,student,5000,no,no,unknown,may,3,-1,unknown,0


In [10]:
# Preview the rows loaded from BigQuery (after dropping `int64_field_0`)
result

Unnamed: 0,age,job,balance,housing,loan,contact,month,campaign,pdays,poutcome
0,49,admin.,0,False,False,cellular,apr,1,-1,unknown
1,54,admin.,1693,False,False,cellular,apr,1,87,other
2,43,admin.,664,True,False,cellular,apr,1,-1,unknown
3,40,admin.,1060,True,False,cellular,apr,3,-1,unknown
4,48,admin.,4099,False,False,cellular,apr,2,-1,unknown
...,...,...,...,...,...,...,...,...,...,...
1558,28,self-employed,159,False,False,cellular,oct,2,420,other
1559,30,self-employed,1785,False,False,unknown,oct,1,-1,unknown
1560,28,self-employed,123,False,True,telephone,oct,1,-1,unknown
1561,51,self-employed,0,False,False,cellular,oct,2,-1,unknown


In [11]:
# Recode the True/False flags as the 'yes'/'no' strings used by the example rows
yes_no = {'True': 'yes', 'False': 'no'}
for flag_col in ('loan', 'housing'):
    result[flag_col] = result[flag_col].astype(str).replace(yes_no)

In [12]:
# Display the frame again to check the recoded `housing` and `loan` columns
result

Unnamed: 0,age,job,balance,housing,loan,contact,month,campaign,pdays,poutcome
0,49,admin.,0,no,no,cellular,apr,1,-1,unknown
1,54,admin.,1693,no,no,cellular,apr,1,87,other
2,43,admin.,664,yes,no,cellular,apr,1,-1,unknown
3,40,admin.,1060,yes,no,cellular,apr,3,-1,unknown
4,48,admin.,4099,no,no,cellular,apr,2,-1,unknown
...,...,...,...,...,...,...,...,...,...,...
1558,28,self-employed,159,no,no,cellular,oct,2,420,other
1559,30,self-employed,1785,no,no,unknown,oct,1,-1,unknown
1560,28,self-employed,123,no,yes,telephone,oct,1,-1,unknown
1561,51,self-employed,0,no,no,cellular,oct,2,-1,unknown


In [13]:
# Score the BigQuery rows with the loaded model.
# A later cell stores the predictions in `result['campaign_pred']`; drop that
# column when it is already present so that re-running this cell never feeds
# an earlier prediction back into the model as an input column.
scoring_features = result.drop(columns=['campaign_pred'], errors='ignore')
y_pred_file_cloud = loaded_model.predict(scoring_features)

# Show only the first 13 predictions
y_pred_file_cloud[:13]

array([1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0])

In [14]:
# Store the predictions in a `campaign_pred` column and display the table
result = result.assign(campaign_pred=y_pred_file_cloud)
result

Unnamed: 0,age,job,balance,housing,loan,contact,month,campaign,pdays,poutcome,campaign_pred
0,49,admin.,0,no,no,cellular,apr,1,-1,unknown,1
1,54,admin.,1693,no,no,cellular,apr,1,87,other,1
2,43,admin.,664,yes,no,cellular,apr,1,-1,unknown,0
3,40,admin.,1060,yes,no,cellular,apr,3,-1,unknown,0
4,48,admin.,4099,no,no,cellular,apr,2,-1,unknown,1
...,...,...,...,...,...,...,...,...,...,...,...
1558,28,self-employed,159,no,no,cellular,oct,2,420,other,1
1559,30,self-employed,1785,no,no,unknown,oct,1,-1,unknown,0
1560,28,self-employed,123,no,yes,telephone,oct,1,-1,unknown,0
1561,51,self-employed,0,no,no,cellular,oct,2,-1,unknown,1


In [17]:
# Destination of the predictions file. `project_id` and `bucket_name` come
# from the configuration cell instead of being re-declared here, so each
# value has a single place where it can be changed.
model_folder = 'data'  # bucket folder that receives the predictions CSV
result_file_name = 'campaign_predictions.csv'
blob_name = f'{model_folder}/{result_file_name}'

# Write the scored rows to a local CSV (index omitted) ready for upload
local_csv_path = result_file_name
result.to_csv(local_csv_path, index=False)

In [18]:
# Upload the local predictions CSV to Google Cloud Storage
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)
    blob.upload_from_filename(local_csv_path)
except Exception as e:
    print(f"An error occurred while uploading to GCS: {e}")
    # Re-raise after reporting: delivering this file is the purpose of the
    # notebook, so a failed upload must stop "Run All" and keep its traceback
    # rather than leave the run looking successful.
    raise
else:
    print("File uploaded to GCS successfully.")

File uploaded to GCS successfully.
