## Note: please use Python 3.10 or newer (for example 3.10 or 3.11) to run all the code below

# Installations

# Import Library

In [12]:
## import libraries
import os
import pandas as pd
import numpy as np

#import google cloud library
from google.cloud import bigquery
from google.cloud import storage
from google.cloud import aiplatform

## sklearn module
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import pickle


### Authenticate to GCP using a service account

- Depending on your Jupyter environment, you may have to authenticate manually.

- Use "sa-development.json" as the credentials file for authenticating to GCP.

- Adjust the path so that it points to where the code is stored on your local machine.

In [13]:
# Register the service-account key file (path is relative to the notebook's
# working directory) as the credentials used for GCP authentication.
os.environ.update({'GOOGLE_APPLICATION_CREDENTIALS': "./sa-development.json"})

## Replace the code below with your own configuration
- The configuration naming convention is shared at the following link: [configuration name](https://docs.google.com/spreadsheets/d/1U7bbXp9Y6uLGZXThfsqDneo1U-E3J8nwCcxIdpjgZmI/edit#gid=1834500505)

- Example configuration

    ```
    project_id = 'dti-ds'
    dataset_id = 'jaya_dataset_000'
    table_id = 'german_dataset'
    region = 'us-central1'
    bucket_name = 'jaya_gcs_000'
    blob_name = 'data/german_dataset.csv'
    
    ```


In [14]:
# --- GCP project-level settings ---
project_id = "dti-ds"
region = "us-central1"

# --- BigQuery source table holding the rows to score ---
dataset_id = "syahlan_dataset_033"
table_id = "bike_sharing_test"

# --- Cloud Storage bucket that stores the trained model artifact ---
bucket_name = "syahlan_gcs_033"

In [15]:
# Download the trained model artifact from Cloud Storage to a local file so
# that later cells can unpickle it from disk.
model_name = "best_model.sav"
local_model_dir = "models"
local_model_path = os.path.join(local_model_dir, model_name)

# Ensure the local directory exists
os.makedirs(local_model_dir, exist_ok=True)

try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    # The artifact is stored under the "model/" prefix of the bucket.
    blob_model = bucket.blob(f"model/{model_name}")
    blob_model.download_to_filename(local_model_path)
except Exception as e:
    # A download failure is not a type error: raise a RuntimeError and chain
    # the original exception so the real cause stays in the traceback.
    raise RuntimeError(f"An error occurred: {e}") from e
else:
    # Only reported when every step above completed without raising.
    print("Retrieve model succeeded")

Retrieve model succeeded


## Model Predictions

In [16]:
# NOTE(review): these packages are presumably required to unpickle the model
# (the pickle likely references their classes) — confirm against the training
# notebook. Importing them here makes a missing dependency fail early.
import sklearn
import xgboost

from sklearn.compose import ColumnTransformer
from category_encoders import BinaryEncoder


model_name = "best_model.sav"
model_path = f"models/{model_name}"

# NOTE(security): pickle.load can execute arbitrary code contained in the
# file. Only load model artifacts that come from a source you trust.
# The context manager guarantees the file handle is closed after loading.
with open(model_path, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Sample data for prediction
test_data = [
    [0.61, "Clear", 0, "winter", 0.3333, 0, 3, 2012, "Dec"],
    [0.37, "Clear", 0, "fall", 0.6364, 10, 1, 2011, "Aug"]
]

# Column names for the sample rows, in the same order as the values above.
columns = ['humidity', 'weathersit', 'holiday', 'season',
           'atemp', 'hour', 'day', 'year',
           'month']

df = pd.DataFrame(test_data, columns=columns)

# Smoke-test the loaded model on the two hand-written rows.
predictions = loaded_model.predict(df)

predictions

array([ 43.14527, 150.16676], dtype=float32)

In [17]:
# Read every row of the configured BigQuery table into a pandas DataFrame.
client = bigquery.Client(project=project_id)

# Build the statement first so it can be inspected or logged before running.
sql = f"select * from {dataset_id}.{table_id}"
query_job = client.query(sql)

# result() waits for the query job to finish before the rows are converted.
test_df = query_job.result().to_dataframe()

test_df



Unnamed: 0,humidity,weathersit,holiday,season,atemp,hour,day,year,month
0,0.37,Clear,0,fall,0.8182,15,6,2012,Aug
1,0.62,Misty,0,winter,0.3788,7,1,2011,Dec
2,0.66,Clear,0,winter,0.3788,20,1,2011,Dec
3,0.93,Misty,0,winter,0.3788,3,0,2012,Dec
4,0.71,Misty,0,spring,0.3788,3,2,2011,Dec
...,...,...,...,...,...,...,...,...,...
2428,0.75,Clear,1,winter,0.2727,7,3,2011,Nov
2429,0.87,Clear,0,winter,0.2727,7,5,2011,Nov
2430,0.80,Clear,0,winter,0.2727,7,4,2012,Nov
2431,0.81,Clear,0,winter,0.2727,5,2,2011,Nov


In [18]:
# Alias (not a copy): bulk_predict_df and test_df refer to the same DataFrame
# object, so an in-place change made through one name is visible via the other.
bulk_predict_df = test_df

In [19]:
# Score every row and wrap the predictions in a single-column frame.
y_pred = loaded_model.predict(bulk_predict_df)
y_pred_df = pd.DataFrame(data=y_pred, columns=['count'])

# Reset both indexes so the column-wise concat pairs rows by position.
features_part = test_df.reset_index(drop=True)
prediction_part = y_pred_df.reset_index(drop=True)

result_df = pd.concat([features_part, prediction_part], axis=1)
result_df

Unnamed: 0,humidity,weathersit,holiday,season,atemp,hour,day,year,month,count
0,0.37,Clear,0,fall,0.8182,15,6,2012,Aug,492.733429
1,0.62,Misty,0,winter,0.3788,7,1,2011,Dec,215.431808
2,0.66,Clear,0,winter,0.3788,20,1,2011,Dec,173.580429
3,0.93,Misty,0,winter,0.3788,3,0,2012,Dec,4.886236
4,0.71,Misty,0,spring,0.3788,3,2,2011,Dec,3.701888
...,...,...,...,...,...,...,...,...,...,...
2428,0.75,Clear,1,winter,0.2727,7,3,2011,Nov,57.782749
2429,0.87,Clear,0,winter,0.2727,7,5,2011,Nov,19.837704
2430,0.80,Clear,0,winter,0.2727,7,4,2012,Nov,227.520386
2431,0.81,Clear,0,winter,0.2727,5,2,2011,Nov,12.800999


In [20]:
from google.cloud.exceptions import NotFound

# Destination table for the scored rows; the full ID is used in log messages.
dest_table_id = 'bike_sharing_prediction'
table_full_id = f'{project_id}.{dataset_id}.{dest_table_id}'

# Column layout of the destination table: the nine input features followed by
# the predicted 'count'.
schema = [
    bigquery.SchemaField('humidity', 'FLOAT'),
    bigquery.SchemaField('weathersit', 'STRING'),
    bigquery.SchemaField('holiday', 'INTEGER'),
    bigquery.SchemaField('season', 'STRING'),
    bigquery.SchemaField('atemp', 'FLOAT'),
    bigquery.SchemaField('hour', 'INTEGER'),
    bigquery.SchemaField('day', 'INTEGER'),
    bigquery.SchemaField('year', 'INTEGER'),
    bigquery.SchemaField('month', 'STRING'),
    bigquery.SchemaField('count', 'FLOAT')
]

# Client.dataset() is deprecated in google-cloud-bigquery; build the reference
# explicitly instead. The project is the same one used in table_full_id.
table_ref = bigquery.DatasetReference(project_id, dataset_id).table(dest_table_id)

# Check if the table exists
try:
    client.get_table(table_ref)
    print(f'Table {table_full_id} already exists.')
except NotFound:
    # Create the table if it does not exist
    table = bigquery.Table(table_ref, schema=schema)
    table = client.create_table(table)
    print(f'Table {table_full_id} created.')

# WRITE_TRUNCATE replaces whatever the table currently holds, so re-running
# this cell overwrites previous predictions instead of appending to them.
job_config = bigquery.LoadJobConfig(
    schema=schema,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE
)

load_job = client.load_table_from_dataframe(result_df, table_ref, job_config=job_config)
# Block until the load job completes; raises if the job failed.
load_job.result()

print(f'Loaded {load_job.output_rows} rows into {table_full_id}')

Table dti-ds.syahlan_dataset_033.bike_sharing_prediction already exists.
Loaded 2433 rows into dti-ds.syahlan_dataset_033.bike_sharing_prediction
