In [73]:
## import libraries
import os
import pandas as pd
import numpy as np

#import google cloud library
from google.cloud import bigquery
from google.cloud import storage
# from google.cloud import aiplatform
# from support_functions import missing_value, fill_missing, list_dtypes

## sklearn module
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import (
    OneHotEncoder, 
    OrdinalEncoder, 
    RobustScaler, 
    MinMaxScaler, 
    StandardScaler
)
from sklearn.compose import ColumnTransformer, TransformedTargetRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
import pickle


In [26]:
# Credentials file path exported for the google.cloud clients created below.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "trial_bigq.json"

In [27]:
# Google Cloud configuration shared by the cells below.
# Keep the project ID lowercase: GCP project IDs only allow lowercase letters,
# digits and hyphens, and it must match the project the clients are created for.
project_id = 'dtidsus'
dataset_id = 'capstone'
table_id = 'data_daegu_apartment'
region = 'us-central1'
bucket_name = 'modul4'
blob_name = 'farrel/data_daegu_apartment.csv'

# BigQuery client bound to the configured project (single source of truth).
client = bigquery.Client(project=project_id)

In [28]:
# (Re)create the BigQuery client for the 'dtidsus' project.
client = bigquery.Client(project="dtidsus")

In [29]:
# Upload the cleaned dataset CSV from the working directory to the bucket.
try:
    storage_client = storage.Client(project='dtidsus')
    bucket = storage_client.get_bucket(bucket_name)
    # Destination object inside the bucket for the cleaned dataset.
    data_capstone = bucket.blob('data_daegu_apartment.csv')
    data_capstone.upload_from_filename(r'daegu_cleaned.csv')
except Exception as exc:
    # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit pass
    # through, and chain the cause so the real failure stays visible.
    # TypeError is kept so the raised exception type is unchanged.
    raise TypeError(f"An exception occurred: {exc!r}") from exc
else:
    # NOTE: the message says "model", but this cell uploads the dataset CSV.
    print ("Uploading model succeeded")

Uploading model succeeded


In [30]:
# BigQuery client for the 'dtidsus' project, passed by keyword for clarity.
client = bigquery.Client(project='dtidsus')

In [31]:
# Start the query job that selects every row and column of the configured table.
query_job = client.query(f"select * from {dataset_id}.{table_id}")

In [32]:
# Fetch the query result and materialise it as a pandas DataFrame.
df = (
    query_job
    .result()
    .to_dataframe()
)



In [33]:
# `data` is a second name for the same DataFrame object as `df` (no copy is made),
# so in-place changes through either name are visible through both.
data = df

In [68]:
# Column groups routed to each branch of the preprocessing ColumnTransformer.
numerical_cols = [
    'N_FacilitiesNearBy',
    'N_FacilitiesNearBy_4',
    'N_SchoolNearBy',
    'YearBuilt',
    'Size',
    'N_Parkinglot',
]
categorical_cols = ['SubwayStation', 'HallwayType']
ordinal_cols = ['TimeToSubway']

# Explicit rank order handed to the OrdinalEncoder for TimeToSubway.
time_to_subway_categories = [
    '0-5min',
    '5min~10min',
    '10min~15min',
    '15min~20min',
    'no_bus_stop_nearby',
]

# SalePrice is the target; every other column stays in the feature frame.
y = df['SalePrice']
X = df.drop(columns='SalePrice')

# Hold out 20% of the rows for evaluation, using a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [69]:
def train_model(df, target_col='SalePrice', test_size=0.2, random_state=42):
    """Train and evaluate a random-forest regression pipeline on ``df``.

    The frame passed in is split into train and test partitions here, so the
    result depends on the ``df`` argument rather than on a split computed in
    another cell. With the default arguments the split matches
    ``train_test_split(X, y, test_size=0.2, random_state=42)``.

    The column groups come from the module-level ``numerical_cols``,
    ``categorical_cols``, ``ordinal_cols`` and ``time_to_subway_categories``.

    Args:
        df: DataFrame holding the feature columns and the target column.
        target_col: Name of the column to predict.
        test_size: Fraction of rows held out for evaluation.
        random_state: Seed used for both the split and the forest.

    Returns:
        Tuple ``(pipeline, rmse, mae, r2)``: the fitted Pipeline and its
        RMSE, MAE and R^2 on the held-out rows.

    Raises:
        KeyError: If ``target_col`` is not a column of ``df``.
    """
    # Split the frame that was actually passed in.
    features = df.drop(columns=target_col)
    target = df[target_col]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )

    # Define preprocessing for numeric, categorical and ordinal features
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_cols),
            ('cat', OneHotEncoder(), categorical_cols),
            ('ord', Pipeline(steps=[
                ('ordencode', OrdinalEncoder(categories=[time_to_subway_categories])),
                ('scale', StandardScaler())
            ]), ordinal_cols),
        ]
    )

    # Define the model as RandomForestRegressor
    model = RandomForestRegressor(
        n_estimators=200,           # Set number of trees in the forest
        random_state=random_state,  # For reproducibility
        n_jobs=-1                   # Use all available CPUs for faster training
    )

    # Create pipeline
    pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])

    # Train the pipeline
    pipeline.fit(X_train, y_train)

    # Make predictions on the held-out rows
    y_pred = pipeline.predict(X_test)

    # Calculate performance metrics
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return pipeline, rmse, mae, r2

##upload the model

In [77]:
# Fit the pipeline, then report each hold-out metric on its own line.
model, rmse, mae, r2 = train_model(data)
for metric_name, metric_value in (('RMSE: ', rmse), ('MAE: ', mae), ('R2: ', r2)):
    print(metric_name, metric_value)

RMSE:  42559.556439403794
MAE:  33559.9453437682
R2:  0.8369439222430464


In [78]:
# Save model
model_filename = 'model.pkl'
pickle.dump(model, open(model_filename, 'wb'))

In [79]:
# Upload the pickled model file to the bucket.
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name) # Add bucket name
    blob_model = bucket.blob('ilham/model/model.pkl')
    # Upload the file written by the save step instead of repeating its name.
    blob_model.upload_from_filename(model_filename)
except Exception as exc:
    # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit pass
    # through, and chain the cause so the real failure stays visible.
    # TypeError is kept so the raised exception type is unchanged.
    raise TypeError(f"An exception occurred: {exc!r}") from exc
else:
    print ("Uploading model succeeded")

Uploading model succeeded
