In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import datetime as dt
import config

In [2]:
gs_bucket_name ="lifesight-data-table/MLops"
Bucket_uri = "gs://lifesight-data-table/MLops"
data_path = Bucket_uri+"/"+"data/house_data.csv"
processed_train = Bucket_uri+"/"+"train/train_data.csv"
processed_test = Bucket_uri+"/"+"test/test_data.csv"
model_path = Bucket_uri+"/model/"

In [3]:
data = pd.read_csv(config.data_path)

In [4]:
data.drop(['id','date'],axis=1,inplace=True)
#converting built year to actual age of the house
data['built_age'] = 2021 - data.yr_built 
data.drop('yr_built',axis=1,inplace=True)

X = list(data.iloc[:,1:].values) #independent
y = data.price.values #dependent
#scaling X values
sn = StandardScaler()
X = sn.fit_transform(X)

y = np.log10(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)
X_train = pd.DataFrame(X_train)
X_train['y_train'] = y_train

X_test = pd.DataFrame(X_test)
X_test['y_test'] = y_test

In [5]:
X_train.to_csv(config.processed_train,index=False)
X_test.to_csv(config.processed_test,index=False)

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

import pickle
from google.cloud import storage
import config

In [2]:
train_data = pd.read_csv(config.processed_train)
y_train = train_data['y_train']
X_train = train_data.drop(columns=['y_train'])

lr = LinearRegression(normalize=True,fit_intercept=True,n_jobs=1)
lr.fit(X_train,y_train)
filename = 'model.sav'
pickle.dump(lr,open(filename,'wb'))

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




In [3]:
bucket = storage.Client().bucket(config.gs_bucket_name)
blob = bucket.blob(str('MLops/model/')+filename)
blob.upload_from_filename(filename)

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
#import os
import pickle
from tempfile import TemporaryFile
from google.cloud import storage
import config

In [2]:
test_data = pd.read_csv(config.processed_test)
y_test = test_data['y_test']
X_test = test_data.drop(columns=['y_test'])

In [4]:
model_bucket = 'MLops/model/model.sav'
#bucket = storage.Client().get_bucket(bucket_name)
bucket = storage.Client().get_bucket(config.gs_bucket_name)
blob = bucket.blob(model_bucket)

In [5]:
with TemporaryFile() as temp_file:
    #download blob into temp file
    blob.download_to_file(temp_file)
    temp_file.seek(0)
    #load into joblib
    #model=joblib.load(temp_file)
    loaded_model = pickle.load(temp_file)

#loaded_model = pickle.load(open(filename, 'rb'))
y_pred = loaded_model.predict(X_test)

In [7]:
test_data['y_pred'] = y_pred
#test_data.to_csv("gs://lifesight-data-table/MLops/predicted_data.csv",index=False)
test_data.to_csv(config.predicted_data,index=False)