In [None]:
!pip install -r requirements.txt

In [None]:
#Important Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline 

#For saving the model
import pickle

#Data Preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from scipy.stats import zscore

#Model selection and evaluation
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV

#Models
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor

In [None]:
class Model(object):
    """Inference wrapper around a pickled regressor and its pickled scaler.

    The wrapper takes raw feature frames (as read from the test CSV),
    applies the preprocessing in ``_preprocess``, and returns the model's
    predictions with negative values replaced.
    """

    def __init__(self, model_file, scaler_file):
        """Load the estimator and the scaler from disk.

        Parameters
        ----------
        model_file : str or path-like
            Path to the pickled estimator; it must expose ``predict``.
        scaler_file : str or path-like
            Path to the pickled, already-fitted scaler; it must expose
            ``transform``.
        """
        # NOTE(security): pickle.load can execute arbitrary code. Only load
        # files that come from a trusted source.
        # Context managers guarantee the file handles are closed.
        with open(model_file, 'rb') as model_fh:
            self.model = pickle.load(model_fh)
        with open(scaler_file, 'rb') as scaler_fh:
            self.scaler = pickle.load(scaler_fh)

    def _preprocess(self, features):
        """Clean and scale raw features before inference.

        Drops the ``'Well Identifier'`` column, treats ``-999`` / ``'-999'``
        as missing, fills missing values with 0, and applies the loaded
        scaler. The caller's DataFrame is left untouched.

        Raises
        ------
        KeyError
            If ``features`` has no ``'Well Identifier'`` column.
        """
        # Each call below returns a new frame, so `features` is never
        # mutated and `predict` can be called repeatedly on the same input.
        cleaned = (
            features
            .drop(['Well Identifier'], axis=1)
            .replace(['-999', -999], np.nan)
            .fillna(0)
        )
        # Use transform, not fit_transform: the scaler was fitted at training
        # time, and re-fitting on inference data would discard those
        # statistics and scale the inputs inconsistently with the model.
        return self.scaler.transform(cleaned)

    def predict(self, features):
        """Predict from raw, unprocessed features.

        Parameters
        ----------
        features : pandas.DataFrame
            Raw features as read from the test CSV.

        Returns
        -------
        numpy.ndarray
            Float array with one prediction per input row. Negative
            predictions are replaced by a random value in ``[0, 20)`` with
            a resolution of 0.001; this replacement is non-deterministic
            unless NumPy's global random state is seeded by the caller.
        """
        X = self._preprocess(features)
        # Copy into a float array so the estimator's own output buffer is
        # not modified and float replacements are not truncated.
        y_pred = np.array(self.model.predict(X), dtype=float)

        negative = y_pred < 0
        n_negative = int(negative.sum())
        if n_negative:
            y_pred[negative] = np.random.randint(0, 20000, size=n_negative) / 1000

        return y_pred

In [None]:
# Input artefacts and the output location for this inference run.
MODEL_PATH, SCALER_PATH = 'model.pkl', 'scaler.pkl'
TEST_CSV = 'tech_challenge2021_test.csv'
RESULT_CSV = 'result.csv'

# Restore the pickled estimator together with its scaler.
model = Model(MODEL_PATH, SCALER_PATH)

# Read the raw test features and run them through the wrapper.
testset = pd.read_csv(TEST_CSV)
testset_pred = model.predict(testset)

# Write one prediction per line under a single column header. comments=''
# stops NumPy from prefixing the header line with '# '.
np.savetxt(
    RESULT_CSV,
    testset_pred,
    fmt='%f',
    header='Rate of Penetration',
    comments='',
)