In [None]:
!pip install -r requirements.txt

In [None]:
# import libraries
import pandas as pd
import numpy as np
import pickle

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')

import warnings
warnings.filterwarnings('ignore')

# import sklearn libraries
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor

In [None]:
class Model(object):
    """Inference wrapper around a pickled, pre-trained model.

    The wrapped object only needs to expose ``predict(X)``. Raw feature
    frames are cleaned by ``_preprocess`` before being handed to it.
    """

    # Values that are treated as "missing" in the raw feature table.
    _MISSING_SENTINELS = ['-999', -999]

    # Columns removed from the feature table before inference.
    _DROPPED_COLUMNS = ['Well Identifier', 'Measured Depth m',
                        'Mud Flow In L/min', 'Hole Depth (TVD) m']

    def __init__(self, model_file):
        """Load the pre-trained model stored in ``model_file``.

        Parameters
        ----------
        model_file : str or path-like
            Path to a pickle file containing the fitted model.
        """
        # NOTE(security): unpickling can execute arbitrary code, so only
        # load model files that come from a trusted source.
        # The context manager guarantees the file handle is closed.
        with open(model_file, 'rb') as handle:
            self.model = pickle.load(handle)

    def _preprocess(self, features):
        """Return a cleaned copy of ``features`` ready for inference.

        Sentinel values are turned into NaN, every NaN is filled with 0 and
        the columns in ``_DROPPED_COLUMNS`` are removed. The input frame is
        left untouched, so the same raw frame can be preprocessed repeatedly.

        Parameters
        ----------
        features : pandas.DataFrame
            Raw feature table; must contain every column in
            ``_DROPPED_COLUMNS`` (``drop`` raises ``KeyError`` otherwise).

        Returns
        -------
        pandas.DataFrame
            A new frame holding the cleaned features.
        """
        # Non-inplace calls each return a new frame, so the caller's data
        # is never modified.
        return (
            features
            .replace(self._MISSING_SENTINELS, np.nan)
            .fillna(0)
            .drop(self._DROPPED_COLUMNS, axis='columns')
        )

    def predict(self, features):
        """Predict from raw, unprocessed features.

        Parameters
        ----------
        features : pandas.DataFrame
            Raw feature table as read from the test CSV file.

        Returns
        -------
        numpy.ndarray
            One prediction per input row. Negative model outputs are
            replaced by a random value drawn from [0, 20) in steps of 0.001,
            so the result is not deterministic unless the global NumPy
            random state is seeded by the caller.
        """
        X = self._preprocess(features)
        y_pred = np.asarray(self.model.predict(X))

        # Replace every negative prediction in a single masked assignment
        # instead of looping over the array element by element.
        negative = y_pred < 0
        n_negative = int(negative.sum())
        if n_negative:
            y_pred[negative] = np.random.randint(0, 20000, size=n_negative) / 1000

        return y_pred

In [None]:
# Wrap the pickled, pre-trained model in the inference helper
# (a RandomForest regressor according to the original notes).
mm = Model('model.pkl')

# Read the raw feature table; replace this path to score another file.
test_data = pd.read_csv('tech_challenge2021_test.csv')

# Preprocessing and inference both happen inside predict().
y_pred = mm.predict(test_data)

# Write one prediction per line to "result.csv" under a single header row.
np.savetxt(
    'result.csv',
    y_pred,
    fmt='%f',
    header='Rate of Penetration',
    comments='',
)