In [1]:
pip install flask joblib


Note: you may need to restart the kernel to use updated packages.




In [2]:
from flask import Flask, request, jsonify
import joblib
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from darts import TimeSeries
from darts.models import KalmanFilter
from darts.models import KalmanForecaster
import os

from sklearn.metrics import mean_squared_error
from statistics import mean
import math

The StatsForecast module could not be imported. To enable support for the StatsForecastAutoARIMA, StatsForecastAutoETS and Croston models, please consider installing it.


In [3]:
#model_path = 'C:/Users/marke/DSCP/models/test_forecast_model.pkl'

#def load_model():
#    return joblib.load(model_path)

#def save_model(model):
#    joblib.dump(model, model_path)

def detect_outliers(series, threshold=2): # need to set threshold of 1 for changes to be made
    mean = series.mean()
    std = series.std()
    z_scores = (series - mean) / std
    return z_scores.abs() > threshold

def data_preprocessing():

    ### Load new data
    new_data = pd.read_csv('fromazure (Macpherson).csv')

    ### Fixing Data Types and Index
    new_data['TimeFrame'] = pd.to_datetime(new_data['TimeFrame'])
    start_timestamp = new_data['TimeFrame'].min() + pd.Timedelta(minutes=10)

    new_timestamps = pd.date_range(start=start_timestamp, periods = len(new_data), freq='10T')    # Generate the new timestamps at 10-minute intervals
    new_df = pd.DataFrame({'TimeFrame': new_timestamps})
    new_data['TimeFrame'] = new_df['TimeFrame']     # Replace the NaT values with the new timestamps

    ### Fixing 0 values

    time_series_col = ['AvgPhosphorous', 'AvgNitrogen', 'AvgPotassium', 'LightIntensity', 'Temperature', 'Humidity', 'Heat']

    for i in time_series_col:
        new_data[i] = new_data[i].replace(0, np.nan)
        new_data[i] = new_data[i].bfill()


    ### Fixing Outliers

    for column in new_data[time_series_col].columns:
        outliers = detect_outliers(new_data[column])
        new_data.loc[outliers, column] = np.nan
        new_data[column].fillna(new_data[column].mean(), inplace=True)


    ### Dropping and Splitting


    new_data.drop(['LightIntensity', 'Humidity', 'Heat', 'Buzzer', 'SoilMoisture', 'Compost'], axis=1, inplace=True)

    train = new_data[:int(0.8*(len(new_data)))]
    test= new_data[int(0.8*(len(new_data))):]

    train_timeframe = train['TimeFrame']    # Save the timeframe index for the forecast model
    test_timeframe = test['TimeFrame']

    train.drop('TimeFrame', axis=1, inplace=True)

    print(train.head())

    ### Scaling

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_train = scaler.fit_transform(train)

    col_list = train.columns.to_list()
    scaled_train = pd.DataFrame(scaled_train, columns=col_list)

    scaled_train.index = train_timeframe

    return scaled_train, test, train_timeframe, test_timeframe, scaler

In [4]:
scaled_train, test, train_timeframe, test_timeframe, scaler = data_preprocessing()

   NumberOfWorms  AvgPhosphorous  AvgNitrogen  AvgPotassium  Temperature
0             30            55.0        142.0         127.0         32.7
1             30            55.0        142.0         115.0         32.7
2             30            55.0        140.0         127.5         32.7
3             30            55.0        140.0         126.0         32.7
4             30            55.0        140.0         127.0         32.7


  new_timestamps = pd.date_range(start=start_timestamp, periods = len(new_data), freq='10T')    # Generate the new timestamps at 10-minute intervals
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  new_data[column].fillna(new_data[column].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  new_data[column].fil

In [5]:
scaled_train.index = train_timeframe

In [6]:
def kalman_model_retraining(scaled_train, test):

    ### Create the TimeSeries object
    kalman_df = scaled_train.reset_index()
    kalman_df.rename(columns={'index': 'date'}, inplace=True)
    col_list = scaled_train.columns.tolist()
    series = TimeSeries.from_dataframe(kalman_df, 'TimeFrame', col_list)


    ### Load the existing model
    #model = load_model()

    ### Train the Kalman model
    model = KalmanForecaster(dim_x=series.width)
    model.fit(series)

    # Save the updated model
    #save_model(model)

    return model

In [7]:
model = kalman_model_retraining(scaled_train, test)

In [8]:
def model_prediction(model, test):
    forecast_horizon = len(test)  # Number of steps to predict
    forecast = model.predict(forecast_horizon)

    forecast_df = forecast.pd_dataframe()
    forecast_df = pd.DataFrame(scaler.inverse_transform(forecast_df))
    forecast_df.columns=['NumberOfWorms_predicted', 'AvgPhosphorous_predicted', 'AvgNitrogen_predicted', 'AvgPotassium_predicted', 'Temperature_Predicted']  
    
    return  forecast_df

In [9]:
forecast_df = model_prediction(model, test)

In [10]:
def minimum_score(forecast_df, test):

    phos_valid = None
    nitro_valid = None
    potas_valid = None

    rmse_phos=math.sqrt(mean_squared_error(forecast_df['AvgPhosphorous_predicted'],test['AvgPhosphorous']))
    print('Mean value of AvgPhosphorous is : {}. Root Mean Squared Error is :{}'.format(mean(test['AvgPhosphorous']),rmse_phos))

    rmse_nitro=math.sqrt(mean_squared_error(forecast_df['AvgNitrogen_predicted'],test['AvgNitrogen']))
    print('Mean value of AvgNitrogen is : {}. Root Mean Squared Error is :{}'.format(mean(test['AvgNitrogen']),rmse_nitro))

    rmse_potas=math.sqrt(mean_squared_error(forecast_df['AvgPotassium_predicted'],test['AvgPotassium']))
    print('Mean value of AvgPotassium is : {}. Root Mean Squared Error is :{}'.format(mean(test['AvgPotassium']),rmse_potas))

    min_score_dict = {'Phos_Score': 100, 'Nitro_score': 70, 'Potas_score': 200}  ### Set the minimum score for each nutrient 

    if rmse_phos < min_score_dict['Phos_Score']:
        phos_valid = True
    if rmse_nitro < min_score_dict['Nitro_score']:
        nitro_valid = True
    if rmse_potas < min_score_dict['Potas_score']:
        potas_valid = True

    if phos_valid == True and nitro_valid == True and potas_valid == True:
        return True
    else:
        return False

In [11]:
minimum_score(forecast_df, test)

Mean value of AvgPhosphorous is : 56.526315789473685. Root Mean Squared Error is :14.368177759825771
Mean value of AvgNitrogen is : 61.526315789473685. Root Mean Squared Error is :20.540311438294843
Mean value of AvgPotassium is : 84.75438596491227. Root Mean Squared Error is :83.2935503613779


True

In [12]:
from flask import Flask, request, render_template, jsonify

app = Flask(__name__)

#@app.route('/',)
#def loadpage():
#    return render_template('home.html', query='') # try with simple dataframe and refresh button

@app.route('/') # use POST request if person needs to upload data
def retrain():
    scaled_train, test, train_timeframe, test_timeframe, scaler = data_preprocessing()
    scaled_train.index = train_timeframe

    model = kalman_model_retraining(scaled_train, test)
    forecast_df = model_prediction(model, test)
    minimum_score(forecast_df, test)

    # Quality control
    if minimum_score(forecast_df, test) == True: # satisfy the minimum score
        forecast_html = forecast_df.to_html()
    
        # html template should have two outputs, one is the dataset, other is the message
        return forecast_html
    #render_template(output1 = o1, 'home.html', query='Model retrained successfully and passed the minimum score')
    #else:  # failed the minimum score
    #    return render_template('home.html', query='Model retrained successfully but failed the minimum score')


app.run()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit


In [None]:
curl -X POST -H "Content-Type: application/json" -d '{"data_path": "data/new_data.csv"}' http://127.0.0.1:5000/retrain


SyntaxError: invalid syntax (546931686.py, line 1)