In [1]:
import pandas as pd
import requests
import seaborn as sns
import os
from pycaret.regression import *

In [2]:
# Request parameters for the ThingSpeak feed endpoint.
channel_id = 12397
n_feds = 8000  # number of feed entries requested via the `results` query parameter

# The API key is read from the environment so it never lives in the notebook.
api_key = os.environ['API_KEY']

url = (
    f'https://api.thingspeak.com/channels/{channel_id}/feeds.json'
    f'?api_key={api_key}&results={n_feds}'
)

In [3]:
# Download the feed entries.
# - timeout: without one, requests waits forever on a stalled connection.
# - raise_for_status: surface HTTP errors (bad key, wrong channel, rate limit)
#   here instead of as a confusing JSON/KeyError on the next line.
response = requests.get(url, timeout=30)
response.raise_for_status()
payload = response.json()['feeds']

In [4]:
# Sanity check: how many feed entries came back from the request.
len(payload)

8000

In [5]:
# Column order for the frame: the two metadata keys followed by field1..field8.
columns = ['created_at', 'entry_id'] + [f'field{i}' for i in range(1, 9)]

# One row per feed entry in the payload.
df_feed = pd.DataFrame(payload, columns=columns)

In [6]:
# Keep only the field columns; the timestamp and entry id are discarded.
# df_feed itself is left untouched, so this cell can be re-run safely.
df = df_feed.drop(['created_at', 'entry_id'], axis='columns')
df

Unnamed: 0,field1,field2,field3,field4,field5,field6,field7,field8
0,55,0.8,86,55.3,0,29.45,4.065,2
1,36,2,86,55.3,0,29.45,4.068,2
2,60,1.5,86,55.3,0,29.44,4.071,1
3,44,1.7,86,55.3,0,29.44,4.071,1
4,47,1.7,86,55.3,0,29.44,4.072,1
...,...,...,...,...,...,...,...,...
7995,34,0,62,43.4,0,29.94,4.092,0
7996,0,0,64,43.4,0,29.94,4.085,0
7997,0,0,65,44,0,29.94,4.09,0
7998,23,0,64,44.5,0,29.94,4.08,0


In [7]:
# Inspect the distinct raw values of field4; the recorded output shows they are strings.
# NOTE(review): this relies on the pre-rename column name, so it only works before
# the rename cell has been executed.
df['field4'].unique()

array(['55.3', '55.1', '55', '55.5', '55.7', '55.9', '56', '54.8', '54.6',
       '54.4', '54.2', '54', '53.9', '53.7', '53.5', '53.3', '53.1', '53',
       '52.8', '52.6', '52.4', '52.2', '52.1', '51.9', '51.7', '51.5',
       '51.4', '51.2', '51', '50.8', '50.6', '50.5', '50.3', '50.1',
       '49.9', '49.7', '49.5', '49.4', '49.2', '49', '48.8', '48.6',
       '48.5', '48.3', '48.1', '47.9', '47.8', '47.6', '47.4', '47.2',
       '47', '46.9', '46.7', '46.5', '46.3', '46.1', '46', '45.8', '45.6',
       '45.4', '45.2', '45', '44.9', '44.7', '44.5', '44.3', '44.1', '44',
       '43.8', '43.6', '43.4', '43.2', '43.1', '42.9', '42.7', '42.5',
       '42.4', '42.2', '42', '41.8', '41.6', '41.5', '41.3', '41.1',
       '40.9', '40.7', '40.5', '40.4', '40.2', '40', '39.8', '39.6',
       '39.5', '39.3', '39.1', '38.9', '38.8', '38.6', '38.4', '38.2',
       '38', '37.9', '37.7', '37.5', '37.3', '37.1', '37', '36.8', '36.6',
       '36.2', '35.7', '35.3', '35.1', '35', '34.8', '34.6', '34.

In [8]:
# Descriptive names for field1..field8, listed in field order.
new_column_name = [
    'Wind Direction', 'Wind Speed', 'Humidity', 'Temperature',
    'Rain', 'Pressure', 'Power Level', 'Light Intensity',
]

# Pair each fieldN with the name at position N-1 and rename in one pass.
df = df.rename(
    columns={f'field{pos}': label for pos, label in enumerate(new_column_name, start=1)}
)
df

Unnamed: 0,Wind Direction,Wind Speed,Humidity,Temperature,Rain,Pressure,Power Level,Light Intensity
0,55,0.8,86,55.3,0,29.45,4.065,2
1,36,2,86,55.3,0,29.45,4.068,2
2,60,1.5,86,55.3,0,29.44,4.071,1
3,44,1.7,86,55.3,0,29.44,4.071,1
4,47,1.7,86,55.3,0,29.44,4.072,1
...,...,...,...,...,...,...,...,...
7995,34,0,62,43.4,0,29.94,4.092,0
7996,0,0,64,43.4,0,29.94,4.085,0
7997,0,0,65,44,0,29.94,4.09,0
7998,23,0,64,44.5,0,29.94,4.08,0


In [9]:
# Remove the Rain column from the working frame.
# errors='ignore' keeps this cell re-runnable: `df` is rebound to the result, so a
# second execution would otherwise raise KeyError because 'Rain' is already gone.
# NOTE(review): the reason for dropping Rain is not stated; the visible rows are
# all 0 — confirm that is the motivation.
df = df.drop(columns=['Rain'], errors='ignore')
df

Unnamed: 0,Wind Direction,Wind Speed,Humidity,Temperature,Pressure,Power Level,Light Intensity
0,55,0.8,86,55.3,29.45,4.065,2
1,36,2,86,55.3,29.45,4.068,2
2,60,1.5,86,55.3,29.44,4.071,1
3,44,1.7,86,55.3,29.44,4.071,1
4,47,1.7,86,55.3,29.44,4.072,1
...,...,...,...,...,...,...,...
7995,34,0,62,43.4,29.94,4.092,0
7996,0,0,64,43.4,29.94,4.085,0
7997,0,0,65,44,29.94,4.09,0
7998,23,0,64,44.5,29.94,4.08,0


In [10]:
# Preview the first rows of the working frame after the Rain column was dropped.
df.head()

Unnamed: 0,Wind Direction,Wind Speed,Humidity,Temperature,Pressure,Power Level,Light Intensity
0,55,0.8,86,55.3,29.45,4.065,2
1,36,2.0,86,55.3,29.45,4.068,2
2,60,1.5,86,55.3,29.44,4.071,1
3,44,1.7,86,55.3,29.44,4.071,1
4,47,1.7,86,55.3,29.44,4.072,1


In [11]:
# Check the column dtypes before conversion; the recorded output shows every
# column is still `object`, i.e. not yet numeric.
df.dtypes

Wind Direction     object
Wind Speed         object
Humidity           object
Temperature        object
Pressure           object
Power Level        object
Light Intensity    object
dtype: object

In [12]:
# Cast every column to 64-bit floats so the values can be used as numeric features,
# then show the resulting dtypes to confirm the conversion.
df = df.astype('float64')
df.dtypes

Wind Direction     float64
Wind Speed         float64
Humidity           float64
Temperature        float64
Pressure           float64
Power Level        float64
Light Intensity    float64
dtype: object

In [13]:
# Count missing values per column after the float conversion.
df.isna().sum()

Wind Direction     0
Wind Speed         0
Humidity           0
Temperature        0
Pressure           0
Power Level        0
Light Intensity    0
dtype: int64

In [14]:
# Initialise the PyCaret regression experiment:
# - Temperature is the value to predict; the remaining columns are the features.
# - remove_outliers adds an outlier-removal step to the preprocessing pipeline.
# - session_id fixes the seed (it appears as random_state=123 in the saved pipeline).
s = setup(
    data=df,
    target='Temperature',
    remove_outliers=True,
    session_id=123,
)

In [15]:
# List the regressors available in this experiment (ID, name, class reference).
models()

Unnamed: 0_level_0,Name,Reference,Turbo
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
lr,Linear Regression,sklearn.linear_model._base.LinearRegression,True
lasso,Lasso Regression,sklearn.linear_model._coordinate_descent.Lasso,True
ridge,Ridge Regression,sklearn.linear_model._ridge.Ridge,True
en,Elastic Net,sklearn.linear_model._coordinate_descent.Elast...,True
lar,Least Angle Regression,sklearn.linear_model._least_angle.Lars,True
llar,Lasso Least Angle Regression,sklearn.linear_model._least_angle.LassoLars,True
omp,Orthogonal Matching Pursuit,sklearn.linear_model._omp.OrthogonalMatchingPu...,True
br,Bayesian Ridge,sklearn.linear_model._bayes.BayesianRidge,True
ard,Automatic Relevance Determination,sklearn.linear_model._bayes.ARDRegression,False
par,Passive Aggressive Regressor,sklearn.linear_model._passive_aggressive.Passi...,True


In [16]:
# Evaluate the available regressors and show their metrics side by side.
# NOTE(review): `best_model` is not referenced again in the visible cells; the
# top-ranked model in the recorded table (Extra Trees, 'et') is rebuilt explicitly
# in the next cell instead.
best_model = compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
et,Extra Trees Regressor,0.4292,0.5839,0.7619,0.989,0.0173,0.0099,0.642
rf,Random Forest Regressor,0.4447,0.6884,0.8263,0.987,0.0189,0.0102,0.723
lightgbm,Light Gradient Boosting Machine,0.6223,0.9193,0.9572,0.9827,0.0216,0.0143,0.559
dt,Decision Tree Regressor,0.5053,1.2308,1.1023,0.9769,0.0252,0.0117,0.525
gbr,Gradient Boosting Regressor,1.0944,2.3492,1.5314,0.9557,0.0346,0.0252,0.587
ada,AdaBoost Regressor,2.2428,6.7312,2.5928,0.8732,0.0586,0.0522,0.593
knn,K Neighbors Regressor,1.7644,9.0494,3.0006,0.8296,0.0658,0.0394,0.582
lr,Linear Regression,2.8432,12.4547,3.5282,0.7653,0.077,0.0643,1.081
br,Bayesian Ridge,2.844,12.4536,3.528,0.7653,0.077,0.0643,0.534
ridge,Ridge Regression,2.8571,12.483,3.5322,0.7648,0.0773,0.0647,0.53


In [17]:
# Train an Extra Trees regressor ('et' in the models() listing); the recorded
# output reports its metrics for folds 0-9.
et = create_model('et')

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.4145,0.5267,0.7257,0.9898,0.017,0.0097
1,0.4128,0.4922,0.7016,0.9904,0.0155,0.0094
2,0.4497,0.7008,0.8371,0.9866,0.0187,0.0102
3,0.4092,0.4961,0.7044,0.9911,0.0152,0.0092
4,0.4183,0.4948,0.7034,0.9909,0.0158,0.0095
5,0.449,0.6367,0.7979,0.9878,0.0181,0.0103
6,0.4129,0.4999,0.707,0.9908,0.0161,0.0095
7,0.4251,0.6862,0.8284,0.9877,0.0192,0.01
8,0.4744,0.7214,0.8494,0.9862,0.0198,0.0112
9,0.4256,0.5843,0.7644,0.9886,0.0172,0.0096


In [18]:
# Hyperparameter search over the Extra Trees model (recorded log: 10 candidates x 10 folds).
# NOTE(review): the recorded fold metrics for the tuned candidate (R2 ~0.93-0.95) are
# worse than the untuned model's (R2 ~0.99), and the saved pipeline shows an
# ExtraTreesRegressor with only n_jobs/random_state set — presumably tune_model
# returned the original estimator; confirm before relying on `tuned_et` being tuned.
tuned_et = tune_model(et)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1.3718,3.3674,1.835,0.9347,0.0421,0.0316
1,1.3818,3.3963,1.8429,0.9341,0.0413,0.0318
2,1.41,3.4489,1.8571,0.9342,0.042,0.0322
3,1.4375,3.4975,1.8702,0.9372,0.0429,0.0332
4,1.3816,3.3508,1.8305,0.9382,0.0415,0.0316
5,1.4063,3.6482,1.91,0.9302,0.0429,0.0318
6,1.3544,3.1985,1.7884,0.9409,0.0403,0.0311
7,1.3243,3.0598,1.7492,0.9453,0.0404,0.0309
8,1.2861,2.7952,1.6719,0.9465,0.0389,0.0302
9,1.2562,3.0214,1.7382,0.9413,0.039,0.0285


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [19]:
# Persist the preprocessing pipeline together with the trained model.
# NOTE(review): the recorded output reports './deploy/pickle_et_pycaret.pkl', which
# does not match the path passed here — the output looks stale; re-run to confirm
# where the file is actually written.
save_model(tuned_et, './pickle_et_pycaret')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=FastMemory(location=C:\Users\mathe\AppData\Local\Temp\joblib),
          steps=[('numerical_imputer',
                  TransformerWrapper(include=['Wind Direction', 'Wind Speed',
                                              'Humidity', 'Pressure',
                                              'Power Level', 'Light Intensity'],
                                     transformer=SimpleImputer())),
                 ('categorical_imputer',
                  TransformerWrapper(include=[],
                                     transformer=SimpleImputer(strategy='most_frequent'))),
                 ('remove_outliers',
                  TransformerWrapper(transformer=RemoveOutliers(random_state=123))),
                 ('clean_column_names',
                  TransformerWrapper(transformer=CleanColumnNames())),
                 ('trained_model',
                  ExtraTreesRegressor(n_jobs=-1, random_state=123))]),
 './deploy/pickle_et_pycaret.pkl')