<a href="https://colab.research.google.com/github/BengIslam7/TinyML-IoT/blob/main/Temperature_Humidity_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
# Mount Google Drive to access files.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
# Unzip the dataset from Google Drive.
!unzip -q '/content/gdrive/MyDrive/iot_dataset.zip'

In [3]:
# Import necessary libraries for data manipulation, deep learning, and image processing.
import numpy as np
import pandas as pd
import tensorflow as tf
import cv2

In [4]:
# Load the dataset from the CSV file into a pandas DataFrame.
data = pd.read_csv('/content/DATA-large.CSV')

In [5]:
# Display the first few rows of the DataFrame to inspect the data structure.
data.head()

Unnamed: 0,time,temperature,humidity,pressure,lux
0,2022/3/12 22:32:2,24.2,32.7,98490.45,10.0
1,2022/3/12 22:32:6,24.0,33.9,98486.98,12.08
2,2022/3/12 22:32:11,24.1,33.9,98488.27,22.92
3,2022/3/12 22:32:16,24.1,33.8,98489.58,22.92
4,2022/3/12 22:32:21,24.1,35.0,98489.49,22.92


In [6]:
# Check for null values in the 'temperature', 'humidity', 'lux', and 'pressure' columns.
data[['temperature','humidity','lux','pressure']].isnull().sum()

Unnamed: 0,0
temperature,0
humidity,0
lux,0
pressure,0


# Predict next temperature values with Linear Regression

In [7]:
# Extract the 'temperature' column for prediction and keep a copy of the original values.
temp = data['temperature'].values
real_temp = data['temperature'].values

In [8]:
# Display the 'temp' array.
temp

array([24.2, 24. , 24.1, ..., 23.3, 23.3, 23.3])

In [9]:
# Display the shape of the 'temp' array.
temp.shape

(693220,)

In [10]:
# Import MinMaxScaler for feature scaling.
from sklearn.preprocessing import MinMaxScaler
# Initialize the MinMaxScaler.
scaler = MinMaxScaler()
# Scale the temperature data to a range between 0 and 1.
temp = scaler.fit_transform(temp.reshape(-1,1))

In [None]:
# Display the scaled 'temp' array.
temp

array([[0.47272727],
       [0.45454545],
       [0.46363636],
       ...,
       [0.39090909],
       [0.39090909],
       [0.39090909]])

In [None]:
# Display the shape of the scaled 'temp' array.
temp.shape

(693220, 1)

In [None]:
# Prepare the data for time series prediction.
# Create sequences of 10 previous temperature values (X) to predict the 11th value (y).
X, y = [],[]
for i in range(0,temp.shape[0] - 500,5):
  X.append(temp[i:i+10])
  y.append(temp[i+10])
# Convert lists to numpy arrays.
X=np.array(X)
y=np.array(y)
# Reshape X for the linear model (flatten each window).
X = X.reshape(X.shape[0], -1)
# Print the shapes of the prepared X and y arrays.
print(X.shape, y.shape)

(138544, 10) (138544, 1)


In [None]:
# Display the prepared X array.
X

array([[0.47272727, 0.45454545, 0.46363636, ..., 0.46363636, 0.46363636,
        0.46363636],
       [0.46363636, 0.46363636, 0.46363636, ..., 0.46363636, 0.33636364,
        0.32727273],
       [0.46363636, 0.46363636, 0.46363636, ..., 0.32727273, 0.32727273,
        0.32727273],
       ...,
       [0.37272727, 0.37272727, 0.37272727, ..., 0.37272727, 0.37272727,
        0.37272727],
       [0.37272727, 0.37272727, 0.37272727, ..., 0.37272727, 0.37272727,
        0.37272727],
       [0.38181818, 0.37272727, 0.37272727, ..., 0.37272727, 0.37272727,
        0.37272727]])

In [None]:
# Display the prepared y array.
y

array([[0.46363636],
       [0.32727273],
       [0.32727273],
       ...,
       [0.38181818],
       [0.37272727],
       [0.37272727]])

In [None]:
# Import necessary layers for building a Sequential Keras model.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Build a simple sequential model with a single dense layer for linear regression.
model = Sequential([
    Dense(1, input_shape=(10,), activation='linear')
])

# Compile the model with Adam optimizer and Mean Squared Error loss.
model.compile(
    optimizer='adam',
    loss='mse'
)

# Train the model for 10 epochs.
model.fit(X, y, epochs=10)

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m4330/4330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - loss: 8.4495e-05
Epoch 2/10
[1m4330/4330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - loss: 1.9575e-05
Epoch 3/10
[1m4330/4330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - loss: 1.8402e-05
Epoch 4/10
[1m4330/4330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - loss: 1.6886e-05
Epoch 5/10
[1m4330/4330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - loss: 1.5794e-05
Epoch 6/10
[1m4330/4330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - loss: 1.4361e-05
Epoch 7/10
[1m4330/4330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - loss: 1.3387e-05
Epoch 8/10
[1m4330/4330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - loss: 1.2924e-05
Epoch 9/10
[1m4330/4330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - loss: 1.2152e-05
Epoch 10/10
[1m4330/4330[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x7e69e107ad80>

In [None]:
# Make predictions on a subset of the data and compare with real values.
y_pred , y_real = [] , []
# Iterate through a specific range of the dataset.
for i in range(692720,693220-10,4):
  # Create a prediction window.
  window = temp[i:i+10].reshape(1, -1)
  # Get scaled prediction from the model.
  prediction_scaled = model.predict(window)
  # Inverse transform the prediction to original scale.
  prediction = scaler.inverse_transform(prediction_scaled)
  # Append predicted and real values to lists.
  y_pred.append(prediction[0][0])
  y_real.append(real_temp[i+10])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 142ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 149ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 139ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

In [None]:
# Convert prediction and real value lists to numpy arrays.
y_pred = np.array(y_pred)
y_real = np.array(y_real)

In [None]:
# Import metrics for evaluating regression models.
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Calculate Mean Absolute Error.
mae = mean_absolute_error(y_real, y_pred)
# Calculate Mean Squared Error.
mse = mean_squared_error(y_real, y_pred)
# Calculate Root Mean Squared Error.
rmse = np.sqrt(mse)
# Calculate R-squared.
r2 = r2_score(y_real, y_pred)

# Print the calculated model metrics.
print("\nModel Metrics:")
print("MAE :", mae)
print("MSE :", mse)
print("RMSE:", rmse)
print("R²  :", r2)


Model Metrics:
MAE : 0.027527109781901104
MSE : 0.0017419748342613957
RMSE: 0.041736972030340144
R²  : 0.5716135034534993


In [None]:
# Import tensorflow to save the model.
import tensorflow as tf
# Define the path to save the model.
save_path = './my_saved_model.keras'
# Save the trained Keras model.
model.save(save_path)

In [None]:
# Import tensorflow for model loading and conversion.
import tensorflow as tf

# Load the Keras model from the .keras file.
loaded_keras_model = tf.keras.models.load_model('./my_saved_model.keras')

# Convert the Keras model to TFLite format.
converter = tf.lite.TFLiteConverter.from_keras_model(loaded_keras_model)
tflite_model = converter.convert()

# Save the TFLite model to a file.
with open('model.tflite', 'wb') as f:
  f.write(tflite_model)

Saved artifact at '/tmp/tmpv09gobeg'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 10), dtype=tf.float32, name='input_layer_4')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  134733497801360: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134733497805200: TensorSpec(shape=(), dtype=tf.resource, name=None)


In [None]:
# Convert the TFLite model into a C array header file using xxd, suitable for embedded systems.
!xxd -i model.tflite > model.h

# Predict next sensors values with Random Forest

In [None]:
# Extract 'temperature' and 'humidity' columns for prediction with Random Forest.
sdata = data[['temperature','humidity']].values

In [None]:
# Import MinMaxScaler for feature scaling.
from sklearn.preprocessing import MinMaxScaler
# Initialize the MinMaxScaler.
scaler = MinMaxScaler()
# Scale the temperature and humidity data.
sdata = scaler.fit_transform(sdata.reshape(-1,2))

In [None]:
# Prepare the data for time series prediction with Random Forest.
# Create sequences of 10 previous sensor values (X) to predict the 11th values (y).
X, y = [],[]
for i in range(0,sdata.shape[0] - 1000,5):
  X.append(sdata[i:i+10])
  y.append(sdata[i+10])
# Convert lists to numpy arrays.
X=np.array(X)
y=np.array(y)
# Reshape X for the Random Forest model.
X = X.reshape(X.shape[0], -1)
# Print the shapes of the prepared X and y arrays.
print(X.shape, y.shape)

(138444, 20) (138444, 2)


In [None]:
# Import RandomForestRegressor from scikit-learn.
from sklearn.ensemble import RandomForestRegressor
# Initialize and train the RandomForestRegressor model.
model = RandomForestRegressor()
model.fit(X, y)

In [None]:
# Create a copy of the original 'temperature' and 'humidity' values.
rdata = data[['temperature','humidity']].values.reshape(-1,2)

In [None]:
# Make predictions using the trained Random Forest model and evaluate its performance.
y_pred , y_real = [] , []
# Iterate through a specific range of the dataset for prediction.
for i in range(692220,693220-10,4):
  # Create a prediction window from scaled data.
  window = sdata[i:i+10].reshape(1, -1)
  # Get scaled prediction from the model.
  prediction_scaled = model.predict(window)
  # Inverse transform the prediction to original scale.
  prediction = scaler.inverse_transform(prediction_scaled)
  # Append predicted and real values to lists.
  y_pred.append(prediction[0])
  y_real.append(rdata[i+10])
# Convert prediction and real value lists to numpy arrays.
y_pred = np.array(y_pred)
y_real = np.array(y_real)

# Import metrics for evaluating regression models.
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Calculate Mean Absolute Error.
mae = mean_absolute_error(y_real, y_pred)
# Calculate Mean Squared Error.
mse = mean_squared_error(y_real, y_pred)
# Calculate Root Mean Squared Error.
rmse = np.sqrt(mse)
# Calculate R-squared.
r2 = r2_score(y_real, y_pred)

# Print the calculated model metrics.
print("\nModel Metrics:")
print("MAE :", mae)
print("MSE :", mse)
print("RMSE:", rmse)
print("R²  :", r2)


Model Metrics:
MAE : 0.047069760437066344
MSE : 0.005346319462546625
RMSE: 0.07311853022693102
R²  : 0.8403243934164186


In [None]:
# Import joblib for saving the model.
import joblib
# Save the trained Random Forest model to a file.
joblib.dump(model, "random_forest_regressor.pkl")

['random_forest_regressor.pkl']

# Predict next sensors values with LSTM

In [None]:
# Extract 'temperature' and 'humidity' columns for prediction with LSTM.
sdata = data[['temperature','humidity']].values
# Create a copy of the original 'temperature' and 'humidity' values.
rdata = data[['temperature','humidity']].values.reshape(-1,2)

In [None]:
# Import MinMaxScaler for feature scaling.
from sklearn.preprocessing import MinMaxScaler
# Initialize the MinMaxScaler.
scaler = MinMaxScaler()
# Scale the temperature and humidity data.
sdata = scaler.fit_transform(sdata.reshape(-1,2))

In [None]:
# Prepare the data for time series prediction with LSTM.
# Create sequences of 10 previous sensor values (X) to predict the 11th values (y).
X, y = [],[]
for i in range(0,sdata.shape[0] - 10,5):
  X.append(sdata[i:i+10])
  y.append(sdata[i+10])
# Convert lists to numpy arrays.
X=np.array(X)
y=np.array(y)
# Reshape X for the LSTM model, keeping the 3D structure (samples, timesteps, features).
X = X.reshape(X.shape[0], -1)
# Print the shapes of the prepared X and y arrays.
print(X.shape, y.shape)

(138642, 20) (138642, 2)


In [None]:
# Import necessary layers for building a Sequential Keras LSTM model.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [None]:
# Build a Sequential LSTM model.
model = Sequential()
# Add the first LSTM layer with 128 units, returning sequences.
# Input shape is (timesteps, features), where timesteps is X.shape[1] and features is 1 after flattening.
model.add(LSTM(units=128, return_sequences=True,
          input_shape=(X.shape[1], 1)))
# Add a Dropout layer to prevent overfitting.
model.add(Dropout(0.2))
# Add a second LSTM layer with 128 units.
model.add(LSTM(units=128))
# Add another Dropout layer.
model.add(Dropout(0.2))
# Add a Dense output layer with 2 units (for temperature and humidity).
model.add(Dense(2))

# Compile the model with Adam optimizer and Mean Squared Error loss.
model.compile(optimizer='adam', loss='mean_squared_error')

  super().__init__(**kwargs)


In [None]:
# Train the LSTM model for 10 epochs.
# Note: The input X needs to be reshaped to (samples, timesteps, features) for LSTM.
# Since X was previously reshaped to (samples, features*timesteps), we need to correct this for LSTM.
# X should be reshaped to (X.shape[0], 10, 2) if 10 is the lookback and 2 is the number of features.
# However, given the current X.shape (138642, 20), it implies X has already been flattened in GeWVrNtPLcy9
# The current model expects (timesteps, 1) as input_shape, where timesteps = 20.
# This means the LSTM layers are being fed a flattened sequence of 20 elements as a single timestep.
# For a proper LSTM, X should be (samples, timesteps, features), e.g., (138642, 10, 2).
# If X was (138642, 20), then input_shape=(20, 1) means it's treating each of the 20 features as a timestep for a single feature input.
# For the current setup (X.shape=(138642, 20), input_shape=(20,1)), the input to LSTM will be treated as 20 timesteps with 1 feature each.
# It assumes the 20 elements are a sequence over time. Given X is a flattened window, this might not be ideal.
# For now, running with the current setup as defined.
model.fit(X,y,epochs=10)

Epoch 1/10
[1m4333/4333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 9ms/step - loss: 0.0037
Epoch 2/10
[1m4333/4333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 8ms/step - loss: 1.3234e-04
Epoch 3/10
[1m4333/4333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 9ms/step - loss: 1.0972e-04
Epoch 4/10
[1m4333/4333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 7ms/step - loss: 1.0235e-04
Epoch 5/10
[1m4333/4333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 7ms/step - loss: 9.9998e-05
Epoch 6/10
[1m4333/4333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 7ms/step - loss: 1.0032e-04
Epoch 7/10
[1m4333/4333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 7ms/step - loss: 9.5856e-05
Epoch 8/10
[1m4333/4333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 7ms/step - loss: 9.4336e-05
Epoch 9/10
[1m4333/4333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 7ms/step - loss: 9.5489e-05
Epoch 10/10
[1m4333/4333[0m [

<keras.src.callbacks.history.History at 0x7e69ed980f50>

In [None]:
# Make predictions using the trained LSTM model and evaluate its performance.
y_pred , y_real = [] , []
# Iterate through a specific range of the dataset for prediction.
for i in range(692720,693220-10,4):
  # Create a prediction window from scaled data.
  # Reshape for the LSTM model: (1, timesteps, features)
  # Here, assuming the model expects (1, 20, 1) based on the input_shape(20,1) from previous cell.
  window = sdata[i:i+10].reshape(1, -1, 1) # Reshape to (1, 20, 1) for the LSTM with input_shape=(20,1)
  # Get scaled prediction from the model.
  prediction_scaled = model.predict(window)
  # Inverse transform the prediction to original scale.
  prediction = scaler.inverse_transform(prediction_scaled)
  # Append predicted and real values to lists.
  y_pred.append(prediction[0])
  y_real.append(rdata[i+10])
# Convert prediction and real value lists to numpy arrays.
y_pred = np.array(y_pred)
y_real = np.array(y_real)

# Import metrics for evaluating regression models.
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Calculate Mean Absolute Error.
mae = mean_absolute_error(y_real, y_pred)
# Calculate Mean Squared Error.
mse = mean_squared_error(y_real, y_pred)
# Calculate Root Mean Squared Error.
rmse = np.sqrt(mse)
# Calculate R-squared.
r2 = r2_score(y_real, y_pred)

# Print the calculated model metrics.
print("\nModel Metrics:")
print("MAE :", mae)
print("MSE :", mse)
print("RMSE:", rmse)
print("R²  :", r2)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m