In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import numpy as np
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error



In [2]:
df = pd.read_csv("sensor_data.csv", parse_dates=['timestamp'])
df = df.sort_values('timestamp').reset_index(drop=True)
df.head()

Unnamed: 0,timestamp,Battery,Humidity,Motion,Temperature
0,2025-10-03T07:23:57.372953+03:00,3.066,62.3,0,26.75
1,2025-10-03T07:25:12+03:00,3.066,63.4,1,26.51
2,2025-10-03T07:25:12.824344+03:00,3.066,63.4,1,26.51
3,2025-10-03T07:34:17+03:00,3.066,65.1,2,25.64
4,2025-10-03T07:34:17.003882+03:00,3.066,65.1,2,25.64


In [3]:
df.shape
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 693 entries, 0 to 692
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   timestamp    693 non-null    object 
 1   Battery      693 non-null    float64
 2   Humidity     693 non-null    float64
 3   Motion       693 non-null    int64  
 4   Temperature  693 non-null    float64
dtypes: float64(3), int64(1), object(1)
memory usage: 27.2+ KB


In [4]:
df.describe(include='all')

Unnamed: 0,timestamp,Battery,Humidity,Motion,Temperature
count,693,693.0,693.0,693.0,693.0
unique,693,,,,
top,2025-10-07T19:32:09+03:00,,,,
freq,1,,,,
mean,,3.073068,70.761472,3044.090909,24.668932
std,,0.002997,2.245613,2015.603801,0.482345
min,,3.065,62.3,0.0,23.76
25%,,3.071,69.8,191.0,24.31
50%,,3.074,70.9,4400.0,24.69
75%,,3.075,71.8,4711.0,24.89


In [5]:
df.isnull().sum()

timestamp      0
Battery        0
Humidity       0
Motion         0
Temperature    0
dtype: int64

In [6]:
print(f"Number of duplicates: {df.duplicated().sum()}")

Number of duplicates: 0


In [7]:
# Create lag features for temperature
df['temp_lag1'] = df['Temperature'].shift(1)
df['temp_lag2'] = df['Temperature'].shift(2)
df['temp_lag3'] = df['Temperature'].shift(3)

# Create lag features for humidity
df['hum_lag1'] = df['Humidity'].shift(1)
df['hum_lag2'] = df['Humidity'].shift(2)
df['hum_lag3'] = df['Humidity'].shift(3)

# Target: next reading (~30 min ahead)
df['temp_next'] = df['Temperature'].shift(-1)
df['hum_next'] = df['Humidity'].shift(-1)

# Drop rows with NaN due to shifting
df.dropna(inplace=True)


In [8]:
features = ['temp_lag1','temp_lag2','temp_lag3',
            'hum_lag1','hum_lag2','hum_lag3']

X = df[features]
Y = df[['temp_next', 'hum_next']]   # two targets together

# Chronological split (no shuffling)
split_idx = int(len(df)*0.8)
X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
Y_train, Y_test = Y.iloc[:split_idx], Y.iloc[split_idx:]

In [9]:
rf = MultiOutputRegressor(
        RandomForestRegressor(
            n_estimators=200,
            random_state=42
        )
    )

# Train
rf.fit(X_train, Y_train)

# Predict
Y_pred = rf.predict(X_test)

# Separate predictions if needed
temp_pred = Y_pred[:, 0]
hum_pred  = Y_pred[:, 1]

# Evaluate (example with RMSE for each target)
rmse_temp = np.sqrt(mean_squared_error(Y_test.iloc[:,0], temp_pred))
rmse_hum  = np.sqrt(mean_squared_error(Y_test.iloc[:,1], hum_pred))

print(f"Temp RMSE: {rmse_temp:.3f}")
print(f"Hum  RMSE: {rmse_hum:.3f}")

Temp RMSE: 0.157
Hum  RMSE: 0.917


In [10]:
import joblib

# Save the combined temperature + humidity model
joblib.dump(rf, "temp_hum_model.joblib")


['temp_hum_model.joblib']

In [11]:
import joblib
import pandas as pd

# Load the combined model
rf = joblib.load("temp_hum_model.joblib")

# Take the last 3 readings
latest = df[['Temperature', 'Humidity']].tail(3)

latest_features = pd.DataFrame({
    "temp_lag1": [latest["Temperature"].iloc[2]],
    "temp_lag2": [latest["Temperature"].iloc[1]],
    "temp_lag3": [latest["Temperature"].iloc[0]],
    "hum_lag1":  [latest["Humidity"].iloc[2]],
    "hum_lag2":  [latest["Humidity"].iloc[1]],
    "hum_lag3":  [latest["Humidity"].iloc[0]],
})

# Predict: returns [[pred_temp, pred_hum]]
pred = rf.predict(latest_features)[0]
pred_temp, pred_hum = pred[0], pred[1]

print(f"Next reading forecast -> Temp: {pred_temp:.2f} °C, Humidity: {pred_hum:.1f} %")


Next reading forecast -> Temp: 24.69 °C, Humidity: 71.1 %
