In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import numpy as np
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error



In [13]:
df = pd.read_csv("sensor_data.csv", parse_dates=['timestamp'])
df = df.sort_values('timestamp').reset_index(drop=True)
df.head()

Unnamed: 0,timestamp,battery,humidity,motion,temperature
0,2025-09-14T15:21:09+03:00,3.081,66.1,7761,25.86
1,2025-09-14T15:23:36+03:00,3.08,66.1,7763,25.85
2,2025-09-14T15:25:07+03:00,3.081,66.2,7766,25.86
3,2025-09-14T15:25:40+03:00,3.081,66.3,7767,25.85
4,2025-09-14T15:28:06+03:00,3.079,66.3,7768,25.88


In [14]:
df.shape
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 949 entries, 0 to 948
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   timestamp    949 non-null    object 
 1   battery      949 non-null    float64
 2   humidity     949 non-null    float64
 3   motion       949 non-null    int64  
 4   temperature  949 non-null    float64
dtypes: float64(3), int64(1), object(1)
memory usage: 37.2+ KB


In [15]:
df.describe(include='all')

Unnamed: 0,timestamp,battery,humidity,motion,temperature
count,949,949.0,949.0,949.0,949.0
unique,949,,,,
top,2025-09-14T15:21:09+03:00,,,,
freq,1,,,,
mean,,3.082228,66.256375,11895.880927,25.579758
std,,0.004677,3.221092,6162.312172,0.673838
min,,3.067,52.0,155.0,23.77
25%,,3.082,64.7,8050.0,24.96
50%,,3.084,66.0,12764.0,25.6
75%,,3.085,68.7,15774.0,26.09


In [16]:
df.isnull().sum()

timestamp      0
battery        0
humidity       0
motion         0
temperature    0
dtype: int64

In [17]:
print(f"Number of duplicates: {df.duplicated().sum()}")

Number of duplicates: 0


In [18]:
# Create lag features for temperature
df['temp_lag1'] = df['temperature'].shift(1)
df['temp_lag2'] = df['temperature'].shift(2)
df['temp_lag3'] = df['temperature'].shift(3)

# Create lag features for humidity
df['hum_lag1'] = df['humidity'].shift(1)
df['hum_lag2'] = df['humidity'].shift(2)
df['hum_lag3'] = df['humidity'].shift(3)

# Target: next reading (~30 min ahead)
df['temp_next'] = df['temperature'].shift(-1)
df['hum_next'] = df['humidity'].shift(-1)

# Drop rows with NaN due to shifting
df.dropna(inplace=True)


In [19]:
features = ['temp_lag1','temp_lag2','temp_lag3',
            'hum_lag1','hum_lag2','hum_lag3']

X = df[features]
Y = df[['temp_next', 'hum_next']]   # two targets together

# Chronological split (no shuffling)
split_idx = int(len(df)*0.8)
X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
Y_train, Y_test = Y.iloc[:split_idx], Y.iloc[split_idx:]

In [20]:
rf = MultiOutputRegressor(
        RandomForestRegressor(
            n_estimators=200,
            random_state=42
        )
    )

# Train
rf.fit(X_train, Y_train)

# Predict
Y_pred = rf.predict(X_test)

# Separate predictions if needed
temp_pred = Y_pred[:, 0]
hum_pred  = Y_pred[:, 1]

# Evaluate (example with RMSE for each target)
rmse_temp = np.sqrt(mean_squared_error(Y_test.iloc[:,0], temp_pred))
rmse_hum  = np.sqrt(mean_squared_error(Y_test.iloc[:,1], hum_pred))

print(f"Temp RMSE: {rmse_temp:.3f}")
print(f"Hum  RMSE: {rmse_hum:.3f}")

Temp RMSE: 0.247
Hum  RMSE: 1.315


In [21]:
import joblib

# Save the combined temperature + humidity model
joblib.dump(rf, "temp_hum_model.joblib")


['temp_hum_model.joblib']

In [22]:
import joblib
import pandas as pd

# Load the combined model
rf = joblib.load("temp_hum_model.joblib")

# Take the last 3 readings
latest = df[['temperature', 'humidity']].tail(3)

latest_features = pd.DataFrame({
    "temp_lag1": [latest["temperature"].iloc[2]],
    "temp_lag2": [latest["temperature"].iloc[1]],
    "temp_lag3": [latest["temperature"].iloc[0]],
    "hum_lag1":  [latest["humidity"].iloc[2]],
    "hum_lag2":  [latest["humidity"].iloc[1]],
    "hum_lag3":  [latest["humidity"].iloc[0]],
})

# Predict: returns [[pred_temp, pred_hum]]
pred = rf.predict(latest_features)[0]
pred_temp, pred_hum = pred[0], pred[1]

print(f"Next reading forecast -> Temp: {pred_temp:.2f} °C, Humidity: {pred_hum:.1f} %")


Next reading forecast -> Temp: 25.95 °C, Humidity: 67.8 %
