<a href="https://colab.research.google.com/github/LegendSeyi/ML-PROJECTS/blob/main/Occupancy_prediction_IOT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ML PROJECT: PREDICTION OF OCCUPANCY USING IOT DEVICES FOR SMART HOME

In [None]:
import numpy as np
import pandas as pd
import random

No real data is available yet, so I generated synthetic data to use as test data for the project.

In [1]:
np.random.seed(42)  # legacy global seed so every draw below is reproducible

# Simulation parameters
days = 7
samples_per_hour = 6  # one reading every 10 minutes
total_samples = days * 24 * samples_per_hour

# Derive the spacing between readings from samples_per_hour instead of
# hard-coding "10min": changing the rate above then still yields timestamps
# that span exactly `days` days.
sample_interval = pd.Timedelta(hours=1) / samples_per_hour

timestamps = pd.date_range(
    start="2026-01-01",
    periods=total_samples,
    freq=sample_interval,
)

# Voltage is drawn around a fixed mean regardless of occupancy (stable grid).
VOLTAGE_MEAN, VOLTAGE_STD = 230, 3
# (mean, std) of the current draw, keyed by the occupancy label.
CURRENT_PARAMS = {1: (1.8, 0.4), 0: (0.2, 0.05)}


def occupancy_probabilities(hour):
    """Return ``[P(occupied), P(empty)]`` for an hour of the day (0-23).

    Hours 6-8 and 18-23 are the high-occupancy window, 9-17 is the
    low-occupancy window, and the remaining hours (0-5) are the lowest.
    """
    if 6 <= hour <= 8 or 18 <= hour <= 23:
        return [0.75, 0.25]
    if 9 <= hour <= 17:
        return [0.2, 0.8]
    return [0.1, 0.9]


data = []

for ts in timestamps:
    hour = ts.hour

    # The order of the three random draws (occupancy, voltage, current) is
    # kept fixed so the seeded sequence stays reproducible.
    occupancy = np.random.choice([1, 0], p=occupancy_probabilities(hour))

    voltage = np.random.normal(VOLTAGE_MEAN, VOLTAGE_STD)

    current_mean, current_std = CURRENT_PARAMS[1 if occupancy == 1 else 0]
    current = np.random.normal(current_mean, current_std)

    # Power is the plain product of the two simulated readings.
    power = voltage * current

    data.append([ts, voltage, current, power, occupancy])

df = pd.DataFrame(
    data,
    columns=["timestamp", "voltage", "current", "power", "occupancy"]
)

df.head()

Unnamed: 0,timestamp,voltage,current,power,occupancy
0,2026-01-01 00:00:00,226.66436,0.215945,48.94706,0
1,2026-01-01 00:10:00,234.737638,0.238372,55.954818,0
2,2026-01-01 00:20:00,228.257366,1.589932,362.913707,1
3,2026-01-01 00:30:00,230.725887,0.104336,24.073013,0
4,2026-01-01 00:40:00,232.449335,0.123806,28.778669,0


In [3]:
# Sanity check: (number of rows, number of columns) of the simulated frame.
df.shape

(1008, 5)

In [4]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1008 entries, 0 to 1007
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   timestamp  1008 non-null   datetime64[ns]
 1   voltage    1008 non-null   float64       
 2   current    1008 non-null   float64       
 3   power      1008 non-null   float64       
 4   occupancy  1008 non-null   int64         
dtypes: datetime64[ns](1), float64(3), int64(1)
memory usage: 39.5 KB


In [5]:
# Summary statistics (count, mean, min, quartiles, max, std) for every column.
df.describe()

Unnamed: 0,timestamp,voltage,current,power,occupancy
count,1008,1008.0,1008.0,1008.0,1008.0
mean,2026-01-04 11:55:00,229.854385,0.839893,193.018202,0.391865
min,2026-01-01 00:00:00,219.557574,0.026429,6.049943,0.0
25%,2026-01-02 17:57:30,227.925862,0.195965,44.811979,0.0
50%,2026-01-04 11:55:00,229.923686,0.247219,56.880219,0.0
75%,2026-01-06 05:52:30,231.738471,1.692795,390.491371,1.0
max,2026-01-07 23:50:00,238.334939,3.043967,718.336748,1.0
std,,2.970307,0.831187,191.072739,0.488409


In [9]:
# Round the measurement columns to two decimals for readability.
# `occupancy` is an integer label, so rounding leaves it unchanged.
columns_to_round = ['voltage', 'current', 'power', 'occupancy']
df[columns_to_round] = df[columns_to_round].round(2)

In [10]:
# Display the frame; the notebook shows only the first and last rows of a long frame.
df

Unnamed: 0,timestamp,voltage,current,power,occupancy
0,2026-01-01 00:00:00,226.66,0.22,48.95,0
1,2026-01-01 00:10:00,234.74,0.24,55.95,0
2,2026-01-01 00:20:00,228.26,1.59,362.91,1
3,2026-01-01 00:30:00,230.73,0.10,24.07,0
4,2026-01-01 00:40:00,232.45,0.12,28.78,0
...,...,...,...,...,...
1003,2026-01-07 23:10:00,230.55,2.88,663.35,1
1004,2026-01-07 23:20:00,225.72,2.11,476.85,1
1005,2026-01-07 23:30:00,229.76,1.47,337.00,1
1006,2026-01-07 23:40:00,226.44,0.87,197.28,1


In [12]:
# Summary statistics for the power column only.
df["power"].describe()

Unnamed: 0,power
count,1008.0
mean,193.018155
std,191.072656
min,6.05
25%,44.815
50%,56.88
75%,390.4875
max,718.34
