In [22]:
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [23]:
# Pin NumPy's global random state so the synthetic data is the same on every run
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [29]:
# Function to generate synthetic occupancy data between two timestamps

def generate_data(start_date, end_date, interval_minutes):
    """Generate a synthetic occupancy time series.

    Walks from ``start_date`` to ``end_date`` (inclusive) in steps of
    ``interval_minutes`` and draws one random occupancy flag (0 or 1) per
    step using NumPy's global random state (``np.random.choice``).

    Parameters
    ----------
    start_date : datetime.datetime
        Timestamp of the first sample.
    end_date : datetime.datetime
        Latest timestamp that may be sampled (inclusive).
    interval_minutes : int or float
        Spacing between consecutive samples, in minutes. Must be positive.

    Returns
    -------
    pandas.DataFrame
        One row per sample with columns ``datetime`` and ``occupancy``.
        The frame has no rows when ``start_date`` is after ``end_date``.

    Raises
    ------
    ValueError
        If ``interval_minutes`` is zero or negative. Such a step never moves
        past ``end_date``, so the sampling loop would never terminate.
    """
    if interval_minutes <= 0:
        raise ValueError(
            f"interval_minutes must be positive, got {interval_minutes!r}"
        )

    # The step is loop-invariant, so build it once.
    step = timedelta(minutes=interval_minutes)
    current_date = start_date
    data = []

    while current_date <= end_date:
        # One draw per step keeps the sequence produced under a fixed seed
        # identical to drawing sample-by-sample.
        occupancy = np.random.choice([0, 1])
        data.append([current_date, occupancy])
        current_date += step

    return pd.DataFrame(data, columns=['datetime', 'occupancy'])

In [30]:
# Simulation window: 2023-01-01 00:00 through 2023-01-31 23:59
start_date = datetime(year=2023, month=1, day=1, hour=0, minute=0)
end_date = datetime(year=2023, month=1, day=31, hour=23, minute=59)

In [31]:
# Sample the occupancy series every half hour across the chosen window
interval_minutes = 30
synthetic_data = generate_data(
    start_date=start_date,
    end_date=end_date,
    interval_minutes=interval_minutes,
)

In [32]:
# Derive a calendar-date column and a time-of-day column from the timestamp column
datetime_parts = synthetic_data['datetime'].dt
synthetic_data['date'] = datetime_parts.date
synthetic_data['time'] = datetime_parts.time

In [33]:
# Show the extracted time-of-day column
synthetic_data.loc[:, 'time']


0       00:00:00
1       00:30:00
2       01:00:00
3       01:30:00
4       02:00:00
          ...   
1483    21:30:00
1484    22:00:00
1485    22:30:00
1486    23:00:00
1487    23:30:00
Name: time, Length: 1488, dtype: object

In [34]:
# Display the frame to check the 'date' and 'time' columns next to 'datetime'
synthetic_data

Unnamed: 0,datetime,occupancy,date,time
0,2023-01-01 00:00:00,0,2023-01-01,00:00:00
1,2023-01-01 00:30:00,0,2023-01-01,00:30:00
2,2023-01-01 01:00:00,0,2023-01-01,01:00:00
3,2023-01-01 01:30:00,0,2023-01-01,01:30:00
4,2023-01-01 02:00:00,0,2023-01-01,02:00:00
...,...,...,...,...
1483,2023-01-31 21:30:00,1,2023-01-31,21:30:00
1484,2023-01-31 22:00:00,0,2023-01-31,22:00:00
1485,2023-01-31 22:30:00,0,2023-01-31,22:30:00
1486,2023-01-31 23:00:00,0,2023-01-31,23:00:00


In [35]:
# Remove the raw timestamp column in place
synthetic_data.drop(columns=['datetime'], inplace=True)

In [36]:
# Display the frame after the in-place drop of the 'datetime' column
synthetic_data

Unnamed: 0,occupancy,date,time
0,0,2023-01-01,00:00:00
1,0,2023-01-01,00:30:00
2,0,2023-01-01,01:00:00
3,0,2023-01-01,01:30:00
4,0,2023-01-01,02:00:00
...,...,...,...
1483,1,2023-01-31,21:30:00
1484,0,2023-01-31,22:00:00
1485,0,2023-01-31,22:30:00
1486,0,2023-01-31,23:00:00


In [37]:
# Converting the date column to numerical values (proleptic Gregorian ordinals)
synthetic_data['date_numeric'] = [day.toordinal() for day in synthetic_data['date']]

In [39]:
# Converting the time column to numerical values (minutes elapsed since midnight)
synthetic_data['time_numeric'] = [
    60 * moment.hour + moment.minute for moment in synthetic_data['time']
]

In [40]:
# Display the frame with the 'date_numeric' and 'time_numeric' columns added
synthetic_data

Unnamed: 0,occupancy,date,time,date_numeric,time_numeric
0,0,2023-01-01,00:00:00,738521,0
1,0,2023-01-01,00:30:00,738521,30
2,0,2023-01-01,01:00:00,738521,60
3,0,2023-01-01,01:30:00,738521,90
4,0,2023-01-01,02:00:00,738521,120
...,...,...,...,...,...
1483,1,2023-01-31,21:30:00,738551,1290
1484,0,2023-01-31,22:00:00,738551,1320
1485,0,2023-01-31,22:30:00,738551,1350
1486,0,2023-01-31,23:00:00,738551,1380


In [41]:
# Remove the original 'date' and 'time' columns in place, keeping their numeric encodings
synthetic_data.drop(columns=['date', 'time'], inplace=True)

In [42]:
# Display the frame after dropping 'date' and 'time'
synthetic_data

Unnamed: 0,occupancy,date_numeric,time_numeric
0,0,738521,0
1,0,738521,30
2,0,738521,60
3,0,738521,90
4,0,738521,120
...,...,...,...
1483,1,738551,1290
1484,0,738551,1320
1485,0,738551,1350
1486,0,738551,1380


In [44]:
# Separate the predictor columns (X) from the label to predict (y)
feature_columns = ['date_numeric', 'time_numeric']
X = synthetic_data[feature_columns]
y = synthetic_data['occupancy']

In [45]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [46]:
# Standardize features: fit the scaler on the training split only,
# then apply that same fitted scaler to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [47]:
# Fit a logistic-regression classifier (default settings) on the scaled training split
model = LogisticRegression()
model.fit(X=X_train_scaled, y=y_train)

In [48]:
# Predict occupancy labels for the held-out (scaled) test rows
y_pred = model.predict(X=X_test_scaled)


In [49]:
# predict() on a classifier already returns class labels (0 or 1), not
# probabilities, so rounding would be a no-op. Keep the name the evaluation
# cell reads.
y_pred_binary = y_pred

In [50]:
# Score the predictions as the fraction of test rows labelled correctly.
# The occupancy labels were drawn at random, so a value near 50% is expected.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_binary)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

Model Accuracy: 50.67%


In [72]:
# Train a Random Forest Classifier.
# RandomForestClassifier is imported in the notebook's import cell at the top,
# so every dependency is visible in one place.
model = RandomForestClassifier(n_estimators=1000, random_state=42)
model.fit(X_train_scaled, y_train)

In [73]:
# Predict occupancy labels for the held-out (scaled) test rows with the current model
y_pred = model.predict(X=X_test_scaled)


In [74]:
# predict() on a classifier already returns class labels (0 or 1), not
# probabilities, so rounding would be a no-op. Keep the name the evaluation
# cell reads.
y_pred_binary = y_pred

In [75]:
# Score the predictions as the fraction of test rows labelled correctly.
# The occupancy labels were drawn at random, so a value near 50% is expected.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_binary)
print(f"Model Accuracy: {accuracy * 100:.2f}%")


Model Accuracy: 55.03%
