# Synthetic Time-Series Dataset Generation
This notebook generates 12,000 synthetic electrical time-series points (one per minute, with roughly 3% labeled anomalies), prepares ML-ready features, and saves the result as a CSV file.

In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import os

# Seed NumPy's global RNG so every np.random.* draw below is repeatable
# when the notebook is run top to bottom on a fresh kernel.
np.random.seed(42)

# Number of rows (time steps) the generation loop produces.
n_series = 12000
# Timestamp assigned to the first row.
start_time = datetime(2025, 6, 1)
# Spacing between consecutive timestamps, in seconds.
# NOTE(review): base_load() works with 24*60 samples per day, so changing this
# value alone rescales the timestamps but not the daily/weekly cycle length.
freq_seconds = 60
# Per-row probability that the loop injects an anomaly and labels the row 1.
anomaly_fraction = 0.03

def base_load(t_idx, samples_per_day=24*60):
    """Return one noisy baseline load value for sample index ``t_idx``.

    The value is the sum of three parts:

    * a daily sinusoid with mean 100, amplitude 40 and a -0.5 rad phase shift,
    * a weekly sinusoid with amplitude 5,
    * one Gaussian noise draw (standard deviation 2) from NumPy's global RNG.

    Parameters
    ----------
    t_idx : int or float
        Position of the sample in the series, counted from 0.
    samples_per_day : int or float, optional
        Number of samples that make up one day. The default, ``24*60``,
        corresponds to one sample per minute; for another spacing pass
        ``86400 / <seconds between samples>``.

    Returns
    -------
    float
        ``daily + weekly + noise`` (a NumPy float scalar for scalar input).

    Raises
    ------
    ValueError
        If ``samples_per_day`` is not strictly positive.
    """
    if samples_per_day <= 0:
        raise ValueError("samples_per_day must be positive")

    # Fraction of the current day that has elapsed, in [0, 1).
    day_frac = (t_idx % samples_per_day) / samples_per_day
    daily = 100 + 40*np.sin(2*np.pi*day_frac - 0.5)
    # One full weekly cycle every 7 days' worth of samples.
    weekly = 5*np.sin(2*np.pi*(t_idx/(samples_per_day*7)))
    # Exactly one RNG draw per call, so the global random stream is consumed
    # in the same order regardless of samples_per_day.
    noise = np.random.normal(0, 2)
    return daily + weekly + noise

def inject_anomaly(value, kind):
    """Return ``value`` shifted by a single random offset chosen by ``kind``.

    Recognised kinds and the offset each one applies (all drawn from NumPy's
    global RNG, one draw per call):

    * ``"spike"``       adds a uniform draw from [50, 120)
    * ``"drop"``        subtracts a uniform draw from [40, 90)
    * ``"drift"``       adds a uniform draw from [20, 60)
    * ``"noise_burst"`` adds a normal draw with mean 0 and std 30

    Any other ``kind`` returns ``value`` untouched and draws nothing.
    """
    if kind == "spike":
        offset = np.random.uniform(50, 120)
    elif kind == "drop":
        offset = -np.random.uniform(40, 90)
    elif kind == "drift":
        offset = np.random.uniform(20, 60)
    elif kind == "noise_burst":
        offset = np.random.normal(0, 30)
    else:
        # Unrecognised kind: pass the value through unchanged.
        return value
    return value + offset

def _simulate_sample(step):
    """Simulate the readings for one time step.

    Returns ``(reading, hit)`` where ``reading`` is
    ``[timestamp, voltage, current, power, pf, freq, harmonics]`` and ``hit``
    is truthy when an anomaly was injected. The random draws happen in a fixed
    order (base load, voltage, current, pf, freq, harmonics, anomaly test,
    then anomaly kind/offset), so the seeded sequence is reproducible.
    """
    stamp = start_time + timedelta(seconds=step * freq_seconds)
    load = base_load(step)

    v_sample = 230 + np.random.normal(0, 0.5)
    i_sample = load / v_sample + np.random.normal(0, 0.02)
    p_sample = v_sample * i_sample
    # Power factor is kept inside [0.7, 1.0].
    pf_sample = max(0.7, min(1.0, 0.95 + np.random.normal(0, 0.02)))
    f_sample = 50 + np.random.normal(0, 0.02)
    h_sample = abs(np.random.normal(0, 0.01))

    hit = np.random.rand() < anomaly_fraction
    if hit:
        anomaly_kind = np.random.choice(["spike", "drop", "drift", "noise_burst"])
        p_sample = inject_anomaly(p_sample, anomaly_kind)
        # Recompute current so it stays consistent with the altered power.
        # NOTE(review): a "drop" subtracts up to 90 from a power value that
        # base_load centres on 100 +/- 45, so power and current can become
        # negative - confirm that is intended.
        i_sample = p_sample / v_sample

    return [stamp, v_sample, i_sample, p_sample, pf_sample, f_sample, h_sample], hit


# Build the dataset one row per time step; labels[k] is 1 where row k had an
# anomaly injected and 0 otherwise.
rows = []
labels = np.zeros(n_series, int)

for step in range(n_series):
    reading, hit = _simulate_sample(step)
    if hit:
        labels[step] = 1
    reading.append(labels[step])
    rows.append(reading)

column_names = ["timestamp", "voltage", "current", "power", "pf", "freq", "harmonics", "anomaly"]
df = pd.DataFrame(rows, columns=column_names)
df.head()

Unnamed: 0,timestamp,voltage,current,power,pf,freq,harmonics,anomaly
0,2025-06-01 00:00:00,229.930868,0.368784,84.794879,0.980461,49.995317,0.002341,0
1,2025-06-01 00:01:00,230.505258,0.342116,78.859618,0.939497,49.988572,0.009241,0
2,2025-06-01 00:02:00,229.718856,0.317924,73.033108,0.956285,49.98184,0.014123,0
3,2025-06-01 00:03:00,230.29886,0.398714,91.823453,0.957885,50.002444,0.005154,0
4,2025-06-01 00:04:00,229.854153,0.337099,77.48363,0.987046,49.99973,0.010577,0


In [2]:
# Derived features computed from the raw readings. NaNs that arise at the
# start of the series (no previous sample / single-sample window) become 0.
roll_window = 5
power_series = df['power']
power_rolling = power_series.rolling(roll_window, min_periods=1)

# Trailing mean and standard deviation of power over the last 5 samples
# (fewer at the very start, because min_periods=1).
df['power_ma_5'] = power_rolling.mean()
df['power_std_5'] = power_rolling.std().fillna(0)

# Sample-to-sample change in current.
df['current_diff_1'] = df['current'].diff().fillna(0)

# Relative change in power. pct_change() divides by the previous value, so a
# previous power of exactly 0 yields +/-inf, which fillna(0) alone would leave
# in the feature column. Map those to NaN first so they are zero-filled too.
df['power_pct_change_1'] = (
    power_series.pct_change()
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)
df.head()

Unnamed: 0,timestamp,voltage,current,power,pf,freq,harmonics,anomaly,power_ma_5,power_std_5,current_diff_1,power_pct_change_1
0,2025-06-01 00:00:00,229.930868,0.368784,84.794879,0.980461,49.995317,0.002341,0,84.794879,0.0,0.0,0.0
1,2025-06-01 00:01:00,230.505258,0.342116,78.859618,0.939497,49.988572,0.009241,0,81.827248,4.196863,-0.026668,-0.069996
2,2025-06-01 00:02:00,229.718856,0.317924,73.033108,0.956285,49.98184,0.014123,0,78.895868,5.880969,-0.024192,-0.073885
3,2025-06-01 00:03:00,230.29886,0.398714,91.823453,0.957885,50.002444,0.005154,0,82.127764,8.052193,0.08079,0.257285
4,2025-06-01 00:04:00,229.854153,0.337099,77.48363,0.987046,49.99973,0.010577,0,81.198938,7.276122,-0.061615,-0.156167


In [3]:
# Write the feature table to CSV (without the index column), creating the
# target directory first if it does not exist yet. The cell's last expression
# echoes the file location.
path = '/mnt/data/synthetic_timeseries_notebook/synthetic_timeseries.csv'
os.makedirs(os.path.dirname(path), exist_ok=True)
df.to_csv(path, index=False)
path

'/mnt/data/synthetic_timeseries_notebook/synthetic_timeseries.csv'