**Generate synthetic accelerometer data for a less-used construction tool**

In [1]:
import pandas as pd
import numpy as np
from faker import Faker
import matplotlib.pyplot as plt

In [2]:
# Load the recorded accelerometer samples straight into `df`.
df = pd.read_csv('accelerometer_dataset.csv')

In [3]:
# Faker instance; nothing in the cells visible here references it.
fake = Faker()

def calculate_baseline_stats(data):
    """Compute per-status mean and standard deviation of the x, y and z columns.

    Args:
        data: DataFrame with a 'status' column and numeric 'x', 'y' and 'z'
            columns.

    Returns:
        A dict keyed by each distinct value of ``data['status']`` (in order of
        first appearance). Each value is a dict with the keys 'x_mean',
        'x_std', 'y_mean', 'y_std', 'z_mean' and 'z_std'. The standard
        deviations use the pandas default (sample standard deviation).
    """
    stats = {}
    # Read from the `data` argument, not from a module-level frame, so the
    # function works on whichever DataFrame the caller passes in.
    for status in data['status'].unique():
        subset = data[data['status'] == status]
        axis_stats = {}
        for axis in ('x', 'y', 'z'):
            axis_stats[axis + '_mean'] = subset[axis].mean()
            axis_stats[axis + '_std'] = subset[axis].std()
        stats[status] = axis_stats
    return stats

# Per-status mean/std of the x, y and z columns of the loaded dataset.
baseline_stats = calculate_baseline_stats(df)

def generate_data_from_baseline(baseline_stats, total_rows):
    """Generate synthetic x/y/z readings from per-status normal distributions.

    The rows are split 25% 'used', 50% 'false_motion' and 25% 'idle'. For each
    status, x, y and z are drawn independently from a normal distribution with
    that status's mean and standard deviation.

    Args:
        baseline_stats: Mapping of status -> dict with the keys 'x_mean',
            'x_std', 'y_mean', 'y_std', 'z_mean' and 'z_std'. It must contain
            entries for 'used', 'false_motion' and 'idle'.
        total_rows: Number of rows to generate.

    Returns:
        A DataFrame with columns 'x', 'y', 'z' and 'status' holding exactly
        ``int(total_rows)`` rows, grouped by status in the order 'used',
        'false_motion', 'idle'.

    Raises:
        ValueError: If ``total_rows`` is negative.
        KeyError: If ``baseline_stats`` lacks one of the three statuses.
    """
    shares = {
        'used': 0.25,
        'false_motion': 0.50,
        'idle': 0.25,
    }

    total = int(total_rows)
    if total < 0:
        raise ValueError('total_rows must be non-negative')

    # Truncating each share on its own can lose rows (10 -> 2 + 5 + 2 = 9).
    # Hand the leftover rows to the statuses with the largest fractional
    # part; ties keep the declaration order because the sort is stable.
    exact = {status: total * share for status, share in shares.items()}
    counts = {status: int(value) for status, value in exact.items()}
    shortfall = total - sum(counts.values())
    neediest = sorted(
        exact,
        key=lambda status: exact[status] - counts[status],
        reverse=True,
    )
    for status in neediest[:shortfall]:
        counts[status] += 1

    # Collect whole columns instead of appending one dict per row. The draw
    # order (x, y, z for each status in turn) matches the row-by-row version,
    # so a seeded run gives the same values whenever the counts are unchanged.
    columns = {'x': [], 'y': [], 'z': []}
    labels = []
    for status, count in counts.items():
        stats = baseline_stats[status]
        for axis in ('x', 'y', 'z'):
            columns[axis].append(
                np.random.normal(
                    stats[axis + '_mean'], stats[axis + '_std'], size=count
                )
            )
        labels.extend([status] * count)

    return pd.DataFrame({
        'x': np.concatenate(columns['x']),
        'y': np.concatenate(columns['y']),
        'z': np.concatenate(columns['z']),
        'status': labels,
    })

# Draw 100000 synthetic rows from the per-status baseline statistics.
generated_data = generate_data_from_baseline(baseline_stats, 100000)


In [4]:
# Write the synthetic rows to CSV; index=False leaves the DataFrame index out of the file.
file_path = '2_less_used_data.csv'
generated_data.to_csv(file_path, index=False)