# In [1]:
import pandas as pd
import numpy as np
import random

# Generate synthetic dataset for regression
def generate_regression_data(n_samples=1000, seed=None) -> pd.DataFrame:
    """Build a synthetic water-leakage dataset for regression.

    Every row contains five randomly drawn features and a continuous
    target computed from them::

        Leakage Severity = 0.3 * Pipe Age
                         + 0.5 * Cracks * Pressure
                         - 0.2 * Temperature
                         + 0.4 * Humidity
                         + noise            # uniform on [-10, 10]

    Args:
        n_samples: Number of rows to generate. Zero or a negative value
            yields an empty frame that still carries all six columns.
        seed: Optional seed for a private random generator. When given,
            the same seed always produces the same dataset and the
            module-level ``random`` state is not touched. When ``None``
            (the default), values are drawn from the module-level
            ``random`` generator.

    Returns:
        A DataFrame with the columns 'Pipe Age', 'Pressure',
        'Temperature', 'Humidity', 'Cracks' and 'Leakage Severity'.
    """
    # A dedicated generator makes seeded runs reproducible without
    # disturbing other users of the global ``random`` state.
    rng = random if seed is None else random.Random(seed)

    columns = ['Pipe Age', 'Pressure', 'Temperature', 'Humidity', 'Cracks', 'Leakage Severity']
    rows = []
    for _ in range(n_samples):
        # Draw order is kept fixed so that a given generator state always
        # maps to the same row.
        pipe_age = rng.randint(1, 50)  # in years
        pressure = rng.randint(30, 200)  # in psi
        temperature = rng.uniform(0, 40)  # in Celsius
        humidity = rng.randint(20, 90)  # in percentage
        cracks = rng.choice([0, 1])  # 0: no cracks, 1: cracks present

        # Severity of leakage (continuous target). Pressure only
        # contributes when cracks are present (cracks == 1).
        leakage_severity = (
            0.3 * pipe_age
            + 0.5 * cracks * pressure
            - 0.2 * temperature
            + 0.4 * humidity
            + rng.uniform(-10, 10)  # noise
        )

        rows.append([pipe_age, pressure, temperature, humidity, cracks, leakage_severity])

    # Convert to pandas DataFrame
    return pd.DataFrame(rows, columns=columns)

# Create the synthetic dataset
df = generate_regression_data(1000)

# Single source of truth for the output location, so the path written to
# disk and the path reported to the user cannot drift apart.
OUTPUT_PATH = 'water_leakage_regression_data.xlsx'

# Save the dataset to an Excel file; index=False leaves the row index out.
# NOTE: pandas needs an Excel writer engine (e.g. openpyxl) for .xlsx output.
df.to_excel(OUTPUT_PATH, index=False)

print(f"Synthetic regression data saved to '{OUTPUT_PATH}'")

# Output: Synthetic regression data saved to 'water_leakage_regression_data.xlsx'
