In [1]:

import os, time, pandas as pd,random
from datetime import datetime

# Output directory
output_dir = "sensor_data/csv"
os.makedirs(output_dir, exist_ok=True)

# Define the headers for the CSV file related to customer transactions
header = ['sensor_id', 'timestamp', 'temperature', 'humidity']


In [2]:

# Generate random temperature and humidity data
def generate_temperature_humidity_data(sensor_id=1):
    temp = round(random.uniform(18, 30), 2)
    return {
        "sensor_id": sensor_id,
        "timestamp": datetime.now().isoformat(),
        "temperature": temp,
        "temperature_k" : temp + 273.15,
        "temperature_f" : round((temp * 9/5 + 32), 2),
        "humidity": round(random.uniform(40, 70), 2) if random.random() > 0.2 else None
    }


In [3]:
# Save data as CSV
def save_temperature_humidity_csv(filename, data):
    filepath = os.path.join(output_dir, filename)
    df = pd.DataFrame(data)
    df.to_csv(filepath, mode='w', index=False, header=True)  
    print(f"Saved Temperature & Humidity CSV: {filepath}")

In [4]:

# User input for number of times to run
num_iterations = int(input("Enter number of readings to generate: "))

temperature_humidity_sensor_data = []
# Generate and save sensor data
for i in range(num_iterations):
    temperature_humidity_data = generate_temperature_humidity_data()
    temperature_humidity_sensor_data.append(temperature_humidity_data)
    print(temperature_humidity_data)
    time.sleep(0.01)
    if i %2 == 0 and i != 0:
        temperature_humidity_sensor_data.append(temperature_humidity_data)

save_temperature_humidity_csv("temperature_humidity.csv", temperature_humidity_sensor_data)


{'sensor_id': 1, 'timestamp': '2025-03-31T15:29:58.186187', 'temperature': 19.5, 'temperature_k': 292.65, 'temperature_f': 67.1, 'humidity': 59.44}
{'sensor_id': 1, 'timestamp': '2025-03-31T15:29:58.198188', 'temperature': 22.96, 'temperature_k': 296.10999999999996, 'temperature_f': 73.33, 'humidity': None}
{'sensor_id': 1, 'timestamp': '2025-03-31T15:29:58.209194', 'temperature': 25.61, 'temperature_k': 298.76, 'temperature_f': 78.1, 'humidity': None}
{'sensor_id': 1, 'timestamp': '2025-03-31T15:29:58.220189', 'temperature': 21.46, 'temperature_k': 294.60999999999996, 'temperature_f': 70.63, 'humidity': None}
{'sensor_id': 1, 'timestamp': '2025-03-31T15:29:58.231200', 'temperature': 18.38, 'temperature_k': 291.53, 'temperature_f': 65.08, 'humidity': 52.79}
{'sensor_id': 1, 'timestamp': '2025-03-31T15:29:58.242193', 'temperature': 28.05, 'temperature_k': 301.2, 'temperature_f': 82.49, 'humidity': 49.92}
{'sensor_id': 1, 'timestamp': '2025-03-31T15:29:58.253190', 'temperature': 21.34, '

In [9]:
from sklearn.linear_model import LinearRegression


def impute_missing_values(df, strategy ="mean"):
    if strategy == "mean":
        df["humidity"] = df["humidity"].fillna(df["humidity"].mean()).round(2)
    elif strategy == "median":
        df["humidity"] = df["humidity"].fillna(df["humidity"].median()).round(2)
    elif strategy == "mode":
        df["humidity"] = df["humidity"].fillna(df["humidity"].mode()[0]).round(2)
    elif strategy == "regression":
        hum_train = df.dropna(subset=["humidity"])
        model = LinearRegression()
        model.fit(hum_train[["temperature"]], hum_train["humidity"])
        missing_hum = df[df["humidity"].isnull()]
        predicted_hum = model.predict(missing_hum[["temperature"]]).round(2)
        df.loc[df["humidity"].isnull(), "humidity"] = predicted_hum
    return df



In [11]:
# Drop duplicates
df = pd.DataFrame(temperature_humidity_sensor_data)
save_temperature_humidity_csv("temperature_humidity_errors.csv", df)

df = df.drop_duplicates()

# Check for missing values
# missing_values = df.isnull().sum()
# print(missing_values)

# if missing_values.any():
#     missing_columns = missing_values[missing_values > 0].index.tolist()
#     print("Columns with missing values:", missing_columns)
#     # Drop rows with missing values
#     df = df.dropna()

df = impute_missing_values(df, strategy="regression")
df.saveto_csv("sensor_data/temperature_humidity_imputed.csv", index=False)
save_temperature_humidity_csv("temperature_humidity.csv", df)

Saved Temperature & Humidity CSV: sensor_data/csv\temperature_humidity_errors.csv


AttributeError: 'DataFrame' object has no attribute 'saveto_csv'