In [17]:
import pandas as pd
import numpy as np
from datetime import datetime

# File locations: the raw export that is read, and the cleaned CSV that is written.
input_file = 'thingspeak_historical_data.csv'
output_file = 'cleaned_sensor_data.csv'


In [18]:
# Load the raw CSV named by `input_file` into a DataFrame and report
# how many rows it contains before any cleaning is applied.
df = pd.read_csv(filepath_or_buffer=input_file)
print(f"Original data: {len(df)} rows")

Original data: 5482 rows


In [19]:
# De-duplicate on entry_id: when the same entry_id occurs more than once,
# only its first occurrence is kept (the pandas default, spelled out here).
df = df.drop_duplicates(subset='entry_id', keep='first')

In [20]:
# Remove rows in which every sensor value is 0 or null.
# The check runs on a numeric view of the sensor columns so that text values
# such as "0" are recognised and unparseable values count as null; the columns
# stored in `df` are not modified here.
sensor_cols = ['battery_voltage', 'humidity', 'temperature', 'motion_counts']
sensor_values = df[sensor_cols].apply(pd.to_numeric, errors='coerce')
# A row is "empty" when each of its sensor readings is either missing or zero.
all_empty = (sensor_values.isna() | sensor_values.eq(0)).all(axis=1)
df = df[~all_empty]


In [21]:
# Coerce the sensor columns to numeric (unparseable entries become NaN, to be
# forward filled later) and clamp each column to its realistic range.
numeric_cols = ['battery_voltage', 'humidity', 'motion_counts', 'temperature']

# (lower, upper) clamp limits per column; None means no limit on that side.
bounds = {
    'humidity': (0, 100),
    'temperature': (-40, 80),
    'battery_voltage': (0, 5),
    'motion_counts': (0, None),
}

for col in numeric_cols:
    lower, upper = bounds[col]
    # Out-of-range readings are clamped to the nearest limit, not discarded.
    df[col] = pd.to_numeric(df[col], errors='coerce').clip(lower=lower, upper=upper)

In [22]:
# Sort by entry_id first, so that each gap is filled from the entry that
# actually precedes it rather than from whatever row happened to sit above it
# in the input file.
df = df.sort_values('entry_id').reset_index(drop=True)

# Forward fill missing values. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') call, which emits a FutureWarning.
# Note: missing values at the very start of a column have no earlier value to
# copy and therefore stay NaN.
df[numeric_cols] = df[numeric_cols].ffill()

  df[numeric_cols] = df[numeric_cols].fillna(method='ffill')


In [23]:
# Write the cleaned frame to `output_file` without the positional index
# column, then report how many rows were written.
df.to_csv(path_or_buf=output_file, index=False)
print(f"Cleaned data: {len(df)} rows saved to {output_file}")

Cleaned data: 5471 rows saved to cleaned_sensor_data.csv


In [24]:
# Preview the first five rows of the cleaned data as plain text.
print(df.head(n=5))

   entry_id             timestamp  battery_voltage  humidity  motion_counts  \
0         1  2025-09-22T15:21:48Z            3.084      66.1          15172   
1         2  2025-09-22T15:51:49Z            3.086      65.9          15172   
2         3  2025-09-23T06:42:21Z            3.084      64.3          15782   
3         4  2025-09-23T06:48:59Z            3.085      64.0          15789   
4         5  2025-09-23T06:51:32Z            3.084      64.1          15794   

   temperature  
0        25.71  
1        25.79  
2        26.75  
3        26.77  
4        26.75  
