# Preprocessing Household Electricity Consumption Data
This notebook demonstrates preprocessing steps on synthetic household electricity consumption data.

In [13]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the dataset
data = pd.read_csv('Synthetic_Household_Electricity_Consumption_Dataset.csv')
data.head()

Unnamed: 0,Appliance,Voltage (V),Power Consumption (W),Daily Usage (hrs),Monthly Usage (%)
0,Fan,230,82.12,8.0,19.02
1,Air Conditioner,230,1588.76,4.21,40.17
2,Refrigerator,230,199.58,24.0,8.23
3,LED Bulb,230,15.94,7.9,7.15
4,Television,230,98.72,3.09,4.35


## Step 1: Check for Missing Values

In [14]:
missing_values = data.isnull().sum()
missing_values

Appliance                0
Voltage (V)              0
Power Consumption (W)    0
Daily Usage (hrs)        0
Monthly Usage (%)        0
dtype: int64

## Step 2: Standardize Numerical Data
Using `StandardScaler` to standardize *Power Consumption (W)* and *Daily Usage (hrs)*.

In [15]:
scaler = StandardScaler()
data[['Power Consumption (W)', 'Daily Usage (hrs)']] = scaler.fit_transform(data[['Power Consumption (W)', 'Daily Usage (hrs)']])
data.head()

Unnamed: 0,Appliance,Voltage (V),Power Consumption (W),Daily Usage (hrs),Monthly Usage (%)
0,Fan,230,-0.905778,0.34456,19.02
1,Air Conditioner,230,0.801732,-0.190922,40.17
2,Refrigerator,230,-0.772658,2.605167,8.23
3,LED Bulb,230,-0.980782,0.330431,7.15
4,Television,230,-0.886965,-0.349164,4.35


## Step 3: Add Derived Feature - Daily Energy Consumption (Wh)
Calculate the daily energy consumption by multiplying *Power Consumption (W)* and *Daily Usage (hrs)*.

In [16]:
data['Daily Energy Consumption (Wh)'] = data['Power Consumption (W)'] * data['Daily Usage (hrs)']
data.head()

Unnamed: 0,Appliance,Voltage (V),Power Consumption (W),Daily Usage (hrs),Monthly Usage (%),Daily Energy Consumption (Wh)
0,Fan,230,-0.905778,0.34456,19.02,-0.312095
1,Air Conditioner,230,0.801732,-0.190922,40.17,-0.153068
2,Refrigerator,230,-0.772658,2.605167,8.23,-2.012904
3,LED Bulb,230,-0.980782,0.330431,7.15,-0.324081
4,Television,230,-0.886965,-0.349164,4.35,0.309697


## Saving the Preprocessed Dataset

In [17]:
processed_file_path = 'Preprocessed_Household_Electricity_Consumption_Dataset.csv'
data.to_csv(processed_file_path, index=False)
processed_file_path


'Preprocessed_Household_Electricity_Consumption_Dataset.csv'

## Data Visualization

In [18]:
# importing 
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## Distribution of Power Consumption

In [None]:
plt.figure(figsize=(10, 6))
sns.histplot(data['Power Consumption (W)'], bins=30, kde=True)
plt.title('Distribution of Power Consumption (W)')
plt.xlabel('Power Consumption (W)')
plt.ylabel('Frequency')
plt.show()


## Average Daily Usage by Appliance Type

In [None]:
plt.figure(figsize=(12, 6))
sns.barplot(x='Appliance', y='Daily Usage (hrs)', data=data, estimator='mean', ci=None, palette='viridis')
plt.title('Average Daily Usage (Hours) by Appliance Type')
plt.xticks(rotation=45)
plt.xlabel('Appliance')
plt.ylabel('Average Daily Usage (hrs)')
plt.show()
