# <center>Visualization & EDA for Household-Power-Consumption Data</center>

In [0]:
# !git clone https://github.com/Gci04/Household-Power-Consumption.git
# !mv ./Household-Power-Consumption/* ./

In [0]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")

from tqdm import tqdm
import plotly.graph_objects as go

## Read data, fill missing values with the column mean, and combine the 3 meter readings

In [3]:
data_path = "./Dataset/household_power_consumption_data.zip"
meter_columns = ["Sub_metering_1", "Sub_metering_2", "Sub_metering_3"]

# Read the ';'-separated file. 'Date' and 'Time' are merged into one datetime
# index named 'dt'; the strings 'nan' and '?' are treated as missing values.
df = pd.read_csv(data_path, sep=';',parse_dates={'dt' : ['Date', 'Time']}, infer_datetime_format=True,
                 low_memory=False, na_values=['nan','?'],index_col='dt')

# Discard the columns this analysis does not use, leaving the three sub-meters.
# A new frame is bound instead of mutating in place.
df = df.drop(columns=["Global_active_power","Global_reactive_power","Voltage","Global_intensity"])

# Fill NaN values with the column average. Columns are addressed by name so the
# step does not depend on their position or on how many columns remain.
df[meter_columns] = df[meter_columns].fillna(df[meter_columns].mean())

# Total consumption per row = sum of the three sub-meter readings.
df["consumption"] = df[meter_columns].sum(axis=1)
df.head()

Unnamed: 0_level_0,Sub_metering_1,Sub_metering_2,Sub_metering_3,consumption
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2006-12-16 17:24:00,0.0,1.0,17.0,18.0
2006-12-16 17:25:00,0.0,1.0,16.0,17.0
2006-12-16 17:26:00,0.0,2.0,17.0,19.0
2006-12-16 17:27:00,0.0,1.0,17.0,18.0
2006-12-16 17:28:00,0.0,1.0,17.0,18.0


In [0]:
# grouped = df["consumption"].groupby(pd.Grouper(freq='1h', base=0, label='right')).sum()
# data = pd.DataFrame(grouped/60)

## Group samples to a granularity of 1 day (24 hours)

In [0]:
# Sum the readings into 24-hour bins, each labelled with its right edge.
# `base=0` is intentionally not passed: 0 was its default value, and the
# argument was removed from pd.Grouper in pandas 2.0 (passing it raises TypeError).
grouped = df.groupby(pd.Grouper(freq='24h', label='right')).sum()

# Scale every summed column by 1/60.
data = pd.DataFrame(grouped/60)

In [0]:
# Weekday name ("Monday", "Tuesday", ...) of every row, read directly from the
# datetime index with the vectorized accessor instead of a per-row lambda.
day_name = data.index.day_name()
data["day"] = day_name.tolist()

## Plot of energy consumption for each day of the week

In [0]:
# Ten-colour palette as hex codes.
colors = [
    '#1f77b4',  # muted blue
    '#ff7f0e',  # safety orange
    '#2ca02c',  # cooked asparagus green
    '#d62728',  # brick red
    '#9467bd',  # muted purple
    '#8c564b',  # chestnut brown
    '#e377c2',  # raspberry yogurt pink
    '#7f7f7f',  # middle gray
    '#bcbd22',  # curry yellow-green
    '#17becf',  # blue-teal
]

# The same palette as 'rgb(r, g, b)' strings, built by decoding each pair of
# hex digits (characters 1-2, 3-4, 5-6) of the codes above.
DEFAULT_PLOTLY_COLORS = [
    'rgb({}, {}, {})'.format(*(int(hex_code[start:start + 2], 16) for start in (1, 3, 5)))
    for hex_code in colors
]

In [23]:
fig = go.Figure()

# One trace per weekday name found in data.day.
# The palettes are walked back-to-front WITHOUT mutating them: popping colours
# off the shared lists would drain them, so a second run of this cell would
# raise IndexError. reversed() gives the same colour-to-weekday assignment.
weekday_groups = data.groupby(data.day)
for (name, group), line_color, fill_color in zip(
        weekday_groups, reversed(colors), reversed(DEFAULT_PLOTLY_COLORS)):
    fig.add_trace(go.Scatter(x=group.index, y=group['consumption'], name=f"{name}",
                             line=dict(color=line_color), fillcolor=fill_color))

# Quick-zoom buttons shown above the x axis.
range_buttons = [
    dict(count=1, label="1m", step="month", stepmode="backward"),
    dict(count=6, label="6m", step="month", stepmode="backward"),
    dict(count=1, label="YTD", step="year", stepmode="todate"),
    dict(count=1, label="1y", step="year", stepmode="backward"),
    dict(step="all", label="ALL"),
]

# Title plus a date x axis with range selector and range slider.
# Kept as the last expression of the cell so the figure is rendered.
fig.update_layout(
    title_text='Energy Consumption (Grouped by weekday)',
    xaxis=go.layout.XAxis(
        rangeselector=dict(buttons=range_buttons),
        rangeslider=dict(visible=True),
        type="date",
    ),
)
                  

## Meter readings plots 

In [0]:
# Hex colour palette used for the meter-reading traces.
colors = [
    '#1f77b4',
    '#ff7f0e',
    '#2ca02c',
    '#d62728',
    '#9467bd',
    '#8c564b',
    '#e377c2',
    '#7f7f7f',
    '#bcbd22',
    '#17becf',
]

In [10]:
# Initialize figure
fig = go.Figure()

meter_names = ["Sub_metering_1", "Sub_metering_2", "Sub_metering_3"]
# Vertical arrow offset (plotly `ay`) of each meter's max-value annotation.
annotation_offsets = {"Sub_metering_1": -40, "Sub_metering_2": 40, "Sub_metering_3": 40}

# Add traces and build annotations.
# For every meter two traces are added, in this order: the readings (visible)
# and a dashed horizontal line at the mean (hidden until a button reveals it).
# Resulting trace order: m1, m1 avg, m2, m2 avg, m3, m3 avg.
meter_annotations = {}
for number, meter in enumerate(meter_names, start=1):
    readings = data[meter]
    trace_color = colors[number]

    fig.add_trace(
        go.Scatter(x=data.index,
                   y=readings,
                   name=meter,
                   line=dict(color=trace_color)))

    fig.add_trace(
        go.Scatter(x=data.index,
                   y=[readings.mean()] * len(data.index),
                   name=f"{meter} Average",
                   visible=False,
                   line=dict(color=trace_color, dash="dash")))

    # Annotation pointing at this meter's own maximum; label and value both
    # come from the same meter.
    meter_annotations[meter] = [dict(x=readings.idxmax(),
                                     y=readings.max(),
                                     xref="x", yref="y",
                                     text="%s Max:<br> %.2f" % (meter, readings.max()),
                                     ax=0, ay=annotation_offsets[meter])]

# Keep the per-meter names available for any later cell that refers to them.
meter1_annotations, meter2_annotations, meter3_annotations = (
    meter_annotations[meter] for meter in meter_names)

# Buttons: each one sets the visibility mask over the six traces, the title
# and the annotations to display.
view_buttons = [
    dict(label="None",
         method="update",
         args=[{"visible": [True, False] * len(meter_names)},
               {"title": "Energy Consumption",
                "annotations": []}]),
]
for position, meter in enumerate(meter_names):
    # Show only this meter's readings trace and its average trace.
    visible_mask = [False] * (2 * len(meter_names))
    visible_mask[2 * position] = True
    visible_mask[2 * position + 1] = True
    view_buttons.append(
        dict(label=f"Meter {position + 1}",
             method="update",
             args=[{"visible": visible_mask},
                   {"title": f"Sub - metering {position + 1}",
                    "annotations": meter_annotations[meter]}]))
view_buttons.append(
    dict(label="All",
         method="update",
         args=[{"visible": [True] * (2 * len(meter_names))},
               {"title": "Sub - metering 1,2,3",
                "annotations": meter1_annotations + meter2_annotations + meter3_annotations}]))

fig.update_layout(
    updatemenus=[
        go.layout.Updatemenu(
            type="buttons",
            direction="right",
            active=0,  # "None" matches the initial visibility of the traces
            x=0.57,
            y=1.2,
            buttons=view_buttons,
        )
    ])

# Set title
fig.update_layout(title_text="Energy Consumption meter readings",)

# Add range slider and quick-zoom buttons on a date x axis
fig.update_layout(
    xaxis=go.layout.XAxis(
        rangeselector=dict(
            buttons=[
                dict(count=1, label="1day", step="day", stepmode="backward"),
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(step="all"),
            ]
        ),
        rangeslider=dict(
            visible=True
        ),
        type="date")
    )

fig.show()