In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from ipywidgets import interact , interactive , widgets, fixed
import warnings

In [2]:
warnings.filterwarnings('ignore') #ignores all the warning messages in the output 

In [3]:
sns.set(style = 'whitegrid')

In [5]:
np.random.seed(42) # this is done in order to get the same random number everytime rather than changing numbers 

In [6]:
num_rows = 1000 # we want 1000 rows in our dataset i.e 1000 households
#Now setting the categories to which we will assign random values 
states = ['Delhi' , 'Mumbai', 'Kolkata' , 'Jammu & Kashmir' , 'Gujarat' ]
regions = ['Urban' , 'Rural']
appliances = ['AC' , 'Fridge' , 'Fan', 'TV' , 'Lights']
time_of_day = ['Morning' , 'Afternoon', 'Evening', 'Night']


In [14]:
#now we generate a dictionary generating all the random rows which will be later converted to a dataframe 
#Each key is a column name, and each value is a NumPy array of length 1000 (as set by num_rows).
data = {
    
    'Household_ID' : np.arange(1, num_rows+1), #np.arange(start, stop) is like Python’s fancy version of range(), but returns a NumPy array instead of a list.
    'State': np.random.choice(states , num_rows), #randomly assigns a state to each household, making your data look more realistic and varied.
    'Region' : np.random.choice(regions , num_rows),
    'Appliances' : np.random.choice(appliances , num_rows), #np.random.choice() — random picks from an array or list
    'Units_Consumed': np.round(np.random.normal(4, 2, num_rows), 2),# This generates num_rows random numbers from a normal (Gaussian) distribution.Mean (μ) = 4 Standard deviation (σ) = 2 So, you’re simulating data that clusters around 4 with a spread uptill 2 with round of decimal answers till 2 decimal places
    'Usage Time' : np.random.choice(time_of_day, num_rows), 
    'Monthly Income' :np.random.randint(10000,100000,num_rows) # Generates random integer monthly income between ₹10,000 and ₹99,999 per household. Simulates realistic socio-economic variation.
}
    

In [15]:
df = pd.DataFrame(data) #Now you can work with it like a regular table, do filtering, visualizations, etc.



In [16]:
df

Unnamed: 0,Household_ID,State,Region,Appliances,Units_Consumed,Usage Time,Monthly Income
0,1,Kolkata,Rural,AC,3.24,Morning,16338
1,2,Delhi,Urban,Lights,0.91,Night,61761
2,3,Jammu & Kashmir,Rural,TV,3.27,Evening,19622
3,4,Jammu & Kashmir,Rural,TV,7.29,Night,58423
4,5,Gujarat,Urban,TV,5.18,Night,52873
...,...,...,...,...,...,...,...
995,996,Mumbai,Urban,Lights,6.18,Night,95476
996,997,Kolkata,Rural,Lights,6.71,Night,18988
997,998,Kolkata,Urban,AC,3.09,Afternoon,88488
998,999,Jammu & Kashmir,Rural,TV,1.95,Afternoon,12977


In [17]:
#now depicting the tariff slab 
def calculate_tariff(units): #Takes an input units — which is the energy units consumed by a household.
    if units <= 2 :
        return 'Low'
    elif 2< units <= 5:
        return 'Medium'
    else:
        return 'High'
        
df['Tariff_Slab'] = df['Units_Consumed'].apply(calculate_tariff) #uns this function on every value in the 'Units_Consumed' column and the output is Tariff Slab
df.head()

Unnamed: 0,Household_ID,State,Region,Appliances,Units_Consumed,Usage Time,Monthly Income,Tariff_Slab
0,1,Kolkata,Rural,AC,3.24,Morning,16338,Medium
1,2,Delhi,Urban,Lights,0.91,Night,61761,Low
2,3,Jammu & Kashmir,Rural,TV,3.27,Evening,19622,Medium
3,4,Jammu & Kashmir,Rural,TV,7.29,Night,58423,High
4,5,Gujarat,Urban,TV,5.18,Night,52873,High


In [21]:
#Now creating interactive widgets
state_options = [''] + df['State'].unique().tolist()# we add an empty string as the default option , unnique() gives unique values to choose and . to list() is used to concatenate the empty list and the states numpy arraylist
state_widget = widgets.Dropdown(
    options = state_options, 
    value ='' , # Now it's safe to say "default = blank", because '' actually exists in the list.
    description = 'State:'
)
region_widget = widgets.Dropdown(
    options = ['All'] + df['Region'].unique().tolist(),
    value = 'All',  # Default selection
    description = 'Region:' #Label for the widget
)

    

In [23]:
#TIME OF THE DAY WIDGET 
#Create a multi-select widget to filter data by time of day (e.g., Morning, Afternoon, etc.)
time_widget = widgets.SelectMultiple(
#'options' parameter defines what the user can choose from in the widget
# We use .unique() to grab all the distinct time values from the 'Usage_Time' column (like Morning, Evening, Night)    
    options = df['Usage Time'].unique(),
# 'value' sets what options are selected by default when the widget is first displayed
# The widget expects this to be a list or tuple, not a NumPy array — so we wrap it in tuple()
# This line selects ALL available options by default (meaning no filtering is applied unless user changes it)  
    value = tuple(df['Usage Time'].unique()),
    description = 'Time of The Day'
)

In [29]:
display(state_widget, region_widget, time_widget)

Dropdown(description='State:', options=('', 'Kolkata', 'Delhi', 'Jammu & Kashmir', 'Gujarat', 'Mumbai'), value…

Dropdown(description='Region:', options=('All', 'Rural', 'Urban'), value='All')

SelectMultiple(description='Time of The Day', index=(0, 1, 2, 3), options=('Morning', 'Night', 'Evening', 'Aft…

In [31]:
state = state_widget.value
region = region_widget.value
time_list = list(time_widget.value)

In [60]:
def energy_dashboard(state, region, time_list):
    
#filtering the data frame 
    filtered = df.copy()
    if state != '':
        filtered = df[df['State'] == state] ## Filter the main DataFrame to include only rows where the 'State' matches the user's selection
# If the user has selected a specific Region (not 'All'), filter further by that region
    if region!= 'All!':
        filtered = filtered[filtered['Region'] == region]
# Filter the remaining data to include only rows where the 'Usage_Time' is in the selected list (e.g., Morning, Night)
    filtered = filtered[filtered['Usage Time'].isin(time_list)]
    display(filtered.head())
    print(f"Filtered Records : {len(filtered)}") #this is a simple print statement to tell you how many records (rows) are in your filtered dataset.

    #PLOTTING THE GRAPHS
    fig,axes = plt.subplots(3,1, figsize = (5,10))#This creates 3 vertically stacked plots (3 rows, 1 column).ig = the overall figure (like a big canvas) ,axes = a list of 3 individual plot areas on that canvas
#PLOT FOR APPLIANCE USAGE IN EACH STATE 
    sns.countplot( data = filtered , x = "Appliances", palette = 'mako', ax = axes[0])#ax=axes[0]: Draw this plot on the first subplot
    axes[0].set_title(f'Appliance Usage in {state}')
    axes[0].tick_params(axis = 'x',rotation = 45)

    filtered['Tariff_Slab'].value_counts().plot.pie(
        autopct='%1.1f%%', colors=sns.color_palette("pastel"), ax=axes[1])
    axes[1].set_title('Tariff Distribution')
    axes[1].set_ylabel('')

    sns.scatterplot(data=filtered, x='Monthly Income', y='Units_Consumed',
                hue='Region', ax=axes[2], alpha=0.6)
    axes[2].set_title('Income vs Consumption')

    plt.tight_layout()
    plt.show()

In [61]:
interactive_dashboard = interact(
    energy_dashboard,
    state=state_widget,
    region=region_widget,
    time_list=time_widget
)

display(interactive_dashboard)


interactive(children=(Dropdown(description='State:', index=4, options=('', 'Kolkata', 'Delhi', 'Jammu & Kashmi…

<function __main__.energy_dashboard(state, region, time_list)>

We use 'filtered' instead of 'df' to create a temporary, progressively filtered version of the dataset.
This avoids altering the original DataFrame and allows us to apply multiple filters step-by-step
(State → Region → Usage Time) without restarting from scratch or losing previous filters.


In [44]:
#PLOT FOR APPLIANCE USAGE IN EACH STATE 
sns.countplot( data = filtered , x = "Appliances", palette = 'mako', ax = axes[0])#ax=axes[0]: Draw this plot on the first subplot
axes[0].set_title(f'Appliance Usage in {state}')
axes[0].tick_params(axis = 'x',rotation = 45)
plt.show()

In [45]:
filtered['Tariff_Slab'].value_counts().plot.pie(
    autopct='%1.1f%%', colors=sns.color_palette("pastel"), ax=axes[1])
axes[1].set_title('Tariff Distribution')
axes[1].set_ylabel('')


Text(4.444444444444445, 0.5, '')

In [46]:
sns.scatterplot(data=filtered, x='Monthly Income', y='Units_Consumed',
                hue='Region', ax=axes[2], alpha=0.6)
axes[2].set_title('Income vs Consumption')

Text(0.5, 1.0, 'Income vs Consumption')

In [4]:
print(plt.style.available)

['Solarize_Light2', '_classic_test_patch', '_mpl-gallery', '_mpl-gallery-nogrid', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'petroff10', 'seaborn-v0_8', 'seaborn-v0_8-bright', 'seaborn-v0_8-colorblind', 'seaborn-v0_8-dark', 'seaborn-v0_8-dark-palette', 'seaborn-v0_8-darkgrid', 'seaborn-v0_8-deep', 'seaborn-v0_8-muted', 'seaborn-v0_8-notebook', 'seaborn-v0_8-paper', 'seaborn-v0_8-pastel', 'seaborn-v0_8-poster', 'seaborn-v0_8-talk', 'seaborn-v0_8-ticks', 'seaborn-v0_8-white', 'seaborn-v0_8-whitegrid', 'tableau-colorblind10']
