In [None]:
# Mount the user's Google Drive at /content/drive (Colab-only helper).
# NOTE(review): the load cell further down reads 'power_consumption.txt' through a
# relative path rather than a path under /content/drive — confirm where the file lives.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# load the data
# Subset the data to the given date range (December 2006 to November 2009)
# create a histogram
# create a time series plot
# create a plot for sub metering
# create a multiple plot
# Show different visualisations of the given dataset:
# scatter plot, histogram, bar chart, pie chart, count plot, box plot, heatmap, distplot, jointplot
# Dataset Link : https://archive.ics.uci.edu/dataset/235/individual+household+electric+power+consumption

In [None]:
# Part 1: Load the data
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

In [None]:
# The dataset is semicolon-separated; '?' (and 'nan') are read as missing values.
# Date and Time are merged into one datetime index named 'dt'. The timestamp is
# parsed with an explicit format (dd/mm/yyyy hh:mm:ss, as documented for the UCI
# dataset) instead of read_csv's nested `parse_dates` / `infer_datetime_format`
# options, which recent pandas releases deprecate.
data = (
    pd.read_csv('power_consumption.txt', sep=';',
                low_memory=False, na_values=['nan', '?'])
    .assign(dt=lambda df: pd.to_datetime(df['Date'] + ' ' + df['Time'],
                                         format='%d/%m/%Y %H:%M:%S'))
    .drop(columns=['Date', 'Time'])
    .set_index('dt')
)

In [None]:
# Keep the analysis simple: discard every row that has a missing value
data = data.dropna()

In [None]:
# Part 2: Keep only the observations from December 2006 to November 2009
# (label-based slice on the datetime index, both end dates included)
data = data.loc[slice('2006-12-01', '2009-11-30')]

In [None]:
# Part 3: Create a histogram
# Distribution of the Global_active_power readings
ax = data['Global_active_power'].hist()
ax.set_title('Global Active Power')
plt.show()

In [None]:
# Part 4: Create a time series plot
# Global_active_power over the selected period, titled through pandas' own `title` argument
data['Global_active_power'].plot(title='Global Active Power Time Series')
plt.show()

In [None]:
# Part 5: Create a plot for sub metering
# One panel per sub-metering column (Sub_metering_1, Sub_metering_2, Sub_metering_3).
# The title is passed to pandas so it is applied to the whole figure; calling
# plt.title() after a subplots=True plot would only label the last panel.
data[['Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']].plot(
    subplots=True,
    title='Sub Metering Time Series',
)
plt.show()

In [None]:
# Part 6: Create a multiple plot
# Global active power (x) against global reactive power (y)
data.plot.scatter(
    x='Global_active_power',
    y='Global_reactive_power',
    title='Scatter plot: Active Power vs Reactive Power',
)
plt.show()

In [None]:
# Part 7: Show different visualisations of the given dataset
# Note: only a subset of the visualisations is shown because of the size of the data

In [None]:
# Scatterplot
# Voltage (x) against global intensity (y)
ax = data.plot.scatter(x='Voltage', y='Global_intensity')
ax.set_title('Scatter plot: Voltage vs Global Intensity')
plt.show()

In [None]:
# BoxPlot
# Side-by-side boxes for the active and reactive power columns
power_columns = ['Global_active_power', 'Global_reactive_power']
data[power_columns].plot.box(title='Boxplot: Global active power and Global reactive power')
plt.show()

In [None]:
# Bar Chart
# One bar per sub-metering column, showing that column's total over the selected period
(
    data[['Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']]
    .sum()
    .plot.bar(title='Bar Chart: Sum of sub meterings')
)
plt.show()

In [None]:
# Heatmap
# Pairwise correlations between all columns, with the coefficient printed in each cell
corr = data.corr()
ax = sns.heatmap(corr, cmap='coolwarm', annot=True)
ax.set_title('Heatmap: Correlation Matrix')
plt.show()

In [None]:
# Pie Chart
# Pie charts suit categorical shares, so the three sub-metering totals are used as the slices
metering_sum = data[['Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']].sum()
metering_sum.plot.pie(autopct='%1.1f%%', title='Pie Chart: Sub Metering')
plt.show()

In [None]:
# Count plot
# Count plots need a categorical variable, so the data is resampled to daily means
# and each day is labelled 'High' or 'Low' relative to the overall daily mean.
daily_data = data.resample('D').mean()
daily_power = daily_data['Global_active_power']
# Compute the threshold once instead of once per row
power_threshold = daily_power.mean()
# Any calendar day left with no rows after the earlier dropna has a NaN daily mean;
# `NaN > threshold` is False, so such a day would be miscounted as 'Low'.
# Those days are left unlabelled (NaN) and excluded from the count.
daily_data['High_Low'] = (
    daily_power.gt(power_threshold)
    .map({True: 'High', False: 'Low'})
    .where(daily_power.notna())
)
sns.countplot(x=daily_data['High_Low'].dropna())
plt.title('Count plot: High/Low Active Power days')
plt.show()

In [None]:
# Distplot (Distribution plot)
# Distribution of Global_active_power; the title is set on the grid's single axes
grid = sns.displot(data=data, x='Global_active_power')
grid.ax.set_title('Distplot: Global Active Power')
plt.show()

In [None]:
# Jointplot
# Joint scatter of Global_active_power vs Global_reactive_power with marginal distributions.
# A joint plot is made of several axes, so the title is set on the figure (slightly
# above the top edge) rather than with plt.title(), which attaches it to a single
# axes where it collides with the top marginal plot.
grid = sns.jointplot(data=data, x='Global_active_power', y='Global_reactive_power', kind='scatter')
grid.figure.suptitle('Jointplot: Global Active Power and Global Reactive Power', y=1.02)
plt.show()