In [18]:
import numpy as np
np.set_printoptions(suppress=True, precision=3)

# Household power consumption
## Numpy implementation
[Data source](https://archive.ics.uci.edu/ml/datasets/individual+household+electric+power+consumption)

In [19]:
#if you dont have data locally 

#import data
#data.download_power_ds()

In [20]:
%%time
power_data = np.genfromtxt("data/household_power_consumption.txt", delimiter=';', usecols=range(2, 9), skip_header=True)

power_date = np.loadtxt("data/household_power_consumption.txt", delimiter=';', dtype=str, usecols=range(0,2), skiprows=1)

CPU times: user 31.7 s, sys: 1.19 s, total: 32.9 s
Wall time: 33 s


In [21]:
%%time

def split_datetime(row):
    result = []
    for item in row[0].split('/'):
        result.append(item)
    for item in row[1].split(':')[:2]:
        result.append(item)
    return result

dts = np.apply_along_axis(split_datetime, 1, power_date)
power = np.concatenate((dts, power_data), axis=1).astype('float')

CPU times: user 45.4 s, sys: 1.05 s, total: 46.4 s
Wall time: 46.4 s


In [22]:
power = power[~np.any(np.isnan(power), axis=1),:]

## Task 1
Select all rows where Active > 5 kW

In [23]:
power[power[:,5] > 5,:]

array([[  16.,   12., 2006., ...,    0.,    1.,   16.],
       [  16.,   12., 2006., ...,    0.,    2.,   17.],
       [  16.,   12., 2006., ...,    0.,    1.,   17.],
       ...,
       [  24.,   11., 2010., ...,    0.,   38.,   17.],
       [  24.,   11., 2010., ...,    0.,   39.,   17.],
       [  25.,   11., 2010., ...,    1.,    2.,   18.]])

## Task 2
Select all rows where Voltage > 235 V

In [24]:
power[power[:,7] > 235,:]

array([[  16.,   12., 2006., ...,    0.,    1.,   17.],
       [  16.,   12., 2006., ...,    0.,    2.,   17.],
       [  16.,   12., 2006., ...,    0.,    1.,   17.],
       ...,
       [  26.,   11., 2010., ...,    0.,    0.,    0.],
       [  26.,   11., 2010., ...,    0.,    0.,    0.],
       [  26.,   11., 2010., ...,    0.,    0.,    0.]])

## Task 3
Select all where met2 > met3 from rows where Intensity in \[19,20\]

In [25]:
condition = np.logical_and.reduce([ power[:,8] >= 19, power[:,8] <= 20, power[:,10] > power[:,11] ])

power[condition,:]

array([[  16.,   12., 2006., ...,    0.,   37.,   16.],
       [  17.,   12., 2006., ...,    0.,   13.,    0.],
       [  17.,   12., 2006., ...,    0.,   27.,    0.],
       ...,
       [  24.,   11., 2010., ...,    0.,   40.,   17.],
       [  24.,   11., 2010., ...,    0.,   39.,   17.],
       [  24.,   11., 2010., ...,    0.,   39.,   17.]])

## Task 4
Select 500000 random unique rows. Calculate mean of all mets.

In [26]:
rnd_row_index = np.random.choice(np.arange(0,len(power)), 500000, replace=False)
power[rnd_row_index,:]

array([[  24.,   12., 2006., ...,    0.,    2.,   18.],
       [   1.,    2., 2009., ...,    0.,    0.,   18.],
       [  26.,    4., 2008., ...,    0.,    0.,    1.],
       ...,
       [  11.,    7., 2008., ...,    0.,    0.,   13.],
       [   5.,    2., 2008., ...,    0.,    0.,   17.],
       [  21.,    5., 2009., ...,    0.,    0.,    1.]])

In [27]:
def print_mean(array : np.ndarray, column : int):
    mean = np.mean(power[:,column])
    print(f"Column {column}: {mean}")

print_mean(power, 9)
print_mean(power, 10)
print_mean(power, 11)

Column 9: 1.1219233096502186
Column 10: 1.2985199679887571
Column 11: 6.45844735712055


## Task 5
From night(>=18:00) rows select all where minute consumption > 6kW

Split this subset by max met and for each select every third from 1 part and every fourth from 2 part

In [28]:
condition = (power[:,3] >= 18) & (power[:,5] > 6)

power_night = power[condition,:]
power_night

array([[  16.,   12., 2006., ...,    0.,   37.,   17.],
       [  16.,   12., 2006., ...,    0.,   36.,   17.],
       [  16.,   12., 2006., ...,    0.,   37.,   16.],
       ...,
       [  20.,   11., 2010., ...,   13.,   39.,   16.],
       [  20.,   11., 2010., ...,   21.,   34.,   17.],
       [  20.,   11., 2010., ...,   17.,   34.,   16.]])

In [29]:
met1_max_condition = (power_night[:,9] >= power_night[:,10]) & (power_night[:,9] >= power_night[:,11])
met2_max_condition = (power_night[:,10] > power_night[:,9]) & (power_night[:,10] >= power_night[:,11])
met3_max_condition = (power_night[:,11] > power_night[:,9]) & (power_night[:,11] > power_night[:,10])

power_met1 = power_night[met1_max_condition,:]
power_met2 = power_night[met2_max_condition,:]
power_met3 = power_night[met3_max_condition,:]

In [30]:
met1_first, met1_second = np.array_split(power_met1, 2)
met2_first, met2_second = np.array_split(power_met2, 2)
met3_first, met3_second = np.array_split(power_met3, 2)

In [31]:
met1_first[::3]

array([[  22.,   12., 2006., ...,   20.,    0.,   16.],
       [  22.,   12., 2006., ...,   36.,    0.,   16.],
       [  22.,   12., 2006., ...,   36.,    0.,   17.],
       ...,
       [   2.,    2., 2008., ...,   36.,   34.,   16.],
       [   2.,    2., 2008., ...,   36.,   34.,   17.],
       [   2.,    2., 2008., ...,   37.,   34.,   17.]])

In [32]:
met1_second[::4]

array([[   2.,    2., 2008., ...,   36.,   34.,   16.],
       [   2.,    2., 2008., ...,   35.,   35.,   16.],
       [   2.,    2., 2008., ...,   35.,   34.,   15.],
       ...,
       [   4.,   11., 2010., ...,   40.,   35.,    0.],
       [   4.,   11., 2010., ...,   80.,    0.,    1.],
       [   4.,   11., 2010., ...,   80.,    0.,    1.]])