## numpy array

In [22]:
import numpy as np

# Завантаження даних
data = np.genfromtxt("household_power_consumption.txt", delimiter=';', dtype=None, names=True, encoding='utf-8')

# Заміна '?' на NaN
data = np.array(
    [tuple(np.nan if val == '?' else val for val in row) for row in data],
    dtype=data.dtype
)

# Конвертація числових даних
for col in ['Global_active_power', 'Global_reactive_power', 'Voltage', 'Global_intensity', 'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']:
    data[col] = np.where(data[col] == '?', np.nan, data[col].astype(np.float64))

# Форматування дати
def convert_date(date_str):
    parts = date_str.split('/')
    if len(parts[1]) == 1: 
        parts[1] = '0' + parts[1] 
    if len(parts[0]) == 1:  
        parts[0] = '0' + parts[0]  
    return np.datetime64('-'.join(reversed(parts)))

data['Date'] = np.array([convert_date(d) for d in data['Date']], dtype='datetime64[D]')



In [None]:
data

In [23]:
# Видалення рядків з NaN в 'Global_active_power'
data = data[~np.isnan(data['Global_active_power'].astype(np.float64))]

# Фільтри
df_1 = data[data['Global_active_power'].astype(np.float64) > 5]
df_2 = data[data['Voltage'].astype(np.float64) > 235]
df_3_1 = data[(data['Global_intensity'].astype(np.float64) >= 19) & (data['Global_intensity'].astype(np.float64) <= 20)]
df_3_2 = df_3_1[df_3_1['Sub_metering_2'].astype(np.float64) > df_3_1['Sub_metering_3'].astype(np.float64)]




In [36]:
df_1

array([('2006-12-16', '18:09:00', '4.464', '0.136', '234.66', '19.0', '0.0', '37.0', 16.),
       ('2006-12-17', '01:04:00', '4.582', '0.258', '238.08', '19.6', '0.0', '13.0',  0.),
       ('2006-12-17', '01:08:00', '4.618', '0.104', '239.61', '19.6', '0.0', '27.0',  0.),
       ...,
       ('2010-11-24', '07:55:00', '4.602', '0.0', '237.08', '19.4', '0.0', '40.0', 17.),
       ('2010-11-24', '07:56:00', '4.536', '0.0', '237.03', '19.0', '0.0', '39.0', 17.),
       ('2010-11-24', '07:57:00', '4.626', '0.0', '236.78', '19.4', '0.0', '39.0', 17.)],
      dtype=[('Date', '<U10'), ('Time', '<U8'), ('Global_active_power', '<U6'), ('Global_reactive_power', '<U5'), ('Voltage', '<U7'), ('Global_intensity', '<U6'), ('Sub_metering_1', '<U6'), ('Sub_metering_2', '<U6'), ('Sub_metering_3', '<f8')])

In [None]:
df_2

In [None]:
df_3_1

In [None]:
df_3_2

In [30]:
# Випадкові індекси
random_indices = np.random.choice(len(data), 500000, replace=False)
df_4 = data[random_indices]

mean_sub_metering_1 = np.nanmean(df_4['Sub_metering_1'].astype(np.float64))
mean_sub_metering_2 = np.nanmean(df_4['Sub_metering_2'].astype(np.float64))
mean_sub_metering_3 = np.nanmean(df_4['Sub_metering_3'].astype(np.float64))



In [39]:
df_4

array([('2010-09-22', '10:40:00', '1.336', '0.07', '240.82', '5.4', '0.0', '0.0', 18.),
       ('2009-09-18', '16:44:00', '0.172', '0.0', '243.12', '0.8', '0.0', '0.0',  0.),
       ('2008-05-03', '10:40:00', '1.36', '0.0', '237.74', '5.6', '2.0', '0.0', 18.),
       ...,
       ('2007-10-11', '18:55:00', '0.35', '0.13', '234.47', '1.6', '0.0', '0.0',  0.),
       ('2007-09-26', '23:36:00', '0.32', '0.102', '243.7', '1.4', '0.0', '1.0',  0.),
       ('2007-10-15', '10:43:00', '1.358', '0.086', '237.8', '5.6', '0.0', '0.0', 17.)],
      dtype=[('Date', '<U10'), ('Time', '<U8'), ('Global_active_power', '<U6'), ('Global_reactive_power', '<U5'), ('Voltage', '<U7'), ('Global_intensity', '<U6'), ('Sub_metering_1', '<U6'), ('Sub_metering_2', '<U6'), ('Sub_metering_3', '<f8')])

In [38]:
mean_sub_metering_1

6.431564

In [None]:
mean_sub_metering_2

In [None]:

mean_sub_metering_3

In [41]:
# Фільтри по часу та потужності
df_5_1 = data[(data['Time'] >= "18:00:00") & (data['Global_active_power'].astype(np.float64) > 6)]
df_5_2 = df_5_1[
    (df_5_1['Sub_metering_2'].astype(np.float64) > df_5_1['Sub_metering_1'].astype(np.float64)) & 
    (df_5_1['Sub_metering_2'].astype(np.float64) > df_5_1['Sub_metering_3'].astype(np.float64))
]

# Розділення та вибір кожного n-го елемента
half_index = len(df_5_2) // 2

df_5_2_first_half = df_5_2[:half_index]
df_5_2_first_half_every_third = df_5_2_first_half[::3]

df_5_2_second_half = df_5_2[half_index:]
df_5_2_second_half_every_fourth = df_5_2_second_half[::4]

# Об'єднання результатів
result = np.concatenate((df_5_2_first_half_every_third, df_5_2_second_half_every_fourth))


In [None]:
df_5_1

In [None]:
df_5_2

In [42]:
df_5_2_first_half_every_third

array([('2006-12-16', '18:05:00', '6.052', '0.192', '232.93', '26.2', '0.0', '37.0', 17.),
       ('2006-12-16', '18:08:00', '6.308', '0.116', '232.25', '27.0', '0.0', '36.0', 17.),
       ('2006-12-28', '20:58:00', '6.386', '0.374', '236.63', '27.0', '1.0', '36.0', 17.),
       ('2006-12-28', '21:02:00', '8.088', '0.262', '235.5', '34.4', '1.0', '72.0', 17.),
       ('2006-12-28', '21:05:00', '7.23', '0.152', '235.22', '30.6', '1.0', '73.0', 17.),
       ('2006-12-28', '21:08:00', '7.352', '0.0', '235.45', '31.2', '1.0', '73.0', 17.),
       ('2006-12-28', '21:11:00', '9.048', '0.0', '231.48', '39.0', '34.0', '71.0', 16.),
       ('2006-12-28', '21:14:00', '9.118', '0.108', '231.18', '39.4', '36.0', '72.0', 16.),
       ('2006-12-28', '21:17:00', '7.04', '0.13', '233.27', '30.2', '37.0', '38.0', 17.),
       ('2006-12-29', '21:16:00', '6.146', '0.116', '230.53', '26.6', '0.0', '70.0',  0.),
       ('2006-12-29', '21:19:00', '6.184', '0.138', '231.57', '26.6', '0.0', '70.0',  0.),
    

In [None]:
df_5_2_second_half_every_fourth

In [43]:
result

array([('2006-12-16', '18:05:00', '6.052', '0.192', '232.93', '26.2', '0.0', '37.0', 17.),
       ('2006-12-16', '18:08:00', '6.308', '0.116', '232.25', '27.0', '0.0', '36.0', 17.),
       ('2006-12-28', '20:58:00', '6.386', '0.374', '236.63', '27.0', '1.0', '36.0', 17.),
       ('2006-12-28', '21:02:00', '8.088', '0.262', '235.5', '34.4', '1.0', '72.0', 17.),
       ('2006-12-28', '21:05:00', '7.23', '0.152', '235.22', '30.6', '1.0', '73.0', 17.),
       ('2006-12-28', '21:08:00', '7.352', '0.0', '235.45', '31.2', '1.0', '73.0', 17.),
       ('2006-12-28', '21:11:00', '9.048', '0.0', '231.48', '39.0', '34.0', '71.0', 16.),
       ('2006-12-28', '21:14:00', '9.118', '0.108', '231.18', '39.4', '36.0', '72.0', 16.),
       ('2006-12-28', '21:17:00', '7.04', '0.13', '233.27', '30.2', '37.0', '38.0', 17.),
       ('2006-12-29', '21:16:00', '6.146', '0.116', '230.53', '26.6', '0.0', '70.0',  0.),
       ('2006-12-29', '21:19:00', '6.184', '0.138', '231.57', '26.6', '0.0', '70.0',  0.),
    