Perform the necessary imports

In [2]:
import os

import pandas as pd
import psycopg2
import dotenv
import pandas

Establish a connection to the database

In [3]:
# Pull the connection settings from the environment; load_dotenv() first merges in
# any values found in a local .env file.
dotenv.load_dotenv()
params = {
    setting: os.getenv(f"DB_{setting.upper()}")
    for setting in ("database", "user", "password", "host", "port")
}
conn = psycopg2.connect(**params)


In [5]:
# Open a cursor on the connection; the query in the next cell is executed through it.
cursor = conn.cursor()

Retrieve the data from the database

In [6]:
# Appliance names exactly as stored in appliance_data.appliance. Each one becomes a
# feature column holding its average reading per one-minute bucket.
APPLIANCES = (
    "air_conditioner", "air_purifier", "boiler", "coffee", "computer", "dehumidifier",
    "dishwasher", "dryer", "fan", "freezer", "fridge", "internet_router", "laptop",
    "micro_wave_oven", "phone_charger", "printer", "printer_3D", "radiator", "screen",
    "solar_panel", "sound_system", "tv", "vacuum", "washing_machine",
)


def _column(appliance):
    """Return the quoted SQL column identifier for an appliance name.

    Quoting keeps keyword-like names such as "vacuum" safe, and lower-casing
    reproduces the case folding PostgreSQL applies to unquoted identifiers
    (printer_3D -> printer_3d), so quoted and unquoted references agree.

    Raises:
        ValueError: if the name contains anything but letters, digits and
            underscores. The name is spliced into the SQL text, so it must
            never carry quotes or other syntax.
    """
    if not appliance.replace("_", "").isalnum():
        raise ValueError(f"unexpected appliance name: {appliance!r}")
    return f'"{appliance.lower()}"'


def _appliance_cte(appliance):
    """Build the CTE that averages one appliance's readings per one-minute bucket."""
    col = _column(appliance)
    return f"""_{appliance}_data AS (
		SELECT time, AVG({col}) AS {col}
		FROM (
			SELECT time_bucket_gapfill('1 minute', time) AS time, data AS {col}
			FROM appliance_data
			WHERE appliance = '{appliance}'
		)
		GROUP BY time
		ORDER BY time
	)"""


# All per-appliance CTEs are inner-joined on their bucket time, so any of them can
# supply the `time` column; the first one is used as the anchor of the join chain.
_anchor = f"_{APPLIANCES[0]}_data"
_appliance_ctes = "\n\t, ".join(_appliance_cte(name) for name in APPLIANCES)
_appliance_select = "\n\t\t, ".join(f"_{name}_data.{_column(name)}" for name in APPLIANCES)
_appliance_joins = "\n\t\t".join(
    f"JOIN _{name}_data ON {_anchor}.time = _{name}_data.time" for name in APPLIANCES[1:]
)
_appliance_columns = "\n\t, ".join(_column(name) for name in APPLIANCES)

# The query assembles one row per one-minute bucket of transactions:
#   * _ts / _ts_bucketed        - average transacted price and running fee total
#   * _b_funds / _funds_agg     - running total of funds
#   * _total_impedance, *_voltage - grid measurements extracted from grid_history JSON
#   * _appliance_data           - per-appliance averages, joined on time of day (HH24:MI)
#   * _num_open_buys/_sells     - per-bucket order counts
# The trailing "-- SELECT * FROM <cte>; /*" markers are plain SQL comments; they look
# like debugging hooks for inspecting a single CTE and do not affect the result.
query = f"""
WITH
_ts(created_at, time, transacted_price, fee_sum) AS (
	SELECT
		created_at
		, time_bucket_gapfill('1 minute', created_at) AS time
		, transacted_price
		, SUM(2*transaction_fee) OVER ( ORDER BY created_at
										ROWS UNBOUNDED PRECEDING
									  ) fee_sum
	FROM transactions
)  -- SELECT * FROM _ts; /*
, _ts_bucketed AS (
	SELECT
		time
		, AVG(transacted_price) AS transacted_price
		, MAX(fee_sum) AS fee_sum
	FROM _ts
	GROUP BY time
	ORDER BY time
) -- SELECT * FROM _ts_bucketed; /*
, _b_funds(time, funds_total) AS (
	SELECT time, SUM(amount)
	FROM (
		SELECT
			time_bucket_gapfill('1 minute', created_at) AS time
			, amount
		FROM funds
	)
	GROUP BY time
	ORDER BY time
) -- SELECT * FROM _b_funds; /*
, _funds_agg AS (
	SELECT
	time
	, SUM(funds_total) OVER (
		ORDER BY time
		ROWS UNBOUNDED PRECEDING
	) funds_total
	FROM _b_funds
) --  SELECT * FROM _funds_agg; /*
, _total_impedance AS (
	SELECT time, AVG(total_impedance) AS total_impedance FROM
		(SELECT time_bucket_gapfill('1 minute', tl.created_at) AS time, tl.tl_val+c.c_val AS total_impedance FROM
		(SELECT created_at, SUM(transmission_line_impedance) AS tl_val
			FROM (
				SELECT created_at, SQRT(transmission_line_resistance::float8*transmission_line_resistance::float8 + transmission_line_inductance::float8*transmission_line_inductance::float8) AS transmission_line_impedance
				FROM (
					SELECT created_at, transmission_line_resistance::float8, transmission_line_length::float8* transmission_line_inductance_per_meter::float8 as transmission_line_inductance
					FROM (
						SELECT created_at,
						jsonb_array_elements(jsonb_array_elements(grid_state->'circuits')->'loads')->'load_type'->'TransmissionLine'->'resistance' as transmission_line_resistance,
						jsonb_array_elements(jsonb_array_elements(grid_state->'circuits')->'loads')->'load_type'->'TransmissionLine'->'length' as transmission_line_length,
						jsonb_array_elements(jsonb_array_elements(grid_state->'circuits')->'loads')->'load_type'->'TransmissionLine'->'inductance_per_meter' as transmission_line_inductance_per_meter
						FROM grid_history
					)
				)
			)
			WHERE transmission_line_impedance <> 0 GROUP BY created_at) AS tl
			INNER JOIN (
				SELECT created_at, SUM(consumer_resistance) AS c_val
				FROM (
					SELECT created_at, (jsonb_array_elements(jsonb_array_elements(grid_state->'circuits')->'loads')->'load_type'->'Consumer'->'resistance')::float8 AS consumer_resistance
					FROM grid_history
				)
				WHERE consumer_resistance <> 0
				GROUP BY created_at
			) AS c
			ON c.created_at = tl.created_at)
	GROUP BY time
	ORDER BY time
) -- SELECT * FROM _total_impedance; /*
, _consumer_voltage AS (
	SELECT time, AVG(consumer_voltage) AS consumer_voltage
	FROM
		(SELECT time_bucket_gapfill('1 minute', created_at) AS time, consumer_voltage
		FROM
			(SELECT created_at, AVG(voltage) AS consumer_voltage
				FROM (
					SELECT created_at,(jsonb_array_elements(jsonb_array_elements(grid_state->'circuits')->'loads')->'load_type'->'Consumer'->'voltage'->'oscilloscope_detail'->'amplitude')::float8 AS voltage
					FROM grid_history
				)
				WHERE voltage <> 0 GROUP BY created_at
			)
		)
	GROUP BY time
	ORDER BY time
) -- SELECT * FROM _consumer_voltage; /*
, _transmission_line_voltage AS (
	SELECT time, AVG(transmission_line_voltage) AS transmission_line_voltage
	FROM
		(SELECT time_bucket_gapfill('1 minute', created_at) AS time, transmission_line_voltage
		FROM
			(SELECT created_at, AVG(voltage) AS transmission_line_voltage
				FROM (
					SELECT created_at,(jsonb_array_elements(jsonb_array_elements(grid_state->'circuits')->'loads')->'load_type'->'TransmissionLine'->'voltage'->'oscilloscope_detail'->'amplitude')::float8 AS voltage
					FROM grid_history
				)
				WHERE voltage <> 0 GROUP BY created_at
			)
		)
	GROUP BY time
	ORDER BY time
) -- SELECT * FROM _transmission_line_voltage; /*
, _generator_voltage AS (
	SELECT time, AVG(generator_voltage) AS generator_voltage
	FROM
		(SELECT time_bucket_gapfill('1 minute', created_at) AS time, generator_voltage
		FROM
			(SELECT created_at, AVG(voltage) AS generator_voltage
				FROM (
					SELECT created_at, (jsonb_array_elements(jsonb_array_elements(grid_state->'circuits')->'generators')->'voltage'->'oscilloscope_detail'->'amplitude')::float8 AS voltage
					FROM grid_history
				)
				WHERE voltage <> 0 GROUP BY created_at
			)
		)
	GROUP BY time
	ORDER BY time
) -- SELECT * FROM _generator_voltage; /*
, _appliance_data AS (
	WITH
	{_appliance_ctes}
	SELECT {_anchor}.time
		, to_char({_anchor}.time, 'HH24:MI') AS day_time
		, {_appliance_select}
	FROM {_anchor}
		{_appliance_joins}
) -- SELECT * FROM _appliance_data; /*
, _num_open_buys AS (
	SELECT time, COUNT(*) AS num_open_buys
	FROM (
		SELECT time, buy_order_id, sought_units
			, SUM(transacted_units) OVER (
				PARTITION BY buy_order_id
				ORDER BY time
				ROWS UNBOUNDED PRECEDING
			) running_ts_units
		FROM (
			SELECT time, buy_order_id, sought_units, SUM(transacted_units) AS transacted_units
			FROM (
				SELECT time_bucket_gapfill('1 minute', transactions.created_at) AS time
				, transactions.buy_order_id, transacted_units, sought_units
				FROM transactions
				INNER JOIN buy_orders ON transactions.buy_order_id = buy_orders.buy_order_id
			)
			GROUP BY time, buy_order_id, sought_units
		)
	)
	WHERE sought_units <= running_ts_units
	GROUP BY time
	ORDER BY time
) -- SELECT * FROM _num_open_buys; /*
, _num_open_sells AS (
	SELECT time, COUNT(*) AS num_open_sells
	FROM (
		SELECT time, sell_order_id, offered_units
			, SUM(transacted_units) OVER (
				PARTITION BY sell_order_id
				ORDER BY time
				ROWS UNBOUNDED PRECEDING
			) running_ts_units
		FROM (
			SELECT time, sell_order_id, offered_units, SUM(transacted_units) AS transacted_units
			FROM (
				SELECT time_bucket_gapfill('1 minute', transactions.created_at) AS time
				, transactions.sell_order_id, transacted_units, offered_units
				FROM transactions
				INNER JOIN sell_orders ON transactions.sell_order_id = sell_orders.sell_order_id
			)
			GROUP BY time, sell_order_id, offered_units
		)
	)
	WHERE offered_units <= running_ts_units
	GROUP BY time
	ORDER BY time
) -- SELECT * FROM _num_open_sells; /*
, _ts_data AS (
	-- LAG needs an explicit ORDER BY: without one the "previous" row is taken in
	-- unspecified order, so the gap filling below could pick an arbitrary minute.
	SELECT _ts_bucketed.time
		, to_char(_ts_bucketed.time, 'HH24:MI') AS day_time
		, fee_sum
		, funds_total
		, total_impedance
		, LAG(total_impedance) OVER (ORDER BY _ts_bucketed.time) AS lag_total_impedance
		, consumer_voltage
		, LAG(consumer_voltage) OVER (ORDER BY _ts_bucketed.time) AS lag_consumer_voltage
		, transmission_line_voltage
		, LAG(transmission_line_voltage) OVER (ORDER BY _ts_bucketed.time)
			AS lag_transmission_line_voltage
		, generator_voltage
		, LAG(generator_voltage) OVER (ORDER BY _ts_bucketed.time) AS lag_generator_voltage
		, num_open_buys
		, num_open_sells
		, transacted_price
	FROM _ts_bucketed
		LEFT JOIN _funds_agg ON _ts_bucketed.time = _funds_agg.time
		LEFT JOIN _total_impedance ON _ts_bucketed.time = _total_impedance.time
		LEFT JOIN _consumer_voltage ON _ts_bucketed.time = _consumer_voltage.time
		LEFT JOIN _transmission_line_voltage ON _ts_bucketed.time = _transmission_line_voltage.time
		LEFT JOIN _generator_voltage ON _ts_bucketed.time = _generator_voltage.time
		LEFT JOIN _num_open_buys ON _ts_bucketed.time = _num_open_buys.time
		LEFT JOIN _num_open_sells ON _ts_bucketed.time = _num_open_sells.time
) -- SELECT * FROM _ts_data; /*
, _combined_table AS (SELECT
	 _ts_data.day_time
	, CAST(EXTRACT(HOUR FROM _ts_data.time) AS INTEGER) AS day_hour
	, CAST(EXTRACT(MINUTE FROM _ts_data.time) AS INTEGER) AS day_minute
	, fee_sum
	, funds_total
	, COALESCE(total_impedance, lag_total_impedance) AS total_impedance
	, COALESCE(consumer_voltage, lag_consumer_voltage) AS consumer_voltage
	, COALESCE(transmission_line_voltage, lag_transmission_line_voltage)
		AS transmission_line_voltage
	, COALESCE(generator_voltage, lag_generator_voltage) AS generator_voltage
	, COALESCE(num_open_buys, 0) AS num_open_buys
	, COALESCE(num_open_sells, 0) AS num_open_sells
	, transacted_price
	, {_appliance_columns}
FROM _ts_data
	LEFT JOIN _appliance_data ON _ts_data.day_time = _appliance_data.day_time
)
SELECT
	day_hour
	, day_minute
	, fee_sum
	, funds_total
	, total_impedance
	, consumer_voltage
	, transmission_line_voltage
	, generator_voltage
	, num_open_buys
	, num_open_sells
	, transacted_price
	, {_appliance_columns}
FROM _combined_table;

--*/ --*/ ---*/ ----*/ ----*/
"""

cursor.execute(query)
# One tuple per one-minute bucket, columns in the order of the final SELECT list.
data = cursor.fetchall()

Close the database connection

In [7]:
# Release the cursor first, then the connection it was opened on.
for handle in (cursor, conn):
    handle.close()

Load the data into a dataframe

In [8]:
# Column labels for the fetched rows, listed in the order of the query's final SELECT.
colum_names = [
    # time of day
    "day_hour", "day_minute",
    # market state
    "fee_sum", "funds_total",
    # grid state
    "total_impedance", "consumer_voltage", "transmission_line_voltage", "generator_voltage",
    # order book and price
    "num_open_buys", "num_open_sells", "transacted_price",
    # appliance readings
    "air_conditioner", "air_purifier", "boiler", "coffee", "computer", "dehumidifier",
    "dishwasher", "dryer", "fan", "freezer", "fridge", "internet_router", "laptop",
    "micro_wave_oven", "phone_charger", "printer", "printer_3D", "radiator", "screen",
    "solar_panel", "sound_system", "tv", "vacuum", "washing_machine",
]
df = pandas.DataFrame(data=data, columns=colum_names)
# Preview the first five rows.
df.head(5)

Unnamed: 0,day_hour,day_minute,fee_sum,funds_total,total_impedance,consumer_voltage,transmission_line_voltage,generator_voltage,num_open_buys,num_open_sells,...,phone_charger,printer,printer_3D,radiator,screen,solar_panel,sound_system,tv,vacuum,washing_machine
0,13,23,0.589171,4656.020254,157.385598,31.057642,158084.657392,240.0,0,1,...,9.289218,2.768645,2.768645,747.894435,10.137828,118.148853,11.907443,1.867,0.307333,6.146407
1,13,24,8.749309,10121.201925,5839.488902,47.27268,181374.145869,240.0,0,12,...,8.605125,2.655464,2.655464,795.530225,10.20015,124.127505,11.905991,1.867,0.042333,2.045057
2,13,25,9.424906,15809.780906,11513.591402,47.27268,181374.145869,240.0,0,1,...,9.626712,2.654368,2.654368,683.198817,10.124295,127.242409,11.904542,1.867,0.155333,3.244823
3,13,26,13.125708,22067.985544,19828.607002,5.948813,21302.357617,42.799664,0,5,...,10.179646,2.596887,2.596887,838.580524,10.180924,127.951759,11.897311,1.867,0.206,4.7421
4,13,27,20.873635,27867.710275,22505.183002,5.948813,21302.357617,40.204923,0,9,...,9.998465,2.692293,2.692293,677.26853,10.242432,132.228627,11.920215,1.867,0.358333,2.119633


Split the data

In [9]:
from sklearn.model_selection import train_test_split

# Fixed seed so the shuffled split, and therefore every score computed from it,
# is the same on each Restart & Run All.
SPLIT_SEED = 42

# Model inputs: every column of `df` except the target, in column order.
features = ["day_hour", "day_minute", "fee_sum", "funds_total", "total_impedance", "consumer_voltage",
            "transmission_line_voltage", "generator_voltage", "num_open_buys", "num_open_sells", "air_conditioner",
            "air_purifier", "boiler", "coffee", "computer", "dehumidifier", "dishwasher", "dryer","fan", "freezer", 
            "fridge", "internet_router", "laptop", "micro_wave_oven", "phone_charger", "printer", "printer_3D", 
            "radiator", "screen", "solar_panel", "sound_system", "tv", "vacuum", "washing_machine"]
X = df[features]
# Target: the average transacted price of the bucket.
y = df["transacted_price"]
# NOTE(review): the rows are consecutive minutes and fee_sum / funds_total are running
# totals, so a shuffled split may leak future information into training - confirm that a
# random split (rather than a chronological one) is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=SPLIT_SEED)

Create the scaler that standardises the features (it is applied as the first step of the pipeline below)

In [10]:
from sklearn.preprocessing import StandardScaler

# Centre every feature on zero mean and scale it to unit variance
# (these are the estimator's defaults, spelled out for the reader).
scaler = StandardScaler(with_mean=True, with_std=True)

Create an ensemble model

In [11]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import VotingRegressor

# Seed for the stochastic members of the ensemble, so refitting on the same data
# yields the same model.
MODEL_SEED = 42

lr = LinearRegression()
# The tree permutes candidate features at each split; seeding makes ties resolve the same way.
dtr = DecisionTreeRegressor(random_state=MODEL_SEED)
svr = SVR()
# Weight initialisation and SGD batch order are random; seeding makes training repeatable.
mlpr = MLPRegressor(solver='sgd', random_state=MODEL_SEED)
# The voting regressor averages the predictions of the four base models.
voting_regressor = VotingRegressor(estimators=[("lr", lr), ("dtr", dtr), ("svr", svr), ("mlpr", mlpr)])

Create the pipeline and fit it on the training data

In [12]:
from sklearn.pipeline import Pipeline

# Scaling is a pipeline step so the same transform is applied when fitting,
# scoring and predicting.
pipeline_steps = [("scaler", scaler), ("voting_regressor", voting_regressor)]
pipe = Pipeline(steps=pipeline_steps)
pipe.fit(X_train, y_train)

In [13]:
# Evaluate the fitted pipeline on the held-out test split
# (for scikit-learn regressors `score` is the coefficient of determination, R^2).
pipe.score(X_test, y_test)

0.8419512876212203

Pickle the model

In [15]:
import pickle

# Serialise the whole fitted pipeline (scaler + ensemble) so it can be loaded elsewhere
# without re-training.
with open("price-model.pkl", mode="wb") as model_file:
    pickle.dump(pipe, model_file)