In [None]:
# The input data is in minute intervals, and the model is trained to predict
# the total real power consumption for a single minute based on the input 
# features, so the predicted value represents the predicted total real power
# consumption in kilowatts for a single minute.
# It's worth noting that the power consumption of a household 
# can vary widely over time, and is influenced by many factors, 
# such as the time of day, day of the week, season, and weather. 
# So the accuracy of the predictions may be limited by the amount of data 
# available and the complexity of the model.

# Also, it's important to consider that the power consumption of a household 
# may not always directly translate to the cost of electricity in NOK, 
# as the price of electricity can vary depending on the time of day, 
# the day of the week, and other factors such as taxes and subsidies. 
# So it's important to take these factors into account when interpreting 
# the predictions and calculating the cost of electricity.

#Example:

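#predicted_price = predicted_power_consumption (kW) * duration (h) * price_per_kwh (NOK/kWh)

#predicted_price = 0.438 kW * (1/60) h * 1.50 NOK/kWh = approx. 0.011 NOK

#The model predicts kW based on minute intervals, so the predicted cost for
#that one minute is about 0.011 NOK. Running at the same power for a full
#hour would cost 0.438 kW * 1 h * 1.50 NOK/kWh = 0.657 NOK.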


In [None]:
#Imports for the project.

import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [None]:
# Load the raw measurements from the semicolon-separated text file.
# na_values: the UCI household power consumption file marks missing readings
# with a literal '?' (verify against your copy). Without this, the measurement
# columns load as strings and a later dropna() cannot see the gaps.
# low_memory=False: infer each column's dtype from the whole file rather than
# chunk by chunk, which avoids mixed-dtype columns.
# Date and Time are intentionally left as text here; they are combined into a
# single datetime column in a later cell.
df = pd.read_csv(
    'household_power_consumption.txt',
    sep=';',
    na_values=['?'],
    low_memory=False,
)

In [None]:
# Take a quick look at the first few rows to check that the file was parsed
first_rows = df.head()
print(first_rows)

In [None]:
# Register tqdm with pandas. This adds the progress_apply method, which
# behaves like apply but shows a progress bar; it is used by the datetime
# conversion cell further down.
tqdm.pandas()

In [None]:
# Drop any rows with missing values (modifies df in place).
# NOTE(review): dropna only removes cells pandas already treats as NaN; any
# textual placeholder for a missing reading would survive this step.
df.dropna(inplace=True)

In [None]:
# Combine the separate Date and Time text columns into one datetime column.
# The whole column is parsed in a single vectorised call; calling
# pd.to_datetime once per row through apply(axis=1) is orders of magnitude
# slower on a frame of this size.
# The explicit format also pins day-first parsing, so a value such as
# 1/2/2007 is read as 1 February rather than 2 January.
# NOTE(review): assumes the UCI layout dd/mm/yyyy and hh:mm:ss; confirm.
df['datetime'] = pd.to_datetime(
    df['Date'] + ' ' + df['Time'],
    format='%d/%m/%Y %H:%M:%S',
)

In [None]:
# The combined 'datetime' column replaces the two text columns, so remove them
df.drop(columns=['Date', 'Time'], inplace=True)

In [None]:
# Set the datetime column as the index (in place). The column is moved out of
# the regular columns, so it is not part of the feature matrix built later.
df.set_index('datetime', inplace=True)

In [None]:
# Show the remaining column names after the date/time handling above
print(df.columns)

In [None]:
# Create the target column: the Global_active_power value of the NEXT ROW.
# shift(-1) moves the column up by one position, so each row is paired with
# the reading that follows it in the frame (not the next day's value).
# NOTE(review): rows with missing values were dropped earlier, so the next row
# is not guaranteed to be exactly one minute later; confirm this is acceptable.
df['target'] = df['Global_active_power'].shift(-1)

In [None]:
# Drop rows containing NaN (in place). After the shift above, the final row
# has no following reading and therefore a NaN target, so it is removed here.
df.dropna(inplace=True)

In [None]:
# Learn the per-column minimum and maximum of every feature column (all
# columns except 'target'). Nothing is transformed here; the scaler is only fitted.
scaler = MinMaxScaler()
scaler.fit(df.drop(columns=['target']))

In [None]:
# Separate the inputs from the value to predict:
#   X - every column except 'target'
#   y - the 'target' column
X, y = df.drop(columns=['target']), df['target']

In [None]:
# Split the dataset into training and testing sets (80/20, fixed seed so the
# split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Map the features through the fitted scaler before training. New inputs are
# passed through scaler.transform() before model.predict() later in the
# notebook, so the model must be trained in that same scaled feature space;
# fitting on raw features and predicting on scaled ones gives meaningless output.
# X_test_scaled is kept so the model can be evaluated in the same space.
# NOTE(review): the scaler was fitted on every row, including rows that land
# in the test split; fit it on X_train only if strict separation is needed.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the linear regression model
model = LinearRegression()
model.fit(X_train_scaled, y_train)


In [None]:
# Hand-written example observation used to try out the model: a single row
# holding seven feature values, shaped (1, 7) as a 2-D array.
sample_row = [0.5, 0.3, 0.2, 0.4, 0.5, 0.6, 0.5]
new_data = np.array(sample_row).reshape(1, -1)

In [None]:
# Guard: the example row must have exactly as many columns as the data the
# scaler was fitted on, otherwise stop with a clear error.
feature_count_matches = new_data.shape[1] == scaler.data_max_.shape[0]
if not feature_count_matches:
    raise ValueError(f"Expected {scaler.data_max_.shape[0]} features, but got {new_data.shape[1]} features")

In [None]:
# Show the feature matrix (every column of df except 'target')
print(X)

In [None]:
# Scale the new data using the same MinMaxScaler object that was fitted on the
# dataset's feature columns.
# NOTE(review): this overwrites new_data, so re-running this cell on its own
# scales values that have already been scaled.
new_data = scaler.transform(new_data)

In [None]:
# Make a prediction using the model. new_data holds a single row, so the
# result is an array with one predicted target value.
prediction = model.predict(new_data)

In [None]:
# Turn the scaler's per-feature minimum and maximum vectors into single-row
# 2-D arrays of shape (1, n_features), the same layout as new_data.
data_min = scaler.data_min_[np.newaxis, :]
data_max = scaler.data_max_[np.newaxis, :]

In [None]:
# The regression target was never passed through the scaler (only the feature
# columns were), so the model output is already in the target's original
# units. Rescaling it with the per-feature min/max would broadcast the single
# prediction into one unrelated number per feature column.
# The value is kept in a 2-D (1, n) layout so that positional lookups such as
# prediction_unscaled[0][-1] keep working.
prediction_unscaled = np.asarray(prediction).reshape(1, -1)

In [None]:
# Pick the last entry of the first (only) row of the 2-D result array
unscaled_target = prediction_unscaled[0, -1]

In [None]:
# Print the rescaled prediction array computed above
print(prediction_unscaled)

# Print the raw model output. The model was fitted on Global_active_power
# values shifted by one row, so this is a predicted power reading, not a price.
print(prediction)

In [None]:
import joblib

# Save the trained model to a file (model.joblib in the working directory).
# Only the regression model is written; the fitted scaler is not persisted here.
joblib.dump(model, 'model.joblib')
