In [27]:
# Importowanie bibliotek
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from datetime import datetime
import matplotlib.pyplot as plt

In [28]:
# Wczytanie danych
data = pd.read_csv("D:\SAGES\project\Energy_consumption.csv")

In [29]:
# Konwersja kolumny 'Timestamp' na format datetime
data['Timestamp'] = pd.to_datetime(data['Timestamp'])

In [30]:
# Wyświetlenie podstawowych informacji
print("Podstawowe informacje o danych:")
print(data.info())
print("\nPodgląd danych:")
print(data.head())

Podstawowe informacje o danych:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Timestamp          1000 non-null   datetime64[ns]
 1   Temperature        1000 non-null   float64       
 2   Humidity           1000 non-null   float64       
 3   SquareFootage      1000 non-null   float64       
 4   Occupancy          1000 non-null   int64         
 5   HVACUsage          1000 non-null   object        
 6   LightingUsage      1000 non-null   object        
 7   RenewableEnergy    1000 non-null   float64       
 8   DayOfWeek          1000 non-null   object        
 9   Holiday            1000 non-null   object        
 10  EnergyConsumption  1000 non-null   float64       
dtypes: datetime64[ns](1), float64(5), int64(1), object(4)
memory usage: 86.1+ KB
None

Podgląd danych:
            Timestamp  Temperature   Humidi

In [31]:
# Eksploracja danych
print("\nSprawdzanie braków danych:")
print(data.isnull().sum())


Sprawdzanie braków danych:
Timestamp            0
Temperature          0
Humidity             0
SquareFootage        0
Occupancy            0
HVACUsage            0
LightingUsage        0
RenewableEnergy      0
DayOfWeek            0
Holiday              0
EnergyConsumption    0
dtype: int64


In [32]:
le = LabelEncoder()

for col in ['HVACUsage','LightingUsage', 'DayOfWeek', 'Holiday']:
    le.fit(data[col])
    data[col] = le.transform(data[col])

data.head()

Unnamed: 0,Timestamp,Temperature,Humidity,SquareFootage,Occupancy,HVACUsage,LightingUsage,RenewableEnergy,DayOfWeek,Holiday,EnergyConsumption
0,2022-01-01 00:00:00,25.139433,43.431581,1565.693999,5,1,0,2.774699,1,0,75.364373
1,2022-01-01 01:00:00,27.731651,54.225919,1411.064918,1,1,1,21.831384,2,0,83.401855
2,2022-01-01 02:00:00,28.704277,58.907658,1755.715009,2,0,0,6.764672,3,0,78.270888
3,2022-01-01 03:00:00,20.080469,50.371637,1452.316318,1,0,1,8.623447,6,0,56.51985
4,2022-01-01 04:00:00,23.097359,51.401421,1094.130359,9,1,0,3.071969,0,0,70.811732


In [33]:
# Wypełnianie braków danych (jeśli występują)
#data.fillna(data.median(), inplace=True)

In [34]:
# Tworzenie cech czasowych z kolumny 'Timestamp'
data['Hour'] = data.index.hour  # Godzina
data['Day'] = data.index.day    # Dzień miesiąca
data['Month'] = data.index.month  # Miesiąc
data['Weekday'] = data.index.weekday  # Dzień tygodnia (0=poniedziałek)

AttributeError: 'RangeIndex' object has no attribute 'hour'

In [None]:
# Wybór cech i zmiennej docelowej
X = data.drop(columns=['Energy'])  # Cechy
y = data['Energy']                # Zmienna docelowa

In [None]:
# Skalowanie cech
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# Podział danych na zbiór treningowy i testowy
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, shuffle=False)

In [None]:
# Trenowanie modelu
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [None]:
# Predykcja na zbiorze testowym
y_pred = model.predict(X_test)

In [None]:
# Obliczenie metryk
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

In [None]:
print("\nWyniki modelu:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")

In [None]:
# Wykres wyników: rzeczywiste vs przewidywane
plt.figure(figsize=(10, 6))
plt.plot(y_test.index, y_test, label="Rzeczywiste", color="blue", alpha=0.6)
plt.plot(y_test.index, y_pred, label="Przewidywane", color="orange", alpha=0.6)
plt.title("Rzeczywiste vs Przewidywane zapotrzebowanie na energię")
plt.xlabel("Czas")
plt.ylabel("Zapotrzebowanie na energię")
plt.legend()
plt.show()

In [None]:
# Opcjonalnie: zapisanie wyników do pliku
output = pd.DataFrame({'Timestamp': y_test.index, 'Actual': y_test, 'Predicted': y_pred})
output.to_csv('time_series_energy_predictions.csv', index=False)
print("\nPredykcje zapisane do pliku 'time_series_energy_predictions.csv'")