## Import Library

In [71]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

## Load Dataset

In [72]:
file_path = 'price.csv'
df = pd.read_csv(file_path)

In [46]:
df.shape

(35040, 2)

In [47]:
df.head()

Unnamed: 0,timestamp,AT_price_day_ahead_EUR_MW
0,2015-01-01 00:00:00+00:00,60.6
1,2015-01-01 00:15:00+00:00,61.72
2,2015-01-01 00:30:00+00:00,28.0
3,2015-01-01 00:45:00+00:00,15.68
4,2015-01-01 01:00:00+00:00,65.08


In [48]:
df.describe()

Unnamed: 0,AT_price_day_ahead_EUR_MW
count,35040.0
mean,31.741696
std,14.775249
min,-149.0
25%,22.5
50%,31.0
75%,41.2125
max,381.11


## Data Preprocessing

In [74]:
# Convert the timestamp column to datetime and sort by it
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.sort_values(by='timestamp')

In [75]:
df.head()

Unnamed: 0,timestamp,AT_price_day_ahead_EUR_MW
0,2015-01-01 00:00:00+00:00,60.6
1,2015-01-01 00:15:00+00:00,61.72
2,2015-01-01 00:30:00+00:00,28.0
3,2015-01-01 00:45:00+00:00,15.68
4,2015-01-01 01:00:00+00:00,65.08


In [78]:
# Create features directly from AT_price_day_ahead_EUR_MW
df['price_lag_1'] = df['AT_price_day_ahead_EUR_MW'].shift(1)
df['price_lag_2'] = df['AT_price_day_ahead_EUR_MW'].shift(2)
df['price_moving_avg_3'] = df['AT_price_day_ahead_EUR_MW'].rolling(window=3).mean()
df['price_std_3'] = df['AT_price_day_ahead_EUR_MW'].rolling(window=3).std()

In [80]:
# Drop rows with NaN values caused by lagging/rolling
df = df.dropna()

In [81]:
df.head()

Unnamed: 0,timestamp,AT_price_day_ahead_EUR_MW,price_lag_1,price_lag_2,price_moving_avg_3,price_std_3
2,2015-01-01 00:30:00+00:00,28.0,61.72,60.6,50.106667,19.153123
3,2015-01-01 00:45:00+00:00,15.68,28.0,61.72,35.133333,23.834507
4,2015-01-01 01:00:00+00:00,65.08,15.68,28.0,36.253333,25.713384
5,2015-01-01 01:15:00+00:00,54.18,65.08,15.68,44.98,25.953227
6,2015-01-01 01:30:00+00:00,40.72,54.18,65.08,53.326667,12.202399


## Data Split

In [89]:
# Define features (X) and target (y)
X = df[['price_lag_1', 'price_lag_2', 'price_moving_avg_3', 'price_std_3']]
y = df['AT_price_day_ahead_EUR_MW']

In [90]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Model Implementation

In [85]:
# Train a Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [86]:
# Predict on the test set
y_pred = model.predict(X_test)

In [87]:
# Calculate the R2 score
r2 = r2_score(y_test, y_pred)
print(f"R2 Score: {r2}")

R2 Score: 0.9433495348624268
