In [4]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [5]:
# Read in the data
data_path = '../preprocessing/data'
obs_A = pd.read_parquet(f'{data_path}/obs_A.parquet')
est_A = pd.read_parquet(f'{data_path}/est_A.parquet')
obs_B = pd.read_parquet(f'{data_path}/obs_B.parquet')
est_B = pd.read_parquet(f'{data_path}/est_B.parquet')
obs_C = pd.read_parquet(f'{data_path}/obs_C.parquet')
est_C = pd.read_parquet(f'{data_path}/est_C.parquet')

test_A = pd.read_parquet(f'{data_path}/test_A.parquet').dropna()
test_B = pd.read_parquet(f'{data_path}/test_B.parquet').dropna()
test_C = pd.read_parquet(f'{data_path}/test_C.parquet').dropna()

# Concatenate
A = pd.concat([obs_A, est_A]).drop(columns=['date_forecast'])
B = pd.concat([obs_B, est_B]).drop(columns=['date_forecast'])
C = pd.concat([obs_C, est_C]).drop(columns=['date_forecast'])

In [6]:

angle_range = np.arange(0, 91, 1)  


featuresA = pd.DataFrame()

# Loop through each hypothetical panel angle
for angle in angle_range:
    # Calculate the hypothetical optimal interaction terms for that angle
    # Adjust these calculations as per your hypothesis and data availability
    sun_elevation_interaction = np.cos(np.radians(angle -A['sun_elevation:d'])) *A['direct_rad:W']
    sun_azimuth_interaction = np.cos(np.radians(A['sun_azimuth:d'] - 180)) * A['direct_rad:W']  # Assuming south-facing
    cloud_cover_interaction = (1 -A['effective_cloud_cover:p']/100) * A['direct_rad:W']  # Assuming percentage cloud cover
    
    # Combine interactions into a composite feature
    composite_feature = sun_elevation_interaction + sun_azimuth_interaction + cloud_cover_interaction

    # Add the composite feature to the placeholder DataFrame
    feature_label = f'composite_feature_angle_{angle}'
    featuresA[feature_label] = composite_feature

# Now you can join this with your existing data (assumingA has the same index as featuresA)
A_with_features =A.join(featuresA)

# Assume 'pv_measurement' is the target variable
X = A_with_features.drop('pv_measurement', axis=1)
y = A_with_features['pv_measurement']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a model
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Feature importance
importances = model.feature_importances_

# Map feature importances back to the corresponding angles
angle_to_importance = {angle:importance for angle, importance in zip(angle_range, importances)}

# Now, you can sort this dictionary based on importance to find the most predictive angles
sorted_angles = sorted(angle_to_importance.items(), key=lambda x: x[1], reverse=True)

print("Feature importances by angle:")
for angle, importance in sorted_angles:
    print(f"Angle: {angle} degrees, Importance: {importance}")


Mean Squared Error: 128642.64554064735
Feature importances by angle:
Angle: 25 degrees, Importance: 0.7438173940681391
Angle: 4 degrees, Importance: 0.04965235099124046
Angle: 9 degrees, Importance: 0.022796956787788574
Angle: 65 degrees, Importance: 0.012468882989982147
Angle: 42 degrees, Importance: 0.007692404904620061
Angle: 70 degrees, Importance: 0.00519973215429635
Angle: 74 degrees, Importance: 0.004963040478390967
Angle: 22 degrees, Importance: 0.004342646976497527
Angle: 6 degrees, Importance: 0.0043386968667292235
Angle: 45 degrees, Importance: 0.0039058043686443925
Angle: 72 degrees, Importance: 0.0034338548866612376
Angle: 43 degrees, Importance: 0.003324707775953856
Angle: 75 degrees, Importance: 0.003299542873474837
Angle: 21 degrees, Importance: 0.0032246573674673366
Angle: 71 degrees, Importance: 0.00318631281616622
Angle: 51 degrees, Importance: 0.0030173375408062684
Angle: 78 degrees, Importance: 0.0029705012858074233
Angle: 13 degrees, Importance: 0.0029685386875295

In [11]:

angle_range = np.arange(0, 91, 1)  


featuresB = pd.DataFrame()

# Loop through each hypothetical panel angle
for angle in angle_range:
    # Calculate the hypothetical optimal interaction terms for that angle
    # Adjust these calculations as per your hypothesis and data availability
    sun_elevation_interaction = np.cos(np.radians(angle -B['sun_elevation:d'])) *B['direct_rad:W']
    sun_azimuth_interaction = np.cos(np.radians(B['sun_azimuth:d'] - 180)) * B['direct_rad:W']  # Assuming south-facing
    cloud_cover_interaction = (1 -B['effective_cloud_cover:p']/100) * B['direct_rad:W']  # Assuming percentage cloud cover
    
    # Combine interactions into a composite feature
    composite_feature = sun_elevation_interaction + sun_azimuth_interaction + cloud_cover_interaction

    # Add the composite feature to the placeholder DataFrame
    feature_label = f'composite_feature_angle_{angle}'
    featuresB[feature_label] = composite_feature

# Now you can join this with your existing data (assumingA has the same index as featuresB)
B_with_features =B.join(featuresB)

# Assume 'pv_measurement' is the target variable
X = B_with_features.drop('pv_measurement', axis=1)
y = B_with_features['pv_measurement']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a model
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Feature importance
importances = model.feature_importances_

# Map feature importances back to the corresponding angles
angle_to_importance = {angle:importance for angle, importance in zip(angle_range, importances)}

# Now, you can sort this dictionary based on importance to find the most predictive angles
sorted_angles = sorted(angle_to_importance.items(), key=lambda x: x[1], reverse=True)

print("Feature importances by angle:")
for angle, importance in sorted_angles:
    print(f"Angle: {angle} degrees, Importance: {importance}")




Mean Squared Error: 2540.531826490065
Feature importances by angle:
Angle: 9 degrees, Importance: 0.7085481662427173
Angle: 7 degrees, Importance: 0.09684733196004074
Angle: 25 degrees, Importance: 0.052077456141024
Angle: 0 degrees, Importance: 0.009262596516682209
Angle: 4 degrees, Importance: 0.0066473574444597975
Angle: 2 degrees, Importance: 0.006429185254349323
Angle: 1 degrees, Importance: 0.005330757762601776
Angle: 70 degrees, Importance: 0.0053018415905305375
Angle: 72 degrees, Importance: 0.0045348951713423055
Angle: 42 degrees, Importance: 0.004014118084222411
Angle: 74 degrees, Importance: 0.0038601787601188903
Angle: 75 degrees, Importance: 0.0037068212294727784
Angle: 60 degrees, Importance: 0.0032773421719446937
Angle: 43 degrees, Importance: 0.0032349344252469835
Angle: 71 degrees, Importance: 0.0029181521554117046
Angle: 66 degrees, Importance: 0.002617802183246244
Angle: 65 degrees, Importance: 0.0022961951302629315
Angle: 21 degrees, Importance: 0.002268425315709194

KeyError: "['composite_feature_angle_9'] not in index"

In [10]:
angle_range = np.arange(0, 91, 1)

featuresC = pd.DataFrame()

# Loop through each hypothetical panel angle
for angle in angle_range:
    # Calculate the hypothetical optimal interaction terms for that angle
    # Adjust these calculations as per your hypothesis and data availability
    sun_elevation_interaction = np.cos(np.radians(angle - C['sun_elevation:d'])) * C['direct_rad:W']
    sun_azimuth_interaction = np.cos(np.radians(C['sun_azimuth:d'] - 180)) * C['direct_rad:W']  # Assuming south-facing
    cloud_cover_interaction = (1 - C['effective_cloud_cover:p']/100) * C['direct_rad:W']  # Assuming percentage cloud cover
    
    # Combine interactions into a composite feature
    composite_feature = sun_elevation_interaction + sun_azimuth_interaction + cloud_cover_interaction

    # Add the composite feature to the placeholder DataFrame
    feature_label = f'composite_feature_angle_{angle}'
    featuresC[feature_label] = composite_feature

# Now you can join this with your existing data (assuming A has the same index as featuresC)
C_with_features = C.join(featuresC)

# Assume 'pv_measurement' is the target variable
X = C_with_features.drop('pv_measurement', axis=1)
y = C_with_features['pv_measurement']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a model
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Feature importance
importances = model.feature_importances_

# Map feature importances back to the corresponding angles
angle_to_importance = {angle: importance for angle, importance in zip(angle_range, importances)}

# Now, you can sort this dictionary based on importance to find the most predictive angles
sorted_angles = sorted(angle_to_importance.items(), key=lambda x: x[1], reverse=True)

print("Feature importances by angle:")
for angle, importance in sorted_angles:
    print(f"Angle: {angle} degrees, Importance: {importance}")


Mean Squared Error: 2240.6723444799104
Feature importances by angle:
Angle: 9 degrees, Importance: 0.7336444283992517
Angle: 0 degrees, Importance: 0.05301765562423198
Angle: 7 degrees, Importance: 0.029827855428512743
Angle: 25 degrees, Importance: 0.02755400547610793
Angle: 75 degrees, Importance: 0.013065197351039528
Angle: 1 degrees, Importance: 0.012605812715196738
Angle: 42 degrees, Importance: 0.006773119492696023
Angle: 4 degrees, Importance: 0.006551534636620361
Angle: 12 degrees, Importance: 0.006401903721066215
Angle: 5 degrees, Importance: 0.004301278541314043
Angle: 2 degrees, Importance: 0.004262109685247191
Angle: 68 degrees, Importance: 0.003538917356093665
Angle: 73 degrees, Importance: 0.0032229153630287757
Angle: 76 degrees, Importance: 0.002977578249117823
Angle: 21 degrees, Importance: 0.0028954151620217343
Angle: 70 degrees, Importance: 0.0027992225268693347
Angle: 72 degrees, Importance: 0.002735351387715627
Angle: 43 degrees, Importance: 0.0026905672056662376
An