## RETRIEVE NASA API DATA

In [None]:
import requests
import csv
import time
import json
import numpy as np
import pandas as pd

In [None]:
# Step 1: Define bounding box (your region)
min_lat, max_lat = 41.644, 42.023
min_lon, max_lon = -87.525, -88.260
lat_step = 0.1
lon_step = 0.1

# Step 2: Create grid of points
latitudes = np.arange(min_lat, max_lat, lat_step)
longitudes = np.arange(min_lon, max_lon, lon_step)

# Valid parameters for monthly data
parameters = "ALLSKY_SFC_SW_DWN,CLRSKY_SFC_SW_DWN,ALLSKY_SFC_SW_DNI,ALLSKY_KT,ALLSKY_SFC_LW_DWN,CLRSKY_SFC_PAR_TOT"

# Base API URL
base_url = "https://power.larc.nasa.gov/api/temporal/monthly/point"

# Step 3: Loop through each grid point and request data
all_data = []

for lat in latitudes:
    for lon in longitudes:
        print(f"Requesting data for ({lat:.3f}, {lon:.3f})...")
        url = (
            f"{base_url}?parameters={parameters}&community=RE"
            f"&longitude={lon:.3f}&latitude={lat:.3f}"
            f"&start=2019&end=2023&format=JSON"
        )

        try:
            response = requests.get(url)
            data = response.json()

            records = data["properties"]["parameter"]
            months = list(data["properties"]["parameter"][list(records.keys())[0]].keys())

            # Create DataFrame
            df = pd.DataFrame({
                "YearMonth": months,
                "Latitude": lat,
                "Longitude": lon
            })

            for param in records:
                df[param] = [records[param][month] for month in months]

            all_data.append(df)

            # Avoid hitting API too hard
            time.sleep(0.5)

        except Exception as e:
            print(f"Failed at ({lat:.3f}, {lon:.3f}): {e}")

# Step 4: Combine and save
combined_df = pd.concat(all_data, ignore_index=True)
combined_df.to_csv("solar_region_data_2019.csv", index=False)
print("All regional data saved to 'solar_region_data_2019.csv'")

In [None]:
# Step 1: Define bounding box (your region)
min_lat, max_lat = 41.644, 42.0236
min_lon, max_lon = -87.940, -87.523
lat_step = 0.01
lon_step = 0.01

# Step 2: Create grid of points
latitudes = np.arange(min_lat, max_lat, lat_step)
longitudes = np.arange(min_lon, max_lon, lon_step)

# Valid parameters for monthly data7
parameters = "ALLSKY_SFC_SW_DWN,CLRSKY_SFC_SW_DWN,ALLSKY_SFC_SW_DNI,ALLSKY_KT,ALLSKY_SFC_LW_DWN,CLRSKY_SFC_PAR_TOT"

# Base API URL
base_url = "https://power.larc.nasa.gov/api/temporal/monthly/point"

# Step 3: Loop through each grid point and request data
all_data = []

for lat in latitudes:
    for lon in longitudes:
        print(f"Requesting data for ({lat:.3f}, {lon:.3f})...")
        url = (
            f"{base_url}?parameters={parameters}&community=RE"
            f"&longitude={lon:.3f}&latitude={lat:.3f}"
            f"&start=2019&end=2024&format=JSON"
        )

        try:
            response = requests.get(url)
            data = response.json()

            records = data["properties"]["parameter"]
            months = list(data["properties"]["parameter"][list(records.keys())[0]].keys())

            # Create DataFrame
            df = pd.DataFrame({
                "YearMonth": months,
                "Latitude": lat,
                "Longitude": lon
            })

            for param in records:
                df[param] = [records[param][month] for month in months]

            all_data.append(df)

            # Avoid hitting API too hard
            time.sleep(0.5)

        except Exception as e:
            print(f"Failed at ({lat:.3f}, {lon:.3f}): {e}")

# Step 4: Combine and save
combined_df = pd.concat(all_data, ignore_index=True)
combined_df.to_csv("chicago_solar_region_data1.csv", index=False)
print("All regional data saved to 'chicago_solar_region_data1.csv'")


Requesting data for (41.644, -87.940)...
Requesting data for (41.644, -87.930)...
Requesting data for (41.644, -87.920)...
Requesting data for (41.644, -87.910)...
Requesting data for (41.644, -87.900)...
Requesting data for (41.644, -87.890)...
Requesting data for (41.644, -87.880)...
Requesting data for (41.644, -87.870)...
Requesting data for (41.644, -87.860)...
Requesting data for (41.644, -87.850)...
Requesting data for (41.644, -87.840)...
Requesting data for (41.644, -87.830)...
Requesting data for (41.644, -87.820)...
Requesting data for (41.644, -87.810)...
Requesting data for (41.644, -87.800)...
Requesting data for (41.644, -87.790)...
Requesting data for (41.644, -87.780)...
Requesting data for (41.644, -87.770)...
Requesting data for (41.644, -87.760)...
Requesting data for (41.644, -87.750)...
Requesting data for (41.644, -87.740)...
Requesting data for (41.644, -87.730)...
Requesting data for (41.644, -87.720)...
Requesting data for (41.644, -87.710)...
Requesting data 

In [None]:
#CLEAN THE DATA
import pandas as pd

# Load the CSV
df = pd.read_csv("cook_solar.csv") 

# Convert all values to numeric if possible, coerce errors (just in case)
df = df.apply(pd.to_numeric, errors='coerce')

# Filter out rows where ANY column has -999.0
df_cleaned = df[~(df == -999.0).any(axis=1)]

# Save the cleaned data
df_cleaned.to_csv("cleaned_file.csv", index=False)



In [25]:
# Convert YearMonth to year and month
df['Year'] = df['YearMonth'].astype(str).str[:4].astype(int)
df['Month'] = df['YearMonth'].astype(str).str[4:].astype(int)
# Remove the 13th month
df = df[df['Month'] != 13]

df['Month_sin'] = np.sin(2 * np.pi * df['Month'] / 12)
df['Month_cos'] = np.cos(2 * np.pi * df['Month'] / 12)

df.head()

Unnamed: 0,YearMonth,Latitude,Longitude,ALLSKY_KT,ALLSKY_SFC_LW_DWN,ALLSKY_SFC_SW_DNI,ALLSKY_SFC_SW_DWN,CLRSKY_SFC_PAR_TOT,CLRSKY_SFC_SW_DWN,Year,Month,Month_sin,Month_cos
0,201901,41.644,-87.94,0.34,6.05,1.98,1.38,1.19,2.82,2019,1,0.5,0.8660254
1,201902,41.644,-87.94,0.39,6.4,2.32,2.12,1.66,3.89,2019,2,0.866025,0.5
2,201903,41.644,-87.94,0.48,6.44,4.19,3.62,2.33,5.46,2019,3,1.0,6.123234000000001e-17
3,201904,41.644,-87.94,0.47,7.43,4.23,4.53,2.97,6.79,2019,4,0.866025,-0.5
4,201905,41.644,-87.94,0.44,8.32,3.64,4.85,3.38,7.58,2019,5,0.5,-0.8660254


In [26]:
target = 'ALLSKY_SFC_SW_DWN'  # average daily shortwave radiation

In [31]:
#train test split
from sklearn.model_selection import train_test_split

features = ['Latitude', 'Longitude', 'Month_sin', 'Month_cos','ALLSKY_KT', 'ALLSKY_SFC_LW_DWN']  # you can add others
x = df[features]
y = df[target]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [32]:
#train the model

from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(x_train, y_train)

In [33]:
#Evaluate
from sklearn.metrics import mean_absolute_error, r2_score

y_pred = model.predict(x_test)
print("MAE:", mean_absolute_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))

MAE: 1.1499643924374459e-14
R² Score: 1.0


In [36]:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(model, x, y, cv=5, scoring='neg_mean_absolute_error')
print("Cross-validated MAE:", -scores.mean())

Cross-validated MAE: 0.01160525562373289


In [37]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(RandomForestRegressor(), param_grid, cv=5)
grid_search.fit(x_train, y_train)

# Check best parameters
print("Best parameters:", grid_search.best_params_)
model = grid_search.best_estimator_


Best parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}


In [79]:
# Example: Predict for Chicago in April
# Inputs
lat,lon = 43.75041438389484, -84.84492681884045
month = 7
ALLSKY_KT = 0.47
ALLSKY_SFC_LW_DWN = 7.43

# Encode month cyclically
month_sin = np.sin(2 * np.pi * month / 12)
month_cos = np.cos(2 * np.pi * month / 12)

# Feature names — must match training
features = ['Latitude', 'Longitude', 'Month_sin', 'Month_cos', 'ALLSKY_KT', 'ALLSKY_SFC_LW_DWN']

# Create a DataFrame for prediction
X_new = pd.DataFrame([[lat, lon, month_sin, month_cos, ALLSKY_KT, ALLSKY_SFC_LW_DWN]], columns=features)

# Predict
prediction = model.predict(X_new)
print("Estimated daily solar energy (kWh/m²):", prediction[0])


Estimated daily solar energy (kWh/m²): 5.188199999999994


In [None]:
#SAVE THE MODEL
import joblib

# Save the best estimator
joblib.dump(model, 'solar_model.pkl')

In [52]:
import joblib
import math
# Load your saved model
model = joblib.load('solar_model.pkl')


# Fixed features (hardcoded)
# User input (change lat/lon values here)
lat,lon = 41.749869979845556, -88.16472257924035
month = 5
ALLSKY_KT = 0.47
ALLSKY_SFC_LW_DWN = 7.43

month_sin = np.sin(2 * np.pi * month / 12)
month_cos = np.cos(2 * np.pi * month / 12)



# Create feature vector
input_features = np.array([[lat, lon, month_sin, month_cos, ALLSKY_KT, ALLSKY_SFC_LW_DWN]])

# Predict
prediction = model.predict(input_features)

print(f"Estimated daily solar energy (kWh/m²): {prediction[0]}")


Estimated daily solar energy (kWh/m²): 5.119999999999993


