In [None]:
import requests
import zipfile
import os
from io import BytesIO

def download_and_extract_zip(zip_url, extract_path, timeout=60):
    """Download a ZIP archive and extract its files, flattened, into a directory.

    The archive is fetched into memory, then every file member is written
    directly into ``extract_path`` using only its base name; the folder
    structure inside the archive is discarded, so members that share a base
    name overwrite each other (last one wins).

    Args:
        zip_url: URL the archive is downloaded from.
        extract_path: Directory that receives the extracted files. It is
            created if it does not exist yet.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block forever on a stalled connection.

    Returns:
        None. Failures are reported with ``print`` and are not re-raised, so a
        broken download does not stop the remaining downloads.
    """

    try:
        response = requests.get(zip_url, timeout=timeout)
        response.raise_for_status()  # Raise an error for bad responses

        # Make sure the destination exists; otherwise every open() below fails.
        os.makedirs(extract_path, exist_ok=True)

        with zipfile.ZipFile(BytesIO(response.content)) as zip_file:
            for member in zip_file.namelist():
                filename = os.path.basename(member)
                if not filename:
                    continue  # Skip directories

                target_path = os.path.join(extract_path, filename)

                # Both handles are closed even if the copy fails half-way.
                with zip_file.open(member) as source, open(target_path, "wb") as target_file:
                    target_file.write(source.read())

    except requests.exceptions.RequestException as e:
        print(f"Download error for '{zip_url}': {e}")
    except zipfile.BadZipFile:
        print(f"Invalid ZIP file: '{zip_url}'.")
    except Exception as e:
        print(f"Unexpected error for '{zip_url}': {e}")

# Google Drive file IDs of the ZIP archives this notebook depends on
zip_file_ids = [
    "1As-67MFNrpim_jGYgSOgT2SF119gP790",
    "1pjaRe_lSIygHyZ8luWplSC6fuwAEmwTd",
]

# Everything is extracted next to the notebook, i.e. into the current working directory
script_directory = os.getcwd()

# Turn every file ID into a direct-download link
zip_urls = list(
    f"https://drive.google.com/uc?export=download&id={file_id}"
    for file_id in zip_file_ids
)

# Fetch and unpack the archives one after another
for zip_url in zip_urls:
    download_and_extract_zip(zip_url, script_directory)


In [None]:
import pandas as pd
import os
import glob

# Read CSV files
production_data = pd.read_csv("production.csv")
production_data['date'] = pd.to_datetime(production_data['date'])

weather_data = pd.read_csv("processed_weather.csv")
weather_data['date'] = pd.to_datetime(weather_data['date'])

# Sort chronologically so the forward fill below walks from older to newer days
production_data = production_data.sort_values(by=["date", "hour"])
weather_data = weather_data.sort_values(by=["date", "hour", "lat", "lon"])

# Fill missing values in weather_data using the previous day's same hour, lat, and lon values.
# GroupBy.ffill() replaces fillna(method='ffill'), which is deprecated in recent pandas.
for col in weather_data.columns:
    if col in ['date', 'hour', 'lat', 'lon']:
        continue
    weather_data[col] = weather_data.groupby(['hour', 'lat', 'lon'])[col].ffill()

# Identify common dates and hours present in both datasets
common_dates = set(production_data["date"].unique()) & set(weather_data["date"].unique())
common_hours = set(production_data["hour"].unique()) & set(weather_data["hour"].unique())

# Save each coordinate's weather data to a separate file.
# Grouping by (lat, lon) visits only coordinate pairs that really occur in the data;
# looping over unique lats x unique lons would also create empty files whose NaN
# correlations would later turn every weight into NaN.
koordinat_dosyalari = []
for (lat, lon), coord_weather in weather_data.groupby(["lat", "lon"]):
    dosya_adi = f"koordinat_{lat}_{lon}.csv"

    # Always overwrite: appending to a leftover file from an interrupted run would mix
    # rows that still carry lat/lon with a header that no longer has those columns.
    coord_weather.drop(columns=["lat", "lon"]).to_csv(dosya_adi, index=False)
    koordinat_dosyalari.append(dosya_adi)

# The production rows used for the correlations do not depend on the coordinate
production_common = production_data[
    (production_data["date"].isin(common_dates)) & (production_data["hour"].isin(common_hours))
]

# Perform correlation analysis for each coordinate.
# Only the files written above are used, so stale koordinat_*.csv files are ignored.
koordinat_korelasyonlari = {}
for dosya_adi in koordinat_dosyalari:
    # Read the coordinate's data
    koordinat_data = pd.read_csv(dosya_adi)
    koordinat_data['date'] = pd.to_datetime(koordinat_data['date'])

    # Filter data for common dates and hours
    koordinat_data = koordinat_data[
        (koordinat_data["date"].isin(common_dates)) & (koordinat_data["hour"].isin(common_hours))
    ]

    # Merge production data with coordinate data
    merged_data = pd.merge(production_common, koordinat_data, on=["date", "hour"])

    # Correlation of production with each weather variable at this coordinate
    koordinat_korelasyonlari[dosya_adi] = {
        col: merged_data["production"].corr(merged_data[col])
        for col in koordinat_data.columns
        if col not in ['date', 'hour']
    }

# Calculate influence factors for each variable for each coordinate
koordinat_etkileri = {}
for koordinat, korelasyonlar in koordinat_korelasyonlari.items():
    koordinat_etkileri[koordinat] = {}
    for degisken, korelasyon in korelasyonlar.items():
        # Influence is the squared correlation. An undefined correlation (e.g. the
        # variable is constant at this coordinate) counts as no influence instead of
        # NaN, which would otherwise propagate to every coordinate when normalising.
        etki = 0.0 if pd.isna(korelasyon) else abs(korelasyon) ** 2
        koordinat_etkileri[koordinat][degisken] = etki

# Normalize influence factors for each variable so they sum to 1 across coordinates
if koordinat_etkileri:
    degiskenler = list(next(iter(koordinat_etkileri.values())).keys())
    for degisken in degiskenler:
        etki_toplami = sum(koordinat_etkileri[koordinat][degisken] for koordinat in koordinat_etkileri)
        for koordinat in koordinat_etkileri:
            if etki_toplami > 0:
                koordinat_etkileri[koordinat][degisken] /= etki_toplami
            else:
                # No coordinate shows any correlation: fall back to a plain average
                koordinat_etkileri[koordinat][degisken] = 1.0 / len(koordinat_etkileri)

# Convert influence factors to a DataFrame and print them
etki_df = pd.DataFrame(koordinat_etkileri).T.reset_index()
print("Koordinat Etki Çarpanları:")
print(etki_df.to_string(index=False))

# Multiply each column in coordinate files by the corresponding influence factor and combine the results
agirlikli_parcalar = []
for dosya_adi in koordinat_dosyalari:
    # Read the coordinate data
    koordinat_data = pd.read_csv(dosya_adi)
    koordinat_data['date'] = pd.to_datetime(koordinat_data['date'])

    # Multiply each column by the corresponding influence factor
    for degisken, etki in koordinat_etkileri[dosya_adi].items():
        koordinat_data[degisken] = koordinat_data[degisken] * etki

    # Group by date and hour and sum the values
    agirlikli_parcalar.append(koordinat_data.groupby(['date', 'hour']).sum().reset_index())

# Concatenate once (growing a DataFrame inside the loop is quadratic), then add up
# the weighted contributions of all coordinates per date and hour
sonuc_df = pd.concat(agirlikli_parcalar, ignore_index=True)
sonuc_df = sonuc_df.groupby(['date', 'hour']).sum().reset_index()

# Round values to 3 decimal places where necessary
for col in sonuc_df.columns:
    if col in ['date', 'hour']:
        continue
    sonuc_df[col] = sonuc_df[col].round(3)

# Save the final result to a CSV file
sonuc_df.to_csv("agirlikli.csv", index=False)

# Remove the coordinate files as they are no longer needed
for dosya_adi in glob.glob("koordinat_*.csv"):
    os.remove(dosya_adi)


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the three inputs: production, raw weather and the influence-weighted weather
production_df = pd.read_csv("production.csv")
weather_df = pd.read_csv("processed_weather.csv")
weighted_df = pd.read_csv("agirlikli.csv")

# Parse the date column and build an hourly timestamp ("YYYY-MM-DD H:00") in every frame
for frame in (production_df, weather_df, weighted_df):
    frame['date'] = pd.to_datetime(frame['date'])
    day_text = frame['date'].dt.strftime('%Y-%m-%d')
    hour_text = frame["hour"].astype(str)
    frame["datetime"] = pd.to_datetime(day_text + ' ' + hour_text + ':00')

# Descriptive Analysis: Production Data

# 1. Daily Production Averages
production_day = production_df["datetime"].dt.date
daily_production_mean = production_df.groupby(production_day)["production"].mean()

plt.figure(figsize=(12, 6))
plt.plot(
    daily_production_mean.index,
    daily_production_mean.values,
    label="Average Daily Production",
)
plt.xlabel("Date")
plt.ylabel("Average Production (MWh)")
plt.title("Average Daily Solar Power Production")
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.show()

# 2. Hourly Production Averages
production_hour = production_df["datetime"].dt.hour
hourly_production_mean = production_df.groupby(production_hour)["production"].mean()

plt.figure(figsize=(8, 4))
plt.bar(hourly_production_mean.index, hourly_production_mean.values)
plt.xlabel("Hour of Day")
plt.ylabel("Average Production (MWh)")
plt.title("Average Hourly Solar Power Production")
plt.show()

# Descriptive Analysis: Weighted Weather Data
weather_variables = [
    'dswrf_surface',
    'tcdc_low.cloud.layer',
    'tcdc_middle.cloud.layer',
    'tcdc_high.cloud.layer',
    'tcdc_entire.atmosphere',
    'uswrf_top_of_atmosphere',
    'csnow_surface',
    'dlwrf_surface',
    'uswrf_surface',
    'tmp_surface',
]

# One time-series figure of daily means per weighted weather variable
weighted_day = weighted_df["datetime"].dt.date
for variable in weather_variables:
    daily_weighted_mean = weighted_df.groupby(weighted_day)[variable].mean()

    # Dots in the column names are shown as spaces in all captions
    caption = f"Daily Average {variable.replace('.',' ')}"

    plt.figure(figsize=(12, 6))
    plt.plot(daily_weighted_mean.index, daily_weighted_mean.values, label=caption)
    plt.xlabel("Date")
    plt.ylabel(caption)
    plt.title(f"{caption} Time Series")
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the weighted weather data
weighted_df = pd.read_csv("agirlikli.csv")
weighted_df['date'] = pd.to_datetime(weighted_df['date'])
weighted_df["datetime"] = pd.to_datetime(weighted_df['date'].dt.strftime('%Y-%m-%d') + ' ' + weighted_df["hour"].astype(str) + ':00')

# Load the production data
production_df = pd.read_csv("production.csv")
production_df['date'] = pd.to_datetime(production_df['date'])

# Merge production data with weighted weather data on date and hour
merged_df = pd.merge(production_df, weighted_df, on=["date", "hour"], how="inner")

# Calculate the correlation of each weather variable with production.
# numeric_only=True keeps the datetime columns ('date', 'datetime') out of the
# computation; without it pandas >= 2.0 no longer drops non-numeric columns
# silently and the call fails.
correlations = merged_df.corr(numeric_only=True)["production"].drop("production")

# Print the correlation values
print("Correlations of Weighted Weather Data with Production:")
print(correlations)

# Plot the correlation heatmap
plt.figure(figsize=(10, 6))
sns.heatmap(correlations.to_frame(), annot=True, cmap="coolwarm")
plt.title("Correlation Heatmap: Production vs. Weighted Weather Data")
plt.show()

# List of weather variables to analyze
weather_variables = ['dswrf_surface', 'tcdc_low.cloud.layer', 'tcdc_middle.cloud.layer',
                   'tcdc_high.cloud.layer', 'tcdc_entire.atmosphere', 'uswrf_top_of_atmosphere',
                   'csnow_surface', 'dlwrf_surface', 'uswrf_surface', 'tmp_surface']

# The merged frame does not depend on the plotted variable, so it is prepared once
# instead of being rebuilt on every loop iteration.
common_dates = set(production_df["date"].unique()) & set(weighted_df["date"].unique())
merged_df = merged_df[merged_df["date"].isin(common_dates)]

# Daily average production is the same for every scatter plot
daily_production_mean = merged_df.groupby(merged_df["date"])["production"].mean()

# Analyze the relationship between daily averages of weather variables and production
for variable in weather_variables:
    # Daily average of the current weather variable
    daily_weighted_mean = merged_df.groupby(merged_df["date"])[variable].mean()

    # Create a scatter plot of daily averages
    plt.figure(figsize=(10, 6))
    plt.scatter(daily_weighted_mean, daily_production_mean)
    plt.xlabel(f"Daily Average {variable.replace('.',' ')}")
    plt.ylabel("Daily Average Production (MWh)")
    plt.title(f"Daily Average Production vs. Daily Average {variable.replace('.',' ')}")
    plt.show()


In [None]:
import pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt

# Read the production series and the weighted weather data
production_df = pd.read_csv("production.csv")
averages_df = pd.read_csv("agirlikli.csv")

# Build a 'datetime' column in each frame, provided it has both 'date' and 'hour'
for frame in (production_df, averages_df):
    if {'date', 'hour'}.issubset(frame.columns):
        timestamp_text = frame["date"] + " " + frame["hour"].astype(str) + ":00"
        frame["datetime"] = pd.to_datetime(timestamp_text)

# Keep only the timestamps that occur in both frames
merged_df = pd.merge(production_df, averages_df, on="datetime", how="inner")

# Autocorrelation (ACF) followed by partial autocorrelation (PACF) of production,
# each drawn for the first 50 lags in its own figure
production_series = merged_df["production"]
for draw_correlogram in (plot_acf, plot_pacf):
    draw_correlogram(production_series, lags=50)
    plt.show()


In [None]:
import pandas as pd
import numpy as np
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from joblib import Parallel, delayed

# Read production and the weighted weather features
production_df = pd.read_csv("production.csv")
weather_df = pd.read_csv("agirlikli.csv")

# Combine the textual date with the hour into a proper timestamp in both frames
for frame in (production_df, weather_df):
    timestamp_text = frame["date"] + " " + frame["hour"].astype(str) + ":00"
    frame["datetime"] = pd.to_datetime(timestamp_text)

# Join on the timestamp, drop the duplicated date/hour columns the merge
# suffixes with _x/_y, and forward-fill whatever is still missing
merged_df = (
    pd.merge(production_df, weather_df, on="datetime", how="inner")
    .drop(columns=["date_x", "hour_x", "date_y", "hour_y"])
    .ffill()
)

# Exogenous regressors and the column to be forecast
exog_features = [
    'dswrf_surface',
    'tcdc_low.cloud.layer',
    'tcdc_middle.cloud.layer',
    'tcdc_high.cloud.layer',
    'tcdc_entire.atmosphere',
    'uswrf_top_of_atmosphere',
    'csnow_surface',
    'dlwrf_surface',
    'uswrf_surface',
    'tmp_surface',
]
target = 'production'

# Function to train ARIMA model for a specific hour
def train_arima_for_hour(hour):
    """Fit an auto-ARIMA model on the observations of one hour of the day.

    Reads the notebook-level ``merged_df``, ``target`` and ``exog_features``.

    Args:
        hour: Hour of day (0-23) whose rows of ``merged_df`` are used.

    Returns:
        A ``(hour, model)`` tuple; ``model`` is ``None`` when ``merged_df`` has
        no rows for that hour.
    """
    # Keep only this hour's rows and make sure they are in chronological order,
    # since the model treats the rows as consecutive time steps.
    df_hour = merged_df[merged_df['datetime'].dt.hour == hour].sort_values('datetime')

    # Return None if no data for the hour
    if df_hour.empty:
        return hour, None

    # Train ARIMA model with exogenous features. They are passed as `X`, the keyword
    # documented by pmdarima >= 1.8; the older `exogenous` keyword was deprecated.
    # NOTE(review): m=24 is applied to a series that holds a single hour of the day;
    # confirm that this seasonal period is intended.
    model = auto_arima(df_hour[target], X=df_hour[exog_features], seasonal=True, m=24,
                       stepwise=True, suppress_warnings=True, error_action='ignore',
                       max_p=2, max_q=2, max_P=1, max_Q=1, max_order=4, max_d=1, max_D=1)

    return hour, model

# Train ARIMA models in parallel for each hour; every job returns (hour, model or None)
models = dict(Parallel(n_jobs=-1)(delayed(train_arima_for_hour)(hour) for hour in range(24)))

# Forecasting for May 25, 2024 ('h' replaces the deprecated 'H' frequency alias)
forecast_date = pd.date_range('2024-05-25', periods=24, freq='h')

# Add exogenous features to the forecast dataframe.
# Rows are aligned by timestamp instead of being copied positionally, so missing,
# duplicated or unsorted weather rows cannot shift a value to the wrong hour.
forecast_df = (
    weather_df.drop_duplicates(subset='datetime')
    .set_index('datetime')[exog_features]
    .reindex(forecast_date)
    .rename_axis('datetime')
    .reset_index()
)
missing_exog = forecast_df.loc[forecast_df[exog_features].isna().any(axis=1), 'datetime']
if not missing_exog.empty:
    raise ValueError(
        "Weather features are missing for forecast hours: "
        + ", ".join(str(ts) for ts in missing_exog)
    )

# List to store predictions (exactly one entry per hour of the day)
all_predictions = []
for hour in range(24):
    model = models.get(hour)
    if model is None:
        # No training data for this hour: report a zero forecast
        all_predictions.append([0])
        continue

    # Forecast one step ahead for the specific hour, using that hour's weather features
    hourly_forecast = forecast_df[forecast_df['datetime'].dt.hour == hour]
    forecast = model.predict(n_periods=1, X=hourly_forecast[exog_features])
    # Production cannot be negative
    forecast = np.clip(np.asarray(forecast, dtype=float), a_min=0, a_max=None)
    all_predictions.append(forecast)

# Flatten predictions; index i of the flat list is the forecast for hour i
flattened_predictions = [pred for sublist in all_predictions for pred in sublist]

# Calculate error metrics against the production observed on 2024-05-23.
# NOTE(review): the reference day differs from the forecast day (2024-05-25), so these
# numbers are only a proxy for forecast accuracy - confirm this is intended.
reference_day = pd.Timestamp('2024-05-23').date()
actual_by_hour = (
    merged_df.loc[merged_df['datetime'].dt.date == reference_day]
    .assign(hour_of_day=lambda frame: frame['datetime'].dt.hour)
    .groupby('hour_of_day')['production']
    .first()
)

# Compare hour by hour and only where an actual value exists, so that a reference
# day with missing hours can neither raise nor pair values of different hours.
scored_hours = [hour for hour in range(len(flattened_predictions)) if hour in actual_by_hour.index]
actuals = actual_by_hour.loc[scored_hours].values
if scored_hours:
    mse = mean_squared_error(actuals, [flattened_predictions[hour] for hour in scored_hours])
    rmse = mse ** 0.5
else:
    mse = rmse = float('nan')


# Print the forecast for May 25, 2024
print("\n2024-05-25 Production Forecast:")
for hour, predictions in enumerate(all_predictions):
    for prediction in predictions:
        print(f"  Hour {hour:02d}: {prediction:.2f}")

# Print error metrics
print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")


# Optional: Print hourly predictions in a single line
hourly_predictions_str = ", ".join([f"{prediction:.2f}" if prediction != 0 else "0" for predictions in all_predictions for prediction in predictions])
print(hourly_predictions_str)
