In [None]:
import requests
import zipfile
import os
from io import BytesIO

def download_and_extract_zip(zip_url, extract_path, timeout=60):
    """Download a ZIP archive and extract its files, flattened, into a directory.

    The archive is fetched fully into memory. Every file member is written
    directly into ``extract_path`` under its base name only, so the archive's
    internal directory structure is discarded (which also keeps members from
    being written outside ``extract_path``). Members that share a base name
    overwrite one another; the last one in the archive wins.

    Errors are reported with ``print`` and are not re-raised, so one failed
    archive does not stop the caller from processing the next one.

    Args:
        zip_url: URL that returns the ZIP archive.
        extract_path: Directory to write the extracted files into. It is
            created if it does not exist yet.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        None.
    """

    try:
        response = requests.get(zip_url, timeout=timeout)
        response.raise_for_status()  # Turn HTTP 4xx/5xx into a RequestException

        # An empty path means "current directory" for os.path.join below, and
        # os.makedirs("") would raise, so only create non-empty paths.
        if extract_path:
            os.makedirs(extract_path, exist_ok=True)

        with zipfile.ZipFile(BytesIO(response.content)) as zip_file:
            for member in zip_file.namelist():
                filename = os.path.basename(member)
                if not filename:
                    continue  # Directory entries end with "/" and have no base name

                target_path = os.path.join(extract_path, filename)

                # Both handles are closed even if the copy fails midway.
                with zip_file.open(member) as source, open(target_path, "wb") as target_file:
                    target_file.write(source.read())

    except requests.exceptions.RequestException as e:
        print(f"Download error for '{zip_url}': {e}")
    except zipfile.BadZipFile:
        # Raised when the response body is not a ZIP archive.
        print(f"Invalid ZIP file: '{zip_url}'.")
    except Exception as e:
        print(f"Unexpected error for '{zip_url}': {e}")

# Google Drive file IDs of the archives to fetch.
zip_file_ids = [
    "1As-67MFNrpim_jGYgSOgT2SF119gP790",
    "1pjaRe_lSIygHyZ8luWplSC6fuwAEmwTd",
]

# Extraction target: the current working directory of the kernel.
script_directory = os.getcwd()

# Build one direct-download link per file ID.
zip_urls = []
for file_id in zip_file_ids:
    zip_urls.append(f"https://drive.google.com/uc?export=download&id={file_id}")

# Fetch every archive in turn and unpack it into the working directory.
for zip_url in zip_urls:
    download_and_extract_zip(zip_url, script_directory)


In [None]:
import pandas as pd
import os
import glob

# Load the production and weather tables and parse their date columns.
production_data = pd.read_csv("production.csv")
production_data['date'] = pd.to_datetime(production_data['date'])

weather_data = pd.read_csv("processed_weather.csv")
weather_data['date'] = pd.to_datetime(weather_data['date'])

# Sort chronologically. The forward fill below depends on this ordering:
# within one (hour, lat, lon) group the rows must be in ascending date order.
production_data = production_data.sort_values(by=["date", "hour"])
weather_data = weather_data.sort_values(by=["date", "hour", "lat", "lon"])

# Fill gaps in every measurement column with the most recent earlier value
# recorded for the same hour at the same coordinate (normally the previous
# day's reading). GroupBy.ffill fills all columns in one pass and replaces the
# deprecated fillna(method='ffill') call.
key_columns = ['date', 'hour', 'lat', 'lon']
value_columns = [col for col in weather_data.columns if col not in key_columns]
if value_columns:
    weather_data[value_columns] = (
        weather_data.groupby(['hour', 'lat', 'lon'])[value_columns].ffill()
    )

# Dates and hours that occur in both datasets; later steps restrict the
# analysis to these.
common_dates = set(production_data["date"].unique()) & set(weather_data["date"].unique())
common_hours = set(production_data["hour"].unique()) & set(weather_data["hour"].unique())

# Save each coordinate's weather data to its own CSV file, without the lat/lon
# columns (the coordinate is encoded in the file name).
#
# Grouping by (lat, lon) visits only coordinate pairs that actually occur in
# the data. Looping over every unique lat against every unique lon would also
# produce header-only files for pairs that do not exist, and such empty files
# give NaN correlations in the later analysis.
for (lat, lon), coord_weather in weather_data.groupby(["lat", "lon"]):
    dosya_adi = f"koordinat_{lat}_{lon}.csv"

    # Always overwrite: appending to a file left behind by an interrupted run
    # would mix old rows with new ones.
    coord_weather.drop(columns=["lat", "lon"]).to_csv(dosya_adi, index=False)

# Correlate production with every weather variable, one coordinate file at a
# time. The result maps each coordinate file name to {variable: correlation}.
koordinat_korelasyonlari = {}

# The production rows restricted to the common dates and hours are the same
# for every coordinate, so select them once.
production_common = production_data[
    (production_data["date"].isin(common_dates)) & (production_data["hour"].isin(common_hours))
]

for dosya_adi in glob.glob("koordinat_*.csv"):
    # Load this coordinate's weather series.
    koordinat_data = pd.read_csv(dosya_adi)
    koordinat_data['date'] = pd.to_datetime(koordinat_data['date'])

    # Keep only rows whose date and hour occur in both datasets.
    in_common_dates = koordinat_data["date"].isin(common_dates)
    in_common_hours = koordinat_data["hour"].isin(common_hours)
    koordinat_data = koordinat_data[in_common_dates & in_common_hours]

    # Pair every production row with the weather row of the same date and hour.
    merged_data = pd.merge(production_common, koordinat_data, on=["date", "hour"])

    # One correlation with production per weather variable; the join keys are
    # not variables and are skipped.
    correlations = {
        col: merged_data["production"].corr(merged_data[col])
        for col in koordinat_data.columns
        if col not in ['date', 'hour']
    }

    # Store the result under the coordinate's file name.
    koordinat_korelasyonlari[dosya_adi] = correlations

# Calculate influence factors for each variable for each coordinate.
# Influence is the squared correlation with production. A NaN correlation
# (e.g. from an empty merge or a constant column) carries no information and
# is treated as zero influence; otherwise a single NaN would turn the
# normalization total, and with it every coordinate's weight, into NaN.
koordinat_etkileri = {}
for koordinat, korelasyonlar in koordinat_korelasyonlari.items():
    koordinat_etkileri[koordinat] = {
        degisken: 0.0 if pd.isna(korelasyon) else abs(korelasyon) ** 2
        for degisken, korelasyon in korelasyonlar.items()
    }

# Normalize per variable so that the weights of all coordinates sum to 1.
# The variable list is collected from every coordinate (order of first
# appearance), which also works when there are no coordinates at all.
degiskenler = list(dict.fromkeys(
    degisken
    for koordinat_etkisi in koordinat_etkileri.values()
    for degisken in koordinat_etkisi
))
for degisken in degiskenler:
    etki_toplami = sum(
        koordinat_etkisi.get(degisken, 0.0)
        for koordinat_etkisi in koordinat_etkileri.values()
    )
    if etki_toplami == 0:
        # No coordinate shows any influence for this variable; leave all
        # weights at zero instead of dividing by zero.
        continue
    for koordinat_etkisi in koordinat_etkileri.values():
        if degisken in koordinat_etkisi:
            koordinat_etkisi[degisken] /= etki_toplami

# Convert influence factors to a DataFrame (one row per coordinate file) and
# print them.
etki_df = pd.DataFrame(koordinat_etkileri).T.reset_index()
print("Koordinat Etki Çarpanları:")
print(etki_df.to_string(index=False))

# Weight every coordinate's series by its influence factors and combine all
# coordinates into a single weighted series per (date, hour).
agirlikli_parcalar = []
for dosya_adi in glob.glob("koordinat_*.csv"):
    # Read the coordinate data
    koordinat_data = pd.read_csv(dosya_adi)
    koordinat_data['date'] = pd.to_datetime(koordinat_data['date'])

    # Get the influence factors for the coordinate
    etkiler = koordinat_etkileri[dosya_adi]

    # Multiply each column by the corresponding influence factor
    for degisken, etki in etkiler.items():
        koordinat_data[degisken] = koordinat_data[degisken] * etki

    # Collapse any repeated (date, hour) rows within this coordinate
    koordinat_data = koordinat_data.groupby(['date', 'hour']).sum().reset_index()

    # Collect the pieces and concatenate once after the loop; growing a
    # DataFrame with pd.concat on every iteration copies all previous rows
    # each time.
    agirlikli_parcalar.append(koordinat_data)

if not agirlikli_parcalar:
    raise FileNotFoundError(
        "No 'koordinat_*.csv' files found in the working directory; nothing to combine."
    )

# Sum the weighted values of all coordinates for each date and hour
sonuc_df = (
    pd.concat(agirlikli_parcalar, ignore_index=True)
    .groupby(['date', 'hour'])
    .sum()
    .reset_index()
)

# Round the value columns (everything except the keys) to 3 decimal places
deger_sutunlari = [col for col in sonuc_df.columns if col not in ['date', 'hour']]
if deger_sutunlari:
    sonuc_df[deger_sutunlari] = sonuc_df[deger_sutunlari].round(3)

# Save the final result to a CSV file
sonuc_df.to_csv("agirlikli.csv", index=False)

# Remove the coordinate files as they are no longer needed
for dosya_adi in glob.glob("koordinat_*.csv"):
    os.remove(dosya_adi)
