In [None]:
import os
import requests
import zipfile
from pathlib import Path

# Kelmarsh

In [None]:

# Single local folder that holds the downloaded archives and, in one
# sub-folder per dataset, their extracted contents.
data_dir = Path("Daten")
data_dir.mkdir(parents=True, exist_ok=True)
extract_dir = data_dir

# Dataset name -> Zenodo download URL, one archive per year. The trailing
# number in each file name is the id used in the Zenodo file listing.
datasets = {
    f"kelmarsh_scada_{year}": (
        f"https://zenodo.org/record/5841834/files/Kelmarsh_SCADA_{year}_{file_id}.zip"
    )
    for year, file_id in (
        (2016, 3082),
        (2017, 3083),
        (2018, 3084),
        (2019, 3085),
        (2020, 3086),
        (2021, 3087),
    )
}

def download_file(url, save_path, timeout=60):
    """Stream the resource at ``url`` into the local file ``save_path``.

    Args:
        url: Address of the file to fetch.
        save_path: Destination path; an existing file is overwritten.
        timeout: Seconds to wait for the connection and for each read before
            giving up; forwarded unchanged to ``requests.get``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    # Without a timeout a stalled server would block the cell indefinitely.
    # The context manager closes the streamed response even when writing
    # fails, so the underlying connection is always released.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()

        with open(save_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
    print(f"Downloaded: {save_path}")

In [None]:

def extract_zip(zip_path, extract_to):
    """Unpack every member of the archive ``zip_path`` below ``extract_to``.

    A confirmation line naming the archive is printed once unpacking is done.
    """
    # Read mode is ZipFile's default, so it is not spelled out here.
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(path=extract_to)
    print(f"📂 Extracted: {zip_path}")

def rename_turbine1_file(folder, prefix):
    """Give the Kelmarsh turbine-1 CSV in ``folder`` a fixed, short name.

    Only CSV files directly inside ``folder`` are considered. The first one
    (in sorted order) whose name contains ``Turbine_Data_Kelmarsh_1`` is
    renamed to ``kelmarsh_turbine1.csv``. Nothing happens when no file matches.

    Args:
        folder: ``pathlib.Path`` of the directory to search.
        prefix: Unused; kept so existing calls keep working.
    """
    # Sorting makes the pick deterministic should several files match.
    for file in sorted(folder.glob("*.csv")):
        if "Turbine_Data_Kelmarsh_1" in file.name:
            new_name = folder / "kelmarsh_turbine1.csv"
            # replace() overwrites an existing target on every platform, while
            # rename() raises FileExistsError on Windows when the notebook is
            # run a second time and the target is already there.
            file.replace(new_name)
            print(f"Renamed: {file} -> {new_name}")
            return

In [None]:
# Fetch, unpack and normalise every archive listed in `datasets`.
for dataset, url in datasets.items():
    zip_path = data_dir / f"{dataset}.zip"
    dataset_folder = extract_dir / dataset

    # Re-use an archive left by an earlier run instead of downloading it again.
    # is_zipfile() is False for a missing file and for one that is not a valid
    # ZIP (e.g. cut off mid-download), so those cases are fetched afresh.
    if zipfile.is_zipfile(zip_path):
        print(f"Already downloaded: {zip_path}")
    else:
        download_file(url, zip_path)

    dataset_folder.mkdir(parents=True, exist_ok=True)
    extract_zip(zip_path, dataset_folder)
    rename_turbine1_file(dataset_folder, dataset_folder.name)

print("🎉 Alle Dateien wurden heruntergeladen, entpackt und umbenannt!")

# Penmanshiel

In [None]:

# Single local folder that holds the downloaded archives and, in one
# sub-folder per dataset, their extracted contents.
data_dir = Path("Daten")
data_dir.mkdir(parents=True, exist_ok=True)
extract_dir = data_dir

# Dataset name -> Zenodo download URL, one archive per year. The trailing
# number in each file name is the id used in the Zenodo file listing.
datasets = {
    f"Penmanshiel_SCADA_{year}": (
        f"https://zenodo.org/record/5946808/files/Penmanshiel_SCADA_{year}_WT01-10_{file_id}.zip"
    )
    for year, file_id in (
        (2016, 3107),
        (2017, 3114),
        (2018, 3113),
        (2019, 3112),
        (2020, 3109),
        (2021, 3108),
    )
}

def download_file(url, save_path, timeout=60):
    """Stream the resource at ``url`` into the local file ``save_path``.

    Args:
        url: Address of the file to fetch.
        save_path: Destination path; an existing file is overwritten.
        timeout: Seconds to wait for the connection and for each read before
            giving up; forwarded unchanged to ``requests.get``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    # Without a timeout a stalled server would block the cell indefinitely.
    # The context manager closes the streamed response even when writing
    # fails, so the underlying connection is always released.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()

        with open(save_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
    print(f"Downloaded: {save_path}")

In [None]:

def extract_zip(zip_path, extract_to):
    """Unpack every member of the archive ``zip_path`` below ``extract_to``.

    A confirmation line naming the archive is printed once unpacking is done.
    """
    # Read mode is ZipFile's default, so it is not spelled out here.
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(path=extract_to)
    print(f"📂 Extracted: {zip_path}")

def rename_turbine1_file(folder, prefix):
    """Give the Penmanshiel turbine-01 CSV in ``folder`` a fixed, short name.

    Only CSV files directly inside ``folder`` are considered. The first one
    (in sorted order) whose name contains ``Turbine_Data_Penmanshiel_01`` is
    renamed to ``penmanshiel_turbine1.csv``. Nothing happens when no file
    matches.

    Args:
        folder: ``pathlib.Path`` of the directory to search.
        prefix: Unused; kept so existing calls keep working.
    """
    # Sorting makes the pick deterministic should several files match.
    for file in sorted(folder.glob("*.csv")):
        if "Turbine_Data_Penmanshiel_01" in file.name:
            new_name = folder / "penmanshiel_turbine1.csv"
            # replace() overwrites an existing target on every platform, while
            # rename() raises FileExistsError on Windows when the notebook is
            # run a second time and the target is already there.
            file.replace(new_name)
            print(f"Renamed: {file} -> {new_name}")
            return

In [None]:
# Fetch, unpack and normalise every archive listed in `datasets`.
for dataset, url in datasets.items():
    zip_path = data_dir / f"{dataset}.zip"
    dataset_folder = extract_dir / dataset

    # Re-use an archive left by an earlier run instead of downloading it again.
    # is_zipfile() is False for a missing file and for one that is not a valid
    # ZIP (e.g. cut off mid-download), so those cases are fetched afresh.
    if zipfile.is_zipfile(zip_path):
        print(f"Already downloaded: {zip_path}")
    else:
        download_file(url, zip_path)

    dataset_folder.mkdir(parents=True, exist_ok=True)
    extract_zip(zip_path, dataset_folder)
    rename_turbine1_file(dataset_folder, dataset_folder.name)

print("🎉 Alle Dateien wurden heruntergeladen, entpackt und umbenannt!")

In [None]:
from pathlib import Path

def clean_csv(file_path):
    """Strip ``#`` comment lines from a CSV file in place, keeping its header.

    The line that starts with ``# Date and time`` (ignoring leading
    whitespace) is kept as the column header with the ``# `` marker removed.
    Every other line whose first non-blank character is ``#`` is dropped; all
    remaining lines are kept as they are.

    The file is rewritten only when cleaning changed something. If no line
    survives, the file is left untouched and a notice is printed.

    Args:
        file_path: Path of the UTF-8 encoded CSV file to clean.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        lines = file.readlines()

    cleaned_data = []
    for line in lines:
        # Only leading whitespace is removed so the line ending stays intact.
        stripped_line = line.lstrip()
        if stripped_line.startswith("# Date and time"):
            # Slice the left-stripped text: slicing the raw line would leave
            # the "#" in place whenever the header is indented.
            cleaned_data.append(stripped_line[2:])
        elif not stripped_line.startswith("#"):
            cleaned_data.append(line)

    if not cleaned_data:
        print(f"Keine gültigen Daten gefunden in {file_path}")
        return

    if cleaned_data == lines:
        # Already clean (e.g. on a re-run): skip the pointless rewrite.
        return

    with open(file_path, "w", encoding="utf-8") as file:
        file.writelines(cleaned_data)

# Clean every CSV file found at any depth below the data folder.
data_dir = Path("Daten")
# A "**/" prefix makes glob() recurse through all sub-folders.
for csv_file in data_dir.glob("**/*.csv"):
    clean_csv(csv_file)

print("Alle CSV-Dateien wurden bereinigt!")