In [4]:
import pandas as pd
import os
import re
from datetime import datetime

# Station registry: each entry pairs a numeric station id with a city label.
# process_file combines the two into the output column name "<city>_<id>".
stations = [
    {'station': station_id, 'city': city_name}
    for station_id, city_name in (
        (33345, 'Kyiv'),
        (33347, 'Boryspil'),
        (33231, 'Chornobyl'),
        (33339, 'Fastiv'),
        (33464, 'Byla_Tzerkva'),
        (33354, 'Baryshivka'),
        (33356, 'Yagotyn'),
        (33466, 'Myronivka'),
    )
]

# Accumulator for the output rows; process_file appends one dict per file and
# per kept observation hour.
results = []

# Input discovery: keep every entry of the data folder whose name ends in
# '.prc' and store it as a full path.
folder_path = '/home/lol/jupyter/UHMI/2021_precipitation'  # Replace with the actual folder path
file_paths = [
    os.path.join(folder_path, entry_name)
    for entry_name in os.listdir(folder_path)
    if entry_name.endswith('.prc')
]

# Function to process each file
def process_file(file_path, station_list=None, out=None):
    """Parse one daily ``.prc`` file and append its 06:00 and 18:00 rows.

    The date is read from the beginning of the file name, which must look
    like ``DD.MM.YYYY``. Every data line is expected to hold exactly three
    whitespace-separated fields: station id (int), hour (int) and value
    (float). Lines with a different field count, or whose fields cannot be
    converted to numbers (for example a header line), are skipped.

    Two row dicts are appended to ``out``, one for hour 6 and one for hour 18.
    Each row has a ``'time'`` key holding the file date at that hour, plus one
    ``"<city>_<station id>"`` key per configured station holding the value
    rounded to two decimals. A station with no reading for that hour is
    reported as ``0.0``, so a missing reading cannot be told apart from a
    recorded zero in the output.

    Args:
        file_path: Path of the ``.prc`` file to read.
        station_list: Sequence of ``{'station': id, 'city': name}`` dicts
            describing the output columns. Defaults to the module-level
            ``stations`` list.
        out: List that receives the row dicts. Defaults to the module-level
            ``results`` list.

    Returns:
        None. If the file name does not carry a valid date, a message is
        printed and nothing is appended.
    """
    # Extract date from the file name
    file_name = os.path.basename(file_path)
    date_match = re.match(r'(\d{2})\.(\d{2})\.(\d{4})', file_name)
    if not date_match:
        print(f"Filename {file_name} does not match the expected date format.")
        return
    day, month, year = map(int, date_match.groups())
    try:
        date = datetime(year, month, day)
    except ValueError:
        # The digits matched the pattern but are not a real calendar date
        # (e.g. 31.02.2021); report it like any other unusable file name.
        print(f"Filename {file_name} does not contain a valid calendar date.")
        return

    # Fall back to the module-level configuration when the caller passes none.
    if station_list is None:
        station_list = stations
    if out is None:
        out = results

    # Only the 06:00 and 18:00 observations are kept.
    wanted_hours = (6, 18)

    # readings[station_id][hour] -> value
    readings = {}
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.split()
            if len(parts) != 3:
                continue
            try:
                station_id = int(parts[0])
                hour = int(parts[1])
                value = float(parts[2])
            except ValueError:
                # Three tokens but not numeric (e.g. a header line): skip it
                # instead of aborting the whole file.
                continue
            if hour not in wanted_hours:
                continue
            readings.setdefault(station_id, {})[hour] = value

    # Emit one row per kept hour, with one column per configured station.
    for hour in wanted_hours:
        row = {'time': date.replace(hour=hour, minute=0)}
        for station in station_list:
            station_id = station['station']
            station_key = f'{station["city"]}_{station_id}'
            # Round values to two decimal places
            row[station_key] = round(readings.get(station_id, {}).get(hour, 0.0), 2)
        out.append(row)

# Process each file; every call appends its rows to `results`
for file_path in file_paths:
    process_file(file_path)

# Fix the column layout up front: 'time' first, then one "<city>_<id>" column
# per entry of `stations`, in that order. Passing it to the constructor keeps
# the frame well-formed even when no file produced any rows; without it an
# empty `results` list gives a frame with no 'time' column and the steps below
# raise KeyError.
column_order = ['time'] + [
    f'{station["city"]}_{station["station"]}' for station in stations
]
df_results = pd.DataFrame(results, columns=column_order)

# Sort the DataFrame by 'time' and renumber the rows
df_results = df_results.sort_values(by='time').reset_index(drop=True)

# Save the DataFrame to a CSV file (without the index column)
df_results.to_csv('6_18_from_stations.csv', index=False)

print(df_results)


                   time  Kyiv_33345  Boryspil_33347  Chornobyl_33231  \
0   2021-04-01 06:00:00         0.0             0.0              0.0   
1   2021-04-01 18:00:00         0.0             0.0              0.0   
2   2021-04-02 06:00:00         1.0             0.9              1.0   
3   2021-04-02 18:00:00         5.0             3.0              0.9   
4   2021-04-03 06:00:00         0.0             0.0              0.0   
..                  ...         ...             ...              ...   
357 2021-09-28 18:00:00         0.0             0.0              0.0   
358 2021-09-29 06:00:00         0.0             0.0              0.0   
359 2021-09-29 18:00:00         0.0             0.0              0.0   
360 2021-09-30 06:00:00         0.0             0.0              0.0   
361 2021-09-30 18:00:00         0.0             0.0              0.0   

     Fastiv_33339  Byla_Tzerkva_33464  Baryshivka_33354  Yagotyn_33356  \
0             0.0                 0.0               0.0      