In [1]:
import pandas as pd
import numpy as np
from math import radians, sin, cos, atan2, sqrt
import os

In [2]:
# Function to calculate average WND in polar coordinates
def average_wind(wind_data):
    """Vector-average a collection of (direction, speed) wind observations.

    Each observation is converted to Cartesian components
    (``speed * cos(direction)``, ``speed * sin(direction)``), the components
    are averaged, and the mean vector is converted back to polar form.
    The returned speed is therefore the magnitude of the mean vector, not
    the arithmetic mean of the individual speeds.

    Parameters
    ----------
    wind_data : iterable
        Observations. Each element may be:
        * a string holding a Python literal pair such as ``"(120, 3.5)"``
          or ``"(None, None)"``;
        * an already-parsed 2-element tuple/list ``(direction, speed)``;
        * a missing value (``None``, float NaN, empty string, ...), which
          is skipped.
        ``direction`` is interpreted in degrees.

    Returns
    -------
    tuple[float, float] or None
        ``(avg_direction, avg_speed)`` with the direction in degrees
        normalised by ``% 360``, or ``None`` when no usable observation
        was found.

    Raises
    ------
    ValueError
        If a non-empty string cannot be parsed as a Python literal, or the
        parsed value is not a 2-element sequence.
    """
    # Local import: keeps this function self-contained without requiring a
    # file-level ``import ast``.
    from ast import literal_eval

    u_total, v_total, count = 0.0, 0.0, 0

    for wind in wind_data:
        if isinstance(wind, str):
            text = wind.strip()
            if not text:
                continue
            # literal_eval only accepts Python literals, so text coming from
            # a CSV file can never execute code (unlike eval()).
            try:
                parsed = literal_eval(text)
            except (ValueError, SyntaxError, TypeError) as exc:
                raise ValueError(f"Unparseable wind entry: {wind!r}") from exc
        elif isinstance(wind, (tuple, list)):
            parsed = wind
        else:
            # None, float NaN (how pandas represents an empty CSV cell) and
            # any other non-text value are treated as missing observations.
            continue

        if not isinstance(parsed, (tuple, list)) or len(parsed) != 2:
            raise ValueError(
                f"Wind entry must be a (direction, speed) pair, got: {wind!r}"
            )

        direction, speed = parsed
        if direction is None or speed is None:
            continue

        direction_rad = radians(direction)
        u_total += speed * cos(direction_rad)
        v_total += speed * sin(direction_rad)
        count += 1

    if count == 0:
        return None

    avg_u = u_total / count
    avg_v = v_total / count
    avg_speed = sqrt(avg_u**2 + avg_v**2)
    avg_direction = (atan2(avg_v, avg_u) * 180 / np.pi) % 360

    return avg_direction, avg_speed

In [3]:
# Group by day and calculate averages
def calculate_daily_averages(df):
    """Collapse a table of observations into one row of averages per day.

    Rows of ``df`` are grouped by the 'DAY' column. For every group the
    'WND' column is reduced with ``average_wind`` and the 'CIG', 'VIS',
    'TMP', 'DEW' and 'SLP' columns are reduced to the mean of their
    non-null values.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'DAY', 'WND', 'CIG', 'VIS', 'TMP', 'DEW'
        and 'SLP'.

    Returns
    -------
    pandas.DataFrame
        One row per distinct 'DAY' value with the columns
        'DAY', 'WND', 'CIG', 'VIS', 'TMP', 'DEW', 'SLP'.
    """
    # Columns reduced with a plain mean, in output order
    mean_columns = ('CIG', 'VIS', 'TMP', 'DEW', 'SLP')

    records = []
    for day, day_rows in df.groupby('DAY'):
        record = {'DAY': day, 'WND': average_wind(day_rows['WND'])}
        for column in mean_columns:
            record[column] = day_rows[column].dropna().mean()
        records.append(record)

    return pd.DataFrame(records)

In [None]:
# Process multiple files
def process_files(file_list):
    """Read each CSV file and return the daily averages of all of them.

    For every path the file is loaded with ``pd.read_csv``, its 'DATE'
    column is parsed with the format ``%Y%m%d%H%M%S``, a 'DAY' column
    holding the calendar date is derived from it, and the frame is reduced
    with ``calculate_daily_averages``.

    Parameters
    ----------
    file_list : iterable
        Paths (or anything ``pd.read_csv`` accepts) of the CSV files to
        process. Each file must contain a 'DATE' column plus the columns
        required by ``calculate_daily_averages``.

    Returns
    -------
    pandas.DataFrame
        The per-file daily averages concatenated in input order with a
        fresh integer index. An empty DataFrame when ``file_list`` yields
        no files.
    """
    all_daily_averages = []

    for file in file_list:
        df = pd.read_csv(file)

        # Convert DATE to a datetime object and extract the date
        df['DATE'] = pd.to_datetime(df['DATE'], format='%Y%m%d%H%M%S')
        df['DAY'] = df['DATE'].dt.date

        all_daily_averages.append(calculate_daily_averages(df))

    # pd.concat raises ValueError on an empty list, so handle
    # "no input files" explicitly.
    if not all_daily_averages:
        return pd.DataFrame()

    # Combine all daily averages into a single DataFrame
    return pd.concat(all_daily_averages, ignore_index=True)

In [None]:
# One input CSV per year, 2014 through 2023 inclusive
file_list = list(
    f"~/AQIprediction/aqi_preproc/beijing_{year}.csv"
    for year in range(2014, 2024)
)

# Reduce every yearly file to per-day averages and stack the results
all_daily_averages_df = process_files(file_list)

# Write the combined table to disk, leaving out the index column
all_daily_averages_df.to_csv(
    path_or_buf="~/AQIprediction/daily_climate.csv",
    index=False,
)

# Show the combined table
print(all_daily_averages_df)