In [1]:
import pandas as pd
import os

def process_rainfall_data(file_path, output_file, year):
    """Average ``APCP_sfc`` per grid point for one year and write it to CSV.

    Reads ``file_path``, keeps only the rows whose ``time`` text begins with
    ``year``, and writes the mean ``APCP_sfc`` of every
    (year, longitude, latitude) combination to ``output_file``.

    Args:
        file_path: Path of the input CSV. It must contain the columns
            ``time``, ``longitude``, ``latitude`` and ``APCP_sfc``.
        output_file: Path of the CSV to write.
        year: Year prefix to keep, e.g. ``"2012"``. A non-string value such
            as an int is converted with ``str()``.

    Raises:
        KeyError: If a required column is missing from the input file.
    """
    year_prefix = str(year)
    df = pd.read_csv(file_path)

    # Keep only rows stamped with the requested year. The test runs on the raw
    # text, before parsing. ``na=False`` makes missing timestamps count as
    # "no match"; without it the mask contains NaN and boolean indexing fails.
    in_year = df['time'].str.startswith(year_prefix, na=False)

    # Work on an independent copy so the column assignment below does not
    # write into a slice of the frame returned by read_csv.
    df = df.loc[in_year].copy()

    # Unparseable timestamps become NaT and are dropped.
    df['time'] = pd.to_datetime(df['time'], errors='coerce')
    df = df.dropna(subset=['time'])

    # assign() returns a new frame, so no chained-assignment warning is raised.
    df = df.assign(year=df['time'].dt.year)

    # Mean rainfall for each (year, longitude, latitude) combination.
    yearly_rainfall = (
        df.groupby(['year', 'longitude', 'latitude'])['APCP_sfc']
        .mean()
        .reset_index()
    )

    yearly_rainfall.to_csv(output_file, index=False)
    print(f"Processed and saved: {output_file}")

def process_all_files(input_folder, output_folder):
    """Run ``process_rainfall_data`` on every ``rainYYYY*.csv`` in a folder.

    A file is processed when its name starts with ``rain``, ends with
    ``.csv``, and has four digits at positions 4-7 (e.g. ``rain2012.csv``).
    Each result is written to ``yearly_rain<YYYY>.csv`` in ``output_folder``.

    Args:
        input_folder: Directory containing the per-year rainfall CSV files.
        output_folder: Directory for the yearly averages; created if missing.
    """
    # Create the destination up front so writing the first result cannot fail
    # on a missing directory.
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir returns names in arbitrary order; sort for a stable run order.
    for file_name in sorted(os.listdir(input_folder)):
        if not (file_name.startswith('rain') and file_name.endswith('.csv')):
            continue

        # Extract year from filename (e.g., '2012' from 'rain2012.csv').
        year = file_name[4:8]
        if not (len(year) == 4 and year.isascii() and year.isdigit()):
            # Names such as 'rainfall_notes.csv' match the prefix and suffix
            # but carry no year; skip them instead of writing an empty file.
            print(f"Skipped (no 4-digit year in name): {file_name}")
            continue

        input_file = os.path.join(input_folder, file_name)
        output_file = os.path.join(output_folder, f"yearly_rain{year}.csv")
        process_rainfall_data(input_file, output_file, year)

# Define input and output folders.
# NOTE: both are absolute Windows paths on drive Z: and must be edited before
# running on another machine. Raw strings (r"...") keep the backslashes literal.
input_folder = r"Z:\Rainfall data and processing\Rainfall_data"  # Folder scanned for rain<YYYY>.csv input files
output_folder = r"Z:\Rainfall data and processing\Rainfall_Data_Yearly_Lat_Long"  # Folder that receives the yearly_rain<YYYY>.csv results

# Process all CSV files. This runs as soon as the cell/script is executed.
process_all_files(input_folder, output_folder)


Processed and saved: Z:\Rainfall data and processing\Rainfall_Data_Yearly_Lat_Long\yearly_rain2012.csv
Processed and saved: Z:\Rainfall data and processing\Rainfall_Data_Yearly_Lat_Long\yearly_rain2013.csv
Processed and saved: Z:\Rainfall data and processing\Rainfall_Data_Yearly_Lat_Long\yearly_rain2014.csv
Processed and saved: Z:\Rainfall data and processing\Rainfall_Data_Yearly_Lat_Long\yearly_rain2015.csv
Processed and saved: Z:\Rainfall data and processing\Rainfall_Data_Yearly_Lat_Long\yearly_rain2016.csv
Processed and saved: Z:\Rainfall data and processing\Rainfall_Data_Yearly_Lat_Long\yearly_rain2017.csv
Processed and saved: Z:\Rainfall data and processing\Rainfall_Data_Yearly_Lat_Long\yearly_rain2018.csv
Processed and saved: Z:\Rainfall data and processing\Rainfall_Data_Yearly_Lat_Long\yearly_rain2019.csv
Processed and saved: Z:\Rainfall data and processing\Rainfall_Data_Yearly_Lat_Long\yearly_rain2020.csv
Processed and saved: Z:\Rainfall data and processing\Rainfall_Data_Yearly