In [2]:
# Required Libraries
import pandas as pd

# Read the 2019 monthly temperature file; it has no header row, so columns are positional
full_data = pd.read_csv(
    "../TemperatureData/PartialYearsFiles/k_m_t_2019.csv",
    encoding="ISO-8859-1",
    header=None,
)

# Keep the first six columns and give them descriptive labels
selected_full_data = full_data.iloc[:, list(range(6))]
selected_full_data.columns = [
    'Station Code',
    'Station Name',
    'Year',
    'Month',
    'Avg Temp [°C]',
    'Status TEMP',
]

# Keep only the rows whose 'Status TEMP' is not 8
# NOTE(review): presumably status 8 flags an unusable measurement — confirm against the data-format description
filtered_full_data = selected_full_data.loc[selected_full_data['Status TEMP'].ne(8)]

filtered_full_data

Unnamed: 0,Station Code,Station Name,Year,Month,Avg Temp [°C],Status TEMP
0,249180010,PSZCZYNA,2019,1,-2.1,
1,249180010,PSZCZYNA,2019,2,2.8,
2,249180010,PSZCZYNA,2019,3,5.9,
3,249180010,PSZCZYNA,2019,4,9.7,
4,249180010,PSZCZYNA,2019,5,11.7,
...,...,...,...,...,...,...
793,254220090,OLECKO,2019,8,18.5,
794,254220090,OLECKO,2019,9,13.1,
795,254220090,OLECKO,2019,10,9.2,
796,254220090,OLECKO,2019,11,4.7,


In [3]:
# Mean of 'Avg Temp [°C]' over all remaining rows, one value per calendar month
average_temperature_per_month = (
    filtered_full_data
    .groupby('Month')['Avg Temp [°C]']
    .mean()
)

average_temperature_per_month

Month
1     -3.069118
2      1.885075
3      4.737879
4      8.734848
5     11.668182
6     20.843750
7     18.079687
8     18.996923
9     13.167692
10     9.652308
11     5.936508
12     2.212500
Name: Avg Temp [°C], dtype: float64

In [4]:
def append_average_monthly_temperature(existing_df: pd.DataFrame, csv_path: str) -> pd.DataFrame:
    """
    Append per-month mean temperatures computed from one CSV file to an existing dataframe.

    The CSV is read without a header row using ISO-8859-1 encoding. Its first six
    columns are labelled 'Station Code', 'Station Name', 'Year', 'Month',
    'Avg Temp [°C]' and 'Status TEMP'. Rows whose 'Status TEMP' equals 8 are dropped,
    and the remaining rows are averaged per ('Year', 'Month') across all stations.

    Parameters:
    - existing_df (pd.DataFrame): The dataframe to extend. It is not modified.
    - csv_path (str): Path to the CSV file.

    Returns:
    - pd.DataFrame: A new dataframe containing the rows of `existing_df` followed by
      one row per ('Year', 'Month') with the columns 'Year', 'Month' and
      'Avg Temp [°C]', re-indexed from 0. If `existing_df` has no rows, only the
      newly computed rows (and their columns) are returned; if neither side has
      rows, a copy of `existing_df` is returned.
    """

    # Load the CSV file; it has no header row, so columns are addressed by position
    raw_data = pd.read_csv(csv_path, encoding="ISO-8859-1", header=None)

    # Copy the six-column slice so that renaming its columns is independent of `raw_data`
    station_months = raw_data.iloc[:, [0, 1, 2, 3, 4, 5]].copy()
    station_months.columns = ['Station Code', 'Station Name', 'Year', 'Month', 'Avg Temp [°C]', 'Status TEMP']

    # Filter out rows where the 'Status TEMP' column equals 8
    valid_rows = station_months[station_months['Status TEMP'] != 8]

    # Group by 'Year' and 'Month', then average the temperature across all stations
    monthly_means = valid_rows.groupby(['Year', 'Month'])['Avg Temp [°C]'].mean().reset_index()

    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    # Empty frames are left out so an empty, object-typed accumulator does not
    # degrade the numeric dtypes of 'Year' and 'Month' in the result.
    non_empty_frames = [frame for frame in (existing_df, monthly_means) if not frame.empty]
    if not non_empty_frames:
        return existing_df.copy()

    return pd.concat(non_empty_frames, ignore_index=True)


In [5]:
# Yearly input files, processed in chronological order
csv_files = [
    "../TemperatureData/PartialYearsFiles/k_m_t_2019.csv",
    "../TemperatureData/PartialYearsFiles/k_m_t_2020.csv",
    "../TemperatureData/PartialYearsFiles/k_m_t_2021.csv",
    "../TemperatureData/PartialYearsFiles/k_m_t_2022.csv",
    "../TemperatureData/PartialYearsFiles/k_m_t_2023.csv",
]

# Start from an empty accumulator that already carries the result columns
result_columns = ['Year', 'Month', 'Avg Temp [°C]']
aggregated_df = pd.DataFrame(columns=result_columns)

# Fold each file's monthly averages into the accumulator, one file at a time
for csv_path in csv_files:
    aggregated_df = append_average_monthly_temperature(aggregated_df, csv_path)

aggregated_df

TypeError: append_average_monthly_temperature() missing 1 required positional argument: 'csv_path'

In [None]:
# Write the aggregated monthly averages to disk, without the row index
aggregated_df.to_csv(
    "aggregatedDataTemperature.csv",
    index=False,
    encoding="ISO-8859-1",
)