# Carnavele Cattoni

This project studies the monthly average temperature recorded in 2025 at 23 different locations in Ticino:

- 

In [2]:
import pandas as pd
import os
import glob
from datetime import datetime

# 1. Creation of dataset

In [5]:
import pandas as pd
import os
import glob

# Build the 2025 temperature dataset: read every per-station CSV found in
# the temperature directory, keep the rows of the requested year, attach the
# station coordinates/altitude from ticino_stations.csv and save the result.

# File-derived station names (after underscore/title-case normalisation)
# mapped to the spelling used in the STAZIONE column of ticino_stations.csv.
# Defined once here instead of being rebuilt for every file.
station_mapping = {
    'S Bernardino': 'San_Bernardino',
    'S Vittore': 'Sant_Vittore',
    'Delpe Aple': 'Delpe_Aple',
    'Valle Maggia': 'Valle_Maggia',
    'Piotta': 'Piota',
    'Moleno': 'Moleno',
    'Chironico': 'Chirnico'
}


def station_name_from_file(csv_file, mapping=None):
    """Return the station name derived from a temperature CSV path.

    The file extension is stripped, underscores become spaces with each word
    title-cased (names without underscores are simply capitalised), and the
    result is then rewritten through ``mapping`` when one of its keys is
    contained in the name or equals it once spaces are removed
    (case-insensitive).

    Args:
        csv_file: Path (or bare file name) of the station CSV.
        mapping: Optional ``{derived name: final name}`` dict; defaults to
            the module-level ``station_mapping``.

    Returns:
        The station name to use when joining with the stations table.
    """
    if mapping is None:
        mapping = station_mapping

    # splitext removes only the extension, whereas str.replace('.csv', '')
    # would also eat a '.csv' occurring in the middle of the name.
    station_name = os.path.splitext(os.path.basename(csv_file))[0]

    if '_' in station_name:
        station_name = station_name.replace('_', ' ').title()
    else:
        station_name = station_name.capitalize()

    for key, value in mapping.items():
        if key in station_name or station_name.lower() == key.lower().replace(' ', ''):
            return value
    return station_name


def load_station_year(csv_file, station_name, year=2025):
    """Read one station CSV and return its rows for ``year``.

    The file is read as semicolon-separated, latin-1 encoded text, ignoring
    everything after a ``#``. Its first three columns are treated as date
    (``%d.%m.%Y %H:%M:%S``), temperature and provisional flag; any further
    column (e.g. the empty one produced by a trailing semicolon) is dropped.

    Args:
        csv_file: Path of the station CSV.
        station_name: Name stored in the ``station`` column of the result.
        year: Calendar year to keep (default 2025).

    Returns:
        DataFrame with columns ``station``, ``year``, ``month``,
        ``temperature`` and a fresh 0..n-1 index.

    Raises:
        Whatever pandas raises for unreadable files, unparsable dates or a
        file with a number of columns other than expected (ValueError).
    """
    temp_data = pd.read_csv(
        csv_file,
        sep=';',
        comment='#',
        encoding='latin-1',
        dtype={'T': float}
    )

    # Keep the first three columns; copy so that the column assignments
    # below act on an independent frame and not on a slice of the raw data.
    temp_data = temp_data.iloc[:, :3].copy()
    temp_data.columns = ['date', 'temperature', 'provisional_flag']

    temp_data['date'] = pd.to_datetime(temp_data['date'], format='%d.%m.%Y %H:%M:%S')
    temp_data['year'] = temp_data['date'].dt.year
    temp_data['month'] = temp_data['date'].dt.month

    selected = temp_data.loc[
        temp_data['year'] == year, ['year', 'month', 'temperature']
    ].reset_index(drop=True)
    selected.insert(0, 'station', station_name)
    return selected


def find_unmatched_stations(df_merged, stations):
    """Return the sorted station names of ``df_merged`` absent from ``stations['STAZIONE']``."""
    return sorted(set(df_merged['station']) - set(stations['STAZIONE']))


def attach_station_metadata(df_merged, stations):
    """Join the temperature records with the station coordinates and altitude.

    Args:
        df_merged: Records with columns ``station``, ``year``, ``month``,
            ``temperature``.
        stations: Stations table with columns ``STAZIONE``, ``LATITUDINE``,
            ``LONGITUDINE``, ``ALTITUDINE``.

    Returns:
        DataFrame with columns ``station``, ``year``, ``month``,
        ``temperature``, ``latitude``, ``longitude``, ``altitude``, without
        rows containing missing values, sorted by station and month.
        Stations without a match in ``stations`` are therefore removed.
    """
    df_final = df_merged.merge(
        stations,
        left_on='station',
        right_on='STAZIONE',
        how='left'
    )

    df_final = df_final[['station', 'year', 'month', 'temperature', 'LATITUDINE', 'LONGITUDINE', 'ALTITUDINE']]
    df_final = df_final.rename(columns={
        'LATITUDINE': 'latitude',
        'LONGITUDINE': 'longitude',
        'ALTITUDINE': 'altitude',
    })

    # Rows with any missing value are removed: this covers stations with no
    # metadata match as well as records with a missing temperature.
    df_final = df_final.dropna()

    return df_final.sort_values(['station', 'month']).reset_index(drop=True)


# In a notebook cell or when run as a script __name__ is "__main__", so the
# pipeline below runs as usual; the guard only keeps the helpers above
# importable without reading or writing any file.
if __name__ == "__main__":
    # Load stations data
    stations = pd.read_csv('ticino_stations.csv')

    # Directory with temperature data
    temperature_dir = './temperature_bayesian'

    # Get all CSV files
    csv_files = sorted(glob.glob(os.path.join(temperature_dir, '*.csv')))

    all_data = []

    # Process each temperature CSV file; a failing file is reported and
    # skipped so that the remaining stations are still processed.
    for csv_file in csv_files:
        filename = os.path.basename(csv_file)
        station_name = station_name_from_file(csv_file)

        try:
            temp_data_2025 = load_station_year(csv_file, station_name, year=2025)
        except Exception as e:
            print(f"✗ Error processing {filename}: {e}")
            import traceback
            traceback.print_exc()
        else:
            all_data.append(temp_data_2025)
            print(f"✓ Processed {station_name} ({len(temp_data_2025)} records)")

    if all_data:
        # Combine all data
        df_merged = pd.concat(all_data, ignore_index=True)

        # Report stations that will disappear from the dataset because their
        # name has no counterpart in ticino_stations.csv (previously they
        # were dropped without any message).
        unmatched_stations = find_unmatched_stations(df_merged, stations)
        if unmatched_stations:
            print(
                "⚠ No entry in ticino_stations.csv for: "
                f"{', '.join(unmatched_stations)} (these stations are dropped)"
            )

        # Merge with stations data, drop incomplete rows and sort
        df_final = attach_station_metadata(df_merged, stations)

        dropped_rows = len(df_merged) - len(df_final)
        if dropped_rows > 0:
            print(f"⚠ Dropped {dropped_rows} of {len(df_merged)} records with missing values")

        # Save the final dataset
        df_final.to_csv('ticino_temperature_2025.csv', index=False)

        # Print summary
        print("\n" + "="*70)
        print("DATASET CREATED SUCCESSFULLY!")
        print("="*70)
        print(f"Shape: {df_final.shape}")
        print(f"Stations: {df_final['station'].nunique()}")
        print(f"Total records: {len(df_final)}")
        print(f"Temperature range: {df_final['temperature'].min():.1f}°C to {df_final['temperature'].max():.1f}°C")
        print(f"Altitude range: {df_final['altitude'].min():.0f}m to {df_final['altitude'].max():.0f}m")
        print(f"\nFirst 15 rows:\n{df_final.head(15).to_string()}")
        print("\nSaved to: ticino_temperature_2025.csv")
    else:
        print("✗ No data processed!")


✓ Processed Airolo (12 records)
✓ Processed Bioggio (12 records)
✓ Processed Cadenazzo (12 records)
✓ Processed Camignolo (12 records)
✓ Processed Carena (12 records)
✓ Processed Caresio (12 records)
✓ Processed Castaneda (12 records)
✓ Processed Cevio (12 records)
✓ Processed Chirnico (12 records)
✓ Processed Compravasco (12 records)
✓ Processed Delpe_Aple (12 records)
✓ Processed Giubiasco (12 records)
✓ Processed Isone (12 records)
✓ Processed Locarno (12 records)
✓ Processed Lugano (12 records)
✓ Processed Moleno (12 records)
✓ Processed Novaggio (12 records)
✓ Processed Piota (12 records)
✓ Processed Robiei (12 records)
✓ Processed San_Bernardino (12 records)
✓ Processed Sant_Vittore (12 records)
✓ Processed Stabio (12 records)
✓ Processed Valle_Maggia (12 records)
✓ Processed Verbano (12 records)

DATASET CREATED SUCCESSFULLY!
Shape: (252, 7)
Stations: 21
Total records: 252
Temperature range: -5.3°C to 24.2°C
Altitude range: 190m to 1850m

First 15 rows:
      station  year  mont