In [1]:
import joblib
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Locations of the persisted artifacts, relative to the working directory.
MODEL_PATH = 'models/xgboost_model.pkl'
PREPROCESSOR_PATH = 'data/preprocessor.pkl'

# Deserialize the trained model first, then the preprocessing pipeline.
# NOTE(review): joblib.load unpickles arbitrary objects, so only load
# artifact files that come from a trusted source.
best_model = joblib.load(MODEL_PATH)
preprocessor = joblib.load(PREPROCESSOR_PATH)

# Forecasting helper. The model inputs below are synthetic (a sinusoid plus
# Gaussian noise around fixed baselines), not real weather forecasts.
def _synthetic_series(seasonal, base, amplitude, noise_std):
    """Return ``base + amplitude * seasonal`` plus Gaussian noise, one value per day."""
    return base + seasonal * amplitude + np.random.normal(0, noise_std, len(seasonal))


def predict_future_max_temp(province_name, days=5000, *, verbose=True):
    """Predict daily maximum temperatures for ``province_name`` from synthetic inputs.

    One feature row is built per day, starting at ``datetime.now()``. Every
    numeric feature is a fixed baseline plus a sine wave that completes two
    full cycles over the whole horizon (``linspace(0, 4*pi, days)``) plus
    Gaussian noise drawn from NumPy's global random state. Wind direction is
    fixed to ``'N'`` and rain to ``0.0``. The rows are passed through the
    module-level ``preprocessor`` and then to ``best_model.predict``.

    Args:
        province_name: Value placed in the ``'province'`` column of every row.
        days: Number of consecutive days to predict, starting today.
        verbose: When true (the default), print one line per predicted day.

    Returns:
        A list of ``(datetime, prediction)`` tuples, one per day. An empty
        list when ``days`` is 0. ``None`` when any step raises; in that case
        the error message is printed instead of being propagated.
    """
    # Nothing to forecast: answer directly instead of pushing an empty frame
    # through the pipeline and reporting the resulting failure as an error.
    if days == 0:
        return []

    try:
        current_date = datetime.now()
        date_list = [current_date + timedelta(days=i) for i in range(days)]

        # The same seasonal curve drives every feature, so compute it once.
        seasonal = np.sin(np.linspace(0, 4 * np.pi, days))

        # Noise is drawn in this fixed order (min, wind, humidity, cloud,
        # pressure) so results under a seeded global RNG stay reproducible.
        min_temps = _synthetic_series(seasonal, 25, 2, 0.5)
        wind_speeds = _synthetic_series(seasonal, 10, 1, 0.2)
        humidities = _synthetic_series(seasonal, 70, 2, 0.5)
        cloud_coverage = _synthetic_series(seasonal, 50, 7, 1)
        pressures = _synthetic_series(seasonal, 1013, 2, 0.5)

        input_data = pd.DataFrame({
            'province': [province_name] * days,
            'wind_d': ['N'] * days,  # Assumed default wind direction
            'min': min_temps,
            'wind': wind_speeds,
            'rain': [0.0] * days,  # Assumed: no rain
            'humidi': humidities,
            'cloud': cloud_coverage,
            'pressure': pressures,
        })

        # Apply the saved preprocessing pipeline, then run the model on it.
        input_preprocessed = preprocessor.transform(input_data)
        max_temp_predictions = best_model.predict(input_preprocessed)

        # Pair each date with the prediction for that day.
        prediction_list = list(zip(date_list, max_temp_predictions))

        if verbose:
            for date, temp in prediction_list:
                print(f"Date: {date.strftime('%Y-%m-%d')}, Predicted Max Temp: {temp:.2f}°C")

        return prediction_list

    except Exception as e:
        # Deliberate best-effort boundary: report the failure and signal it
        # to the caller with None rather than raising.
        print(f"An error occurred during prediction: {e}")
        return None

# Demo run: a 5000-day forecast for Hanoi. Left as a bare expression so a
# notebook also displays the returned list of (date, prediction) pairs.
predict_future_max_temp(province_name='Hanoi', days=5000)


Date: 2024-08-26, Predicted Max Temp: 32.38°C
Date: 2024-08-27, Predicted Max Temp: 32.82°C
Date: 2024-08-28, Predicted Max Temp: 31.83°C
Date: 2024-08-29, Predicted Max Temp: 32.10°C
Date: 2024-08-30, Predicted Max Temp: 31.83°C
Date: 2024-08-31, Predicted Max Temp: 32.46°C
Date: 2024-09-01, Predicted Max Temp: 31.84°C
Date: 2024-09-02, Predicted Max Temp: 32.37°C
Date: 2024-09-03, Predicted Max Temp: 32.63°C
Date: 2024-09-04, Predicted Max Temp: 32.37°C
Date: 2024-09-05, Predicted Max Temp: 32.10°C
Date: 2024-09-06, Predicted Max Temp: 32.83°C
Date: 2024-09-07, Predicted Max Temp: 32.46°C
Date: 2024-09-08, Predicted Max Temp: 32.46°C
Date: 2024-09-09, Predicted Max Temp: 32.45°C
Date: 2024-09-10, Predicted Max Temp: 32.10°C
Date: 2024-09-11, Predicted Max Temp: 32.09°C
Date: 2024-09-12, Predicted Max Temp: 32.38°C
Date: 2024-09-13, Predicted Max Temp: 31.71°C
Date: 2024-09-14, Predicted Max Temp: 32.37°C
Date: 2024-09-15, Predicted Max Temp: 32.10°C
Date: 2024-09-16, Predicted Max Te

[(datetime.datetime(2024, 8, 26, 8, 10, 50, 944834), np.float32(32.384396)),
 (datetime.datetime(2024, 8, 27, 8, 10, 50, 944834), np.float32(32.820328)),
 (datetime.datetime(2024, 8, 28, 8, 10, 50, 944834), np.float32(31.83334)),
 (datetime.datetime(2024, 8, 29, 8, 10, 50, 944834), np.float32(32.095005)),
 (datetime.datetime(2024, 8, 30, 8, 10, 50, 944834), np.float32(31.83334)),
 (datetime.datetime(2024, 8, 31, 8, 10, 50, 944834), np.float32(32.460926)),
 (datetime.datetime(2024, 9, 1, 8, 10, 50, 944834), np.float32(31.836699)),
 (datetime.datetime(2024, 9, 2, 8, 10, 50, 944834), np.float32(32.367287)),
 (datetime.datetime(2024, 9, 3, 8, 10, 50, 944834), np.float32(32.628963)),
 (datetime.datetime(2024, 9, 4, 8, 10, 50, 944834), np.float32(32.367287)),
 (datetime.datetime(2024, 9, 5, 8, 10, 50, 944834), np.float32(32.098366)),
 (datetime.datetime(2024, 9, 6, 8, 10, 50, 944834), np.float32(32.830647)),
 (datetime.datetime(2024, 9, 7, 8, 10, 50, 944834), np.float32(32.464287)),
 (dateti