<a href="https://colab.research.google.com/github/22022658NguyenTienKhoi/13-Weather-Forecast/blob/main/regressors.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [94]:
from prophet import Prophet
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np

In [95]:
# Load the Hanoi weather CSV into a DataFrame.
# NOTE(review): the path sits under /content/drive, so this presumably needs
# Google Drive mounted in the Colab runtime first — confirm before running.
weather = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/hanoiweather.csv")

In [96]:
# Use the 'datetime' column as the row index (rebinding instead of mutating in place).
weather = weather.set_index('datetime')

In [97]:
# Keep only the rows whose index label is '1995-01-01' or later.
weather = weather.loc['1995-01-01':]

In [98]:
# Drop sparsely populated columns, then normalise the labels and the index.
# Fraction of missing values per column, measured before anything is filled.
null_pct = weather.isna().sum() / len(weather)
# Columns with less than 72.5% missing values are kept.
valid_columns = weather.columns[null_pct < .725]
# NOTE(review): every remaining gap becomes 0 here, including in measurement
# columns — confirm that 0 is a sensible stand-in for those.
weather = weather.fillna(0).loc[:, valid_columns].copy()
# Lower-case the column names and parse the index labels as datetimes.
weather.columns = weather.columns.str.lower()
weather.index = pd.to_datetime(weather.index)

In [99]:
# Shape the frame the way Prophet wants it:
#   y  -> the following row's 'tempmax' (the last row has no successor, so its
#         gap is forward-filled from the row before it)
#   ds -> a copy of the index
weather = (
    weather
    .assign(y=weather["tempmax"].shift(-1))
    .ffill()
    .assign(ds=lambda frame: frame.index)
)

In [100]:
# Remove the columns that are not used as regressors (rebinding instead of
# mutating in place).
weather = weather.drop(
    columns=[
        'icon',
        'description',
        'stations',
        'conditions',
        'sunrise',
        'sunset',
        'name',
        'preciptype',
    ]
)

In [101]:
# Every column except the Prophet target ('y') and timestamp ('ds') is a
# candidate regressor.
regressors = weather.columns[[column not in ('y', 'ds') for column in weather.columns]]

In [102]:
# Show the candidate regressor columns as the cell output.
regressors

Index(['tempmax', 'tempmin', 'temp', 'feelslikemax', 'feelslikemin',
       'feelslike', 'dew', 'humidity', 'precip', 'precipprob', 'precipcover',
       'snow', 'snowdepth', 'windgust', 'windspeed', 'winddir',
       'sealevelpressure', 'cloudcover', 'visibility', 'solarradiation',
       'solarenergy', 'uvindex', 'moonphase'],
      dtype='object')

In [103]:
import random
from deap import base, creator, tools
from prophet.diagnostics import cross_validation, performance_metrics

# Define evaluation
def evaluate_forecast(forecast):
    """Return the mean squared error of a cross-validation result.

    The error is computed row by row from the frame itself, so each
    prediction is compared with the observation stored next to it.
    Subtracting a column of another frame instead would make pandas align
    on index labels rather than on forecast rows, which yields NaN or
    unrelated pairs whenever the two indexes differ.

    Args:
        forecast: DataFrame with a 'yhat' (predicted) and a 'y' (observed)
            column, such as the frame returned by
            ``prophet.diagnostics.cross_validation``.

    Returns:
        The mean of the squared differences between 'yhat' and 'y'.

    Raises:
        KeyError: if either column is missing from ``forecast``.
    """
    residuals = forecast['yhat'] - forecast['y']
    return np.mean(residuals ** 2)

# Fitness function used by the genetic algorithm
def evaluate_regressor_combination(individual):
    """Score one individual by cross-validating the Prophet model it encodes.

    ``individual`` is paired position by position with the module-level
    ``regressors``; each truthy entry adds the matching column to the model
    as an extra regressor. The model is fitted on the module-level
    ``weather`` frame and cross-validated with a 30-day horizon.

    Args:
        individual: sequence of include/exclude flags, one per regressor.

    Returns:
        A one-element tuple holding the value ``evaluate_forecast`` computes
        for the cross-validation frame.
    """
    prophet_model = Prophet()

    # Register only the regressors whose flag is switched on.
    for flag, column in zip(individual, regressors):
        if flag:
            prophet_model.add_regressor(column)

    prophet_model.fit(weather)

    # Cross-validate over the data set with a 30-day forecast horizon.
    cv_frame = cross_validation(prophet_model, horizon='30 days')

    return (evaluate_forecast(cv_frame),)

# Genetic Algorithm parameters
population_size = 20  # number of individuals requested from toolbox.population in main()
num_generations = 10  # number of passes through the evolution loop in main()
crossover_prob = 0.8  # chance that a pair of offspring is passed to toolbox.mate
mutation_prob = 0.2  # chance that an offspring is passed to toolbox.mutate

# Create the DEAP toolbox
toolbox = base.Toolbox()

# The fitness value is a mean squared error, so lower is better. DEAP compares
# fitnesses after multiplying each objective by its weight and prefers the
# larger product, hence the negative weight to turn the search into a
# minimisation. (A weight of 1.0 would make the GA favour the worst models.)
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

# Register functions for creating individuals and populations:
# an individual is a list with one random 0/1 flag per candidate regressor.
toolbox.register("attr_bool", random.randint, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, len(regressors))
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Register genetic operators
toolbox.register("evaluate", evaluate_regressor_combination)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

def main(seed=None):
    """Evolve regressor subsets with the genetic algorithm and report the best.

    A population is created with ``toolbox.population`` and scored with
    ``toolbox.evaluate``. For ``num_generations`` rounds the population is
    then replaced by offspring obtained through ``toolbox.select``,
    ``toolbox.mate`` and ``toolbox.mutate``; only offspring whose fitness was
    invalidated are re-evaluated. The minimum and maximum fitness of every
    generation are printed.

    Args:
        seed: optional seed for Python's ``random`` module, which drives the
            crossover and mutation draws made here. ``None`` (the default)
            leaves the generator untouched. Nothing else is seeded.

    Returns:
        list: names of the regressors switched on in the best individual
        seen in any generation (also printed).
    """
    if seed is not None:
        random.seed(seed)

    # Create initial population
    population = toolbox.population(n=population_size)

    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, population))
    for ind, fit in zip(population, fitnesses):
        ind.fitness.values = fit

    # Each generation fully replaces the population, so the best individual
    # can disappear through selection, crossover or mutation. The hall of
    # fame keeps a copy of the best one seen so far.
    hall_of_fame = tools.HallOfFame(1)
    hall_of_fame.update(population)

    # Begin the evolution
    for generation in range(num_generations):
        print(f"Generation {generation + 1}/{num_generations}")

        # Select the next generation and clone it so that the genetic
        # operators below do not modify individuals still in `population`.
        selected = toolbox.select(population, len(population))
        offspring = [toolbox.clone(ind) for ind in selected]

        # Crossover on consecutive pairs; mated children need re-evaluation.
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < crossover_prob:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values

        # Mutation; mutated individuals need re-evaluation as well.
        for mutant in offspring:
            if random.random() < mutation_prob:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Evaluate only the individuals whose fitness was invalidated
        invalid_individuals = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_individuals)
        for ind, fit in zip(invalid_individuals, fitnesses):
            ind.fitness.values = fit

        # Replace the population with the offspring
        population[:] = offspring
        hall_of_fame.update(population)

        # Gather all the fitnesses in the population
        fits = [ind.fitness.values[0] for ind in population]

        # Print statistics for the current generation
        print(f"  Min Fitness: {min(fits)}")
        print(f"  Max Fitness: {max(fits)}")

    # Best individual over the whole run, not just the final population
    best_individual = hall_of_fame[0]
    best_regressor_combination = [regressor for include, regressor in zip(best_individual, regressors) if include]

    print("\nBest Regressor Combination:")
    print(best_regressor_combination)

    return best_regressor_combination

# Start the search when this cell/script is executed directly.
if __name__ == "__main__":
    main()


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmp3qk0a0m6/5s9kr3j6.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp3qk0a0m6/v7vdgiij.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=32768', 'data', 'file=/tmp/tmp3qk0a0m6/5s9kr3j6.json', 'init=/tmp/tmp3qk0a0m6/v7vdgiij.json', 'output', 'file=/tmp/tmp3qk0a0m6/prophet_modelntqdn9rw/prophet_model-20240511025153.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
02:51:53 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
02:51:58 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Making 688 forecasts with cutoffs between 1996-01-13 00:00:00 and 2024-03-31 00:00:00


  0%|          | 0/688 [00:00<?, ?it/s]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
INFO:cmdstanpy:Chain [1] start processing
02:53:26 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmp3qk0a0m6/4hfdn72y.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp3qk0a0m6/li3kf4wq.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=46204', 'data', 'file=/tmp/tmp3qk0a0m6/4hfdn72y.json', 'init=/tmp/tmp3qk0a0m6/li3kf4wq.json', 'output', 'file=/tmp/tmp3qk0a0m6/prophet_modelza4ta1pj/prophet_model-20240511025326.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
02:53:26 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
02:53:27 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/t

  0%|          | 0/688 [00:00<?, ?it/s]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
INFO:cmdstanpy:Chain [1] start processing
03:20:04 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmp3qk0a0m6/k1lr5_lm.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp3qk0a0m6/5f_imblh.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=25624', 'data', 'file=/tmp/tmp3qk0a0m6/k1lr5_lm.json', 'init=/tmp/tmp3qk0a0m6/5f_imblh.json', 'output', 'file=/tmp/tmp3qk0a0m6/prophet_modelzi6t8362/prophet_model-20240511032004.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
03:20:04 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
03:20:05 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/t

  0%|          | 0/688 [00:00<?, ?it/s]

DEBUG:cmdstanpy:input tempfile: /tmp/tmp3qk0a0m6/dzelmg8a.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp3qk0a0m6/kfq2wn3o.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=85023', 'data', 'file=/tmp/tmp3qk0a0m6/dzelmg8a.json', 'init=/tmp/tmp3qk0a0m6/kfq2wn3o.json', 'output', 'file=/tmp/tmp3qk0a0m6/prophet_model2b1jy6rf/prophet_model-20240511034506.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
03:45:06 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
03:45:06 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmp3qk0a0m6/msjh_nm5.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp3qk0a0m6/_725oozp.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/

KeyboardInterrupt: 