# Predict the next day temperature
I decided to use the LightGBM model 

### Importing the libraries

In [1]:
import joblib
import os
import pandas as pd
import numpy as np
import os 


### Load the model 

In [2]:
# Load the saved LightGBM model
model_filename = os.path.join(os.getcwd(), 'lgb_model.pkl')
model = joblib.load(model_filename)

### Set up the data

In [3]:
data_dir = os.path.join(os.getcwd(), 'data')
data_path = os.path.join(data_dir, 'preprocessed_data.csv')

df = pd.read_csv(data_path)

### Generate the next day data

In [4]:
last_timestamp_str = df['time'].iloc[-1]

In [5]:
# Parse the last timestamp as a datetime object
last_timestamp = pd.to_datetime(last_timestamp_str)

# Calculate the date for the next day
next_day = last_timestamp + pd.DateOffset(days=1)

future_data_rows = []

In [6]:
# Define the number of hours in a day (e.g., 24 for a full day)
num_hours_in_day = 24
# Populate the future dataset with values for the next day
for hour in range(num_hours_in_day):
    # Generate random values for each feature
    future_row = {
        'time': next_day.replace(hour=hour),  
        'dew_point': np.random.uniform(0, 10),  
        'wind_speed': np.random.uniform(0, 5),  
        'wind_direction': np.random.uniform(0, 360),  
        'visibility': np.random.uniform(1000, 10000),  
        'clouds.total_cover': np.random.uniform(0, 100),  
        'relative_humidity': np.random.uniform(0, 100),  
        'temperature_lag_1': np.random.uniform(0, 30),  
        'temperature_lag_3': np.random.uniform(0, 30),  
        'relative_humidity_lag_1': np.random.uniform(0, 100),  
        'relative_humidity_lag_3': np.random.uniform(0, 100),  
        'day_of_week': next_day.weekday(),  
        'hour_of_day': hour 
    }

    # Append the row to the list
    future_data_rows.append(future_row)

# Create a DataFrame from the list of future data rows
future_data = pd.DataFrame(future_data_rows)

future_data.head()


Unnamed: 0,time,dew_point,wind_speed,wind_direction,visibility,clouds.total_cover,relative_humidity,temperature_lag_1,temperature_lag_3,relative_humidity_lag_1,relative_humidity_lag_3,day_of_week,hour_of_day
0,2022-12-30 00:00:00+00:00,2.809422,3.442459,177.289242,1378.955594,84.232884,44.718543,17.321472,8.225378,16.170843,11.839865,4,0
1,2022-12-30 01:00:00+00:00,4.796881,3.40503,289.163291,8957.478083,14.725603,61.978782,26.641488,8.888746,69.148606,64.897731,4,1
2,2022-12-30 02:00:00+00:00,2.164562,4.40224,220.485415,3787.208726,97.865109,94.333698,28.687747,15.986475,50.813369,4.086236,4,2
3,2022-12-30 03:00:00+00:00,6.435202,2.828578,170.284072,7702.591852,78.439457,26.225274,6.278757,2.662478,69.008744,84.250016,4,3
4,2022-12-30 04:00:00+00:00,6.364329,2.156187,39.217115,4040.67527,98.777107,31.649374,18.123767,6.430992,51.432869,44.289611,4,4


### Standardize the data

In [7]:
def standardize_numeric_columns(df):
    """
    Standardize all numeric columns in the DataFrame.

    Parameters:
    - df: DataFrame containing time series data.

    Returns:
    - DataFrame with numeric columns (except 'time') standardized.
    """
    numeric_columns = df.select_dtypes(include=['number']).columns
    for column in numeric_columns:
        if column != 'time':
            mean = df[column].mean()
            std = df[column].std()
            df[column] = (df[column] - mean) / std
    return df

In [8]:
future_data = standardize_numeric_columns(future_data)

### Create the prediction model function

In [9]:
# Define a function to predict the temperature for the next day
def next_day_predict(df, model):
    """
    Predict the temperature for the next day.

    Parameters:
    - df: DataFrame containing time series data.
    - model: Trained model to use for prediction.

    Returns:
    - Prediction for the next day.
    """
    # Make a copy of the DataFrame
    df_copy = df.copy()
    # Drop the 'time' column
    df_copy = df_copy.drop('time', axis=1)
    # Predict the temperature for the next day
    prediction = model.predict(df_copy)
    return prediction

prediction = next_day_predict(future_data, model)

In [10]:
# Concatenate the prediction with the future_data DataFrame
future_data['temperature'] = prediction

In [11]:
# Revert the standardization of the temperature column
for column in future_data.columns:
    if column != 'time':
        mean = df[column].mean()
        std = df[column].std()
        future_data[column] = future_data[column] * std + mean

future_data.head()

Unnamed: 0,time,dew_point,wind_speed,wind_direction,visibility,clouds.total_cover,relative_humidity,temperature_lag_1,temperature_lag_3,relative_humidity_lag_1,relative_humidity_lag_3,day_of_week,hour_of_day,temperature
0,2022-12-30 00:00:00+00:00,4.296507,4.716209,182.631322,2959.59009,41.526284,43.608334,22.957134,13.274304,21.69361,17.678362,,0.260089,20.757739
1,2022-12-30 01:00:00+00:00,7.965886,4.649093,299.961452,8676.214705,-4.667142,57.262815,32.05853,13.927157,60.751871,61.164874,,1.235006,30.664473
2,2022-12-30 02:00:00+00:00,3.105923,6.437239,227.934166,4776.181536,50.586042,82.858614,34.05679,20.912385,47.234077,11.323446,,2.209923,31.080188
3,2022-12-30 03:00:00+00:00,10.990662,3.61543,175.284512,7729.629994,37.676065,28.978409,12.173449,7.799577,60.648756,77.026111,,3.18484,16.806978
4,2022-12-30 04:00:00+00:00,10.859812,2.409731,37.825438,4967.376203,51.192142,33.269385,23.74061,11.508359,47.690808,44.274348,,4.159757,25.911989


In [12]:
future_data.tail()

Unnamed: 0,time,dew_point,wind_speed,wind_direction,visibility,clouds.total_cover,relative_humidity,temperature_lag_1,temperature_lag_3,relative_humidity_lag_1,relative_humidity_lag_3,day_of_week,hour_of_day,temperature
19,2022-12-30 19:00:00+00:00,5.480542,4.661254,250.939039,4311.751332,16.166606,37.16624,21.989611,12.622093,18.430262,41.100224,,18.78351,20.923412
20,2022-12-30 20:00:00+00:00,9.673311,1.079208,136.259817,9036.751735,41.037484,50.976552,17.169017,21.480977,73.89676,22.96512,,19.758427,17.818993
21,2022-12-30 21:00:00+00:00,13.371758,1.204854,31.071997,4564.533085,32.288594,34.49166,25.14401,33.730328,81.030473,76.769417,,20.733344,27.746646
22,2022-12-30 22:00:00+00:00,3.353259,-0.219275,4.498667,8850.456906,46.583242,37.397272,7.482368,11.345684,73.164687,76.519624,,21.708261,10.716695
23,2022-12-30 23:00:00+00:00,1.025313,0.416836,150.900985,3716.544415,14.002353,83.563925,18.78108,29.746804,9.941398,24.482625,,22.683178,16.142562
