# Predict the next day temperature
I decided to use the LightGBM model 

### Importing the libraries

In [1]:
import joblib
import os
import pandas as pd
import numpy as np
import os 


### Load the model 

In [2]:
# Load the saved LightGBM model
model_filename = os.path.join(os.getcwd(), 'lgb_model.pkl')
model = joblib.load(model_filename)

### Set up the data

In [3]:
data_dir = os.path.join(os.getcwd(), 'data')
data_path = os.path.join(data_dir, 'preprocessed_data.csv')

df = pd.read_csv(data_path)

In [4]:
df.tail()

Unnamed: 0,time,temperature,dew_point,wind_speed,wind_direction,visibility,clouds.total_cover,relative_humidity,temperature_lag_1,temperature_lag_3,relative_humidity_lag_1,relative_humidity_lag_3,day_of_week,hour_of_day
35054,2022-12-29 19:00:00+00:00,7.0,5.0,0.0,0.0,4000.0,15.0,87.098057,7.0,8.0,93.352226,87.194154,3,19
35055,2022-12-29 20:00:00+00:00,8.0,5.0,1.03,110.0,4000.0,15.0,81.352547,7.0,8.0,87.098057,87.194154,3,20
35056,2022-12-29 21:00:00+00:00,7.0,3.0,0.0,0.0,4000.0,15.0,75.690542,8.0,7.0,81.352547,93.352226,3,21
35057,2022-12-29 22:00:00+00:00,6.0,4.0,6.17,360.0,5000.0,15.0,87.000902,7.0,7.0,75.690542,87.098057,3,22
35058,2022-12-29 23:00:00+00:00,6.0,4.0,0.0,0.0,5000.0,15.0,87.000902,6.0,8.0,87.000902,81.352547,3,23


### Generate the next day data

In [5]:
last_timestamp_str = df['time'].iloc[-1]

In [6]:
last_timestamp_str

'2022-12-29 23:00:00+00:00'

In [7]:
# Parse the last timestamp as a datetime object
last_timestamp = pd.to_datetime(last_timestamp_str)

# Calculate the date for the next day
next_day = last_timestamp + pd.DateOffset(days=1)

future_data_rows = []

In [8]:
next_day

Timestamp('2022-12-30 23:00:00+0000', tz='UTC')

In [9]:
# Define the number of hours in a day (e.g., 24 for a full day)
num_hours_in_day = 24
# Populate the future dataset with values for the next day
for hour in range(num_hours_in_day):
    # Generate random values for each feature
    future_row = {
        'time': next_day.replace(hour=hour),  
        'dew_point': np.random.uniform(0, 10),  
        'wind_speed': np.random.uniform(0, 5),  
        'wind_direction': np.random.uniform(0, 360),  
        'visibility': np.random.uniform(1000, 10000),  
        'clouds.total_cover': np.random.uniform(0, 100),  
        'relative_humidity': np.random.uniform(0, 100),  
        'temperature_lag_1': np.random.uniform(0, 30),  
        'temperature_lag_3': np.random.uniform(0, 30),  
        'relative_humidity_lag_1': np.random.uniform(0, 100),  
        'relative_humidity_lag_3': np.random.uniform(0, 100),  
        'day_of_week': next_day.weekday(),  
        'hour_of_day': hour 
    }

    # Append the row to the list
    future_data_rows.append(future_row)

# Create a DataFrame from the list of future data rows
future_data = pd.DataFrame(future_data_rows)

future_data.head()


Unnamed: 0,time,dew_point,wind_speed,wind_direction,visibility,clouds.total_cover,relative_humidity,temperature_lag_1,temperature_lag_3,relative_humidity_lag_1,relative_humidity_lag_3,day_of_week,hour_of_day
0,2022-12-30 00:00:00+00:00,6.86092,2.52545,349.147731,4750.307961,58.331826,17.458854,22.147434,2.99525,25.264231,80.699471,4,0
1,2022-12-30 01:00:00+00:00,6.552891,4.686549,26.821679,1265.429037,87.224511,18.71287,4.433943,26.066446,22.503685,84.131766,4,1
2,2022-12-30 02:00:00+00:00,3.357618,4.307704,181.425849,4259.729335,76.777199,64.172966,18.897733,14.257307,28.873232,79.697713,4,2
3,2022-12-30 03:00:00+00:00,4.052355,3.124788,345.902727,9841.615101,92.406264,56.10744,16.62299,12.739703,58.341101,33.920888,4,3
4,2022-12-30 04:00:00+00:00,8.310209,2.96585,308.991372,2781.93764,50.880214,42.340896,13.268898,20.655235,68.979623,91.134174,4,4


### Standardize the data

In [10]:
def standardize_numeric_columns(df):
    """
    Standardize all numeric columns in the DataFrame.

    Parameters:
    - df: DataFrame containing time series data.

    Returns:
    - DataFrame with numeric columns (except 'time') standardized.
    """
    numeric_columns = df.select_dtypes(include=['number']).columns
    for column in numeric_columns:
        if column != 'time':
            mean = df[column].mean()
            std = df[column].std()
            df[column] = (df[column] - mean) / std
    return df

In [11]:
future_data = standardize_numeric_columns(future_data)

### Create the prediction model function

In [12]:
# Define a function to predict the temperature for the next day
def next_day_predict(df, model):
    """
    Predict the temperature for the next day.

    Parameters:
    - df: DataFrame containing time series data.
    - model: Trained model to use for prediction.

    Returns:
    - Prediction for the next day.
    """
    # Make a copy of the DataFrame
    df_copy = df.copy()
    # Drop the 'time' column
    df_copy = df_copy.drop('time', axis=1)
    # Predict the temperature for the next day
    prediction = model.predict(df_copy)
    return prediction

prediction = next_day_predict(future_data, model)

In [13]:
# Concatenate the prediction with the future_data DataFrame
future_data['temperature'] = prediction

In [14]:
# Revert the standardization of the temperature column
for column in future_data.columns:
    if column != 'time':
        mean = df[column].mean()
        std = df[column].std()
        future_data[column] = future_data[column] * std + mean

future_data.head()

Unnamed: 0,time,dew_point,wind_speed,wind_direction,visibility,clouds.total_cover,relative_humidity,temperature_lag_1,temperature_lag_3,relative_humidity_lag_1,relative_humidity_lag_3,day_of_week,hour_of_day,temperature
0,2022-12-30 00:00:00+00:00,9.047927,3.184343,359.266095,5764.976159,30.621379,33.924685,27.485449,8.477626,28.769449,68.53417,,0.260089,27.188136
1,2022-12-30 01:00:00+00:00,8.524261,6.400525,-24.534116,3151.03223,49.674265,34.990642,9.731345,30.564262,26.237852,71.499275,,1.235006,11.334257
2,2022-12-30 02:00:00+00:00,3.092124,5.836721,159.556226,5397.002106,42.784928,73.633299,24.228297,19.259076,32.079132,67.668766,,2.209923,20.475671
3,2022-12-30 03:00:00+00:00,4.273214,4.076288,355.402202,9583.872505,53.091301,66.777322,21.948338,17.806235,59.103048,28.122882,,3.18484,19.954046
4,2022-12-30 04:00:00+00:00,11.511797,3.839754,311.451099,4288.537591,25.707515,55.075283,18.586556,25.383974,68.859252,77.548546,,4.159757,19.685874


In [15]:
future_data.tail()

Unnamed: 0,time,dew_point,wind_speed,wind_direction,visibility,clouds.total_cover,relative_humidity,temperature_lag_1,temperature_lag_3,relative_humidity_lag_1,relative_humidity_lag_3,day_of_week,hour_of_day,temperature
19,2022-12-30 19:00:00+00:00,-2.051896,3.649332,327.823946,7731.330372,34.434162,30.230093,13.80974,31.470497,39.863797,36.103938,,18.78351,14.318715
20,2022-12-30 20:00:00+00:00,5.49167,0.094626,306.682497,6797.647168,34.319469,43.638327,11.804006,17.698678,35.908696,42.981819,,19.758427,12.919866
21,2022-12-30 21:00:00+00:00,12.757848,4.610655,48.673538,4929.936351,13.69428,60.626819,25.852124,16.022423,73.507354,54.22089,,20.733344,24.500988
22,2022-12-30 22:00:00+00:00,9.159046,6.008573,144.618329,8802.140994,18.887236,79.665887,17.97346,25.675211,58.941444,65.14969,,21.708261,17.116648
23,2022-12-30 23:00:00+00:00,13.603468,-0.307251,-37.131055,4071.163242,-1.625624,23.091249,26.072787,22.543807,8.003318,32.722857,,22.683178,26.455911
