# In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Columns fed to the regression model as predictors.
FEATURE_COLS = [
    'HUMIDITY',
    'WIND_SPEED',
    'CLOUD_COVER',
    'TEMP',
]

# Calendar component columns that are combined into a single timestamp.
TIME_COLS = [
    'YEAR',
    'MONTH',
    'DAY',
    'HOUR',
]

def load_and_prepare_data(file_path, thresholds):
    """Load the weather CSV and derive the training set for the delta model.

    Processing steps, in order:

    1. Read the CSV and strip surrounding whitespace from the column names.
    2. Coerce the feature and time columns to numbers; cells that cannot be
       parsed become NaN and their rows are dropped.
    3. Divide ``TEMP`` by 10 and assemble a ``DATETIME`` column from
       ``TIME_COLS``.
    4. Keep only the rows whose columns equal the values in ``thresholds``.
    5. Build the regression target as the change in ``TEMP`` between
       consecutive rows of that subset, paired with the features of the
       earlier row of each pair.

    NOTE(review): the differences in step 5 follow file order; this presumably
    expects the CSV to be sorted chronologically -- confirm against the data.

    Args:
        file_path: Location of the CSV, passed straight to ``pandas.read_csv``.
        thresholds: Mapping of column name to required value. A row is kept
            only when it matches every entry.

    Returns:
        A tuple ``(full_df, X, delta, last_row)`` where ``full_df`` is the
        cleaned, unfiltered frame, ``X`` holds the feature columns of every
        filtered row except the last, ``delta`` is the array of consecutive
        ``TEMP`` differences (same length as ``X``) and ``last_row`` is a copy
        of the final filtered row.

    Raises:
        ValueError: If the ``TEMP`` column is absent, or fewer than two rows
            remain after filtering.
    """
    full_df = pd.read_csv(file_path)
    full_df.columns = full_df.columns.str.strip()

    if 'TEMP' not in full_df.columns:
        raise ValueError("The 'TEMP' column is missing in the data.")

    # Clean BEFORE doing arithmetic or building timestamps: a single malformed
    # cell would otherwise make the division or pd.to_datetime raise, which
    # defeats the purpose of coercing bad values to NaN and dropping them.
    numeric_cols = FEATURE_COLS + TIME_COLS
    full_df[numeric_cols] = full_df[numeric_cols].apply(pd.to_numeric, errors='coerce')
    # .copy() gives an independent frame so the column assignments below are
    # unambiguous writes rather than writes into a derived selection.
    full_df = full_df.dropna(subset=numeric_cols).copy()

    full_df['TEMP'] = full_df['TEMP'] / 10
    full_df['DATETIME'] = pd.to_datetime(full_df[TIME_COLS])

    # Combine all threshold conditions into one boolean mask.
    keep = pd.Series(True, index=full_df.index)
    for column, required_value in thresholds.items():
        keep &= full_df[column] == required_value
    filtered_df = full_df[keep]

    # At least two rows are needed to form one (features, delta) pair.
    if len(filtered_df) < 2:
        raise ValueError("Not enough data after filtering.")

    delta = np.diff(filtered_df['TEMP'].to_numpy())
    # The final row has no successor, hence no target; it is returned
    # separately as the starting point for forecasting.
    X = filtered_df[FEATURE_COLS].iloc[:-1]
    last_row = filtered_df.iloc[-1].copy()

    return full_df, X, delta, last_row

def build_model(X, y):
    """Fit a standardise-then-regress pipeline and return it.

    Args:
        X: Training features handed to the pipeline's ``fit``.
        y: Training targets handed to the pipeline's ``fit``.

    Returns:
        The fitted ``Pipeline`` whose steps are ``'scaler'``
        (``StandardScaler``) followed by ``'regressor'``
        (``LinearRegression``).
    """
    steps = [
        ('scaler', StandardScaler()),
        ('regressor', LinearRegression()),
    ]
    pipeline = Pipeline(steps)
    pipeline.fit(X, y)
    return pipeline

def predict_current_and_future_deltas(model, last_row, full_df, start_hour, end_hour=23):
    """Predict temperatures for ``start_hour`` and the later hours of that day.

    For every hour a feature row is built from a base temperature plus the
    non-``TEMP`` features of ``last_row`` (held constant), the model predicts
    a delta, and the delta is added to the base temperature.

    * For ``start_hour`` the base temperature is ``last_row['TEMP']``.
    * For each later hour up to ``end_hour`` (inclusive) the base temperature
      is the first ``TEMP`` recorded in ``full_df`` for that hour on
      ``last_row``'s year/month/day. Hours with no recorded row are skipped.

    Despite the function name, the returned values are temperatures
    (base + predicted delta), not raw deltas.

    Args:
        model: Fitted estimator exposing ``predict``; it receives a DataFrame
            with the columns in ``FEATURE_COLS``.
        last_row: Row (Series-like) providing the constant features, the
            starting ``TEMP`` and the ``YEAR``/``MONTH``/``DAY`` to look up.
        full_df: Frame with ``YEAR``, ``MONTH``, ``DAY``, ``HOUR`` and
            ``TEMP`` columns used to look up later hours.
        start_hour: Hour label attached to the first prediction.
        end_hour: Last hour (inclusive) to look up. Defaults to 23.

    Returns:
        List of ``(hour, predicted_temperature)`` tuples in ascending hour
        order, always starting with ``start_hour``.
    """
    # Every feature except TEMP is frozen at the value seen in last_row.
    constant_features = {col: last_row[col] for col in FEATURE_COLS if col != 'TEMP'}

    # Narrow to last_row's calendar day once instead of re-filtering the whole
    # frame for every hour.
    same_day = full_df[
        (full_df['YEAR'] == last_row['YEAR'])
        & (full_df['MONTH'] == last_row['MONTH'])
        & (full_df['DAY'] == last_row['DAY'])
    ]

    # setdefault keeps the first recorded TEMP when an hour appears twice.
    temp_by_hour = {}
    for hour, temp in zip(same_day['HOUR'], same_day['TEMP']):
        temp_by_hour.setdefault(hour, temp)

    hours = [start_hour]
    base_temps = [last_row['TEMP']]
    for hour in range(start_hour + 1, end_hour + 1):
        if hour in temp_by_hour:
            hours.append(hour)
            base_temps.append(temp_by_hour[hour])

    # Build every input (including the first) as a DataFrame with named
    # columns in FEATURE_COLS order, so all predictions go through the same
    # input format and carry feature names.
    inputs = pd.DataFrame({'TEMP': base_temps}).assign(**constant_features)[FEATURE_COLS]
    deltas = model.predict(inputs)

    return [(hour, base + delta) for hour, base, delta in zip(hours, base_temps, deltas)]

def plot_temperature_forecast(last_row, predicted_deltas):
    """Plot predicted temperatures against time on ``last_row``'s date.

    Args:
        last_row: Row providing ``DATETIME`` (the date the hours belong to)
            and ``HOUR`` (used in the title as the first forecast hour).
        predicted_deltas: Iterable of ``(hour, temperature)`` pairs; each
            pair becomes one point on the line.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # Put each point at its own hour. Shifting only the non-initial points by
    # one hour would leave a two-hour gap after the first point and push
    # hour 23 onto the following day.
    datetimes = [
        last_row['DATETIME'].replace(hour=int(hour))
        for hour, _ in predicted_deltas
    ]
    temps = [temp for _, temp in predicted_deltas]

    first_hour = int(last_row['HOUR'])
    # Title reflects the hours actually plotted rather than assuming 23.
    final_hour = int(predicted_deltas[-1][0]) if predicted_deltas else first_hour

    plt.figure(figsize=(12, 6))
    plt.plot(datetimes, temps, marker='o', linestyle='-', color='b')
    plt.title(f"Temperature Forecast from Hour {first_hour} to {final_hour}")
    plt.xlabel("Datetime")
    plt.ylabel("Predicted Temperature (°C)")
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# --- Main Execution ---
# End-to-end run: load the CSV, fit the delta model on the filtered subset,
# forecast the remaining hours of the last filtered day, then print and plot.
if __name__ == "__main__":
    file_path = '_temperature_data.csv'
    # Training subset: only rows with MONTH == 8 and HOUR == 7 are kept.
    thresholds = {'MONTH': 8, 'HOUR': 7}

    # Step 1: Load the cleaned full frame plus the filtered training data
    # (features X, TEMP deltas y, and the last filtered row).
    full_df, X, y, last_row = load_and_prepare_data(file_path, thresholds)

    # Step 2: Fit the scaler + linear-regression pipeline on (X, y)
    model = build_model(X, y)

    # Step 3: Predict from last_row's hour up to the default end hour (23),
    # looking up later hours of the same day in the full data.
    # Despite its name, predicted_deltas holds (hour, temperature) pairs.
    start_hour = int(last_row['HOUR'])
    predicted_deltas = predict_current_and_future_deltas(model, last_row, full_df, start_hour)

    # Step 4: Print one line per predicted hour, to two decimals
    print("Predicted temperatures:")
    for hour, temp in predicted_deltas:
        print(f"Hour {hour}: {temp:.2f}°C")

    # Step 5: Plot the forecast as a line chart
    plot_temperature_forecast(last_row, predicted_deltas)

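# Recorded notebook output: NameError: name '_name_' is not defined
# This error occurs when the entry-point guard is written with single
# underscores (`_name_`); the guard above uses the correct `__name__`, so
# this output presumably comes from an earlier run of the cell.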