In [17]:
import torch
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import os

# Additional metric for MAPE
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Load and preprocess the air pollution data
def load_and_preprocess_data(file_path, target_feature):
    data = pd.read_csv(file_path)

    # Correct the date format using dayfirst=True
    data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y %H:%M', dayfirst=True, errors='coerce')

    # Drop rows with invalid or missing date values
    data = data.dropna(subset=['Date'])

    # Set the 'Date' column as the index
    features = data.copy()
    features.set_index('Date', inplace=True)

    # Ensure the target feature exists
    if target_feature not in features.columns:
        raise ValueError(f"Target column '{target_feature}' not found in the dataset.")

    # Extract all feature columns except the target
    feature_columns = features.columns

    # Scale the data
    feature_scaler = MinMaxScaler()
    target_scaler = MinMaxScaler()

    # Fit and transform the feature data
    features_scaled = feature_scaler.fit_transform(features[feature_columns])

    # Fit and transform the target data
    target_scaled = target_scaler.fit_transform(features[[target_feature]])

    return features_scaled, target_scaled, feature_scaler, target_scaler

# Prepare the dataset for the model input (general structure for time series)
def create_dataset(features, target, seq_length):
    X, y = [], []

    for i in range(len(features) - seq_length):
        X.append(features[i:i + seq_length].flatten())
        y.append(target[i + seq_length])

    return torch.tensor(np.array(X), dtype=torch.float32), torch.tensor(np.array(y), dtype=torch.float32)

# Placeholder for TimeGPT model (replace with actual TimeGPT model or API call)
class TimeGPTModel:
    def __init__(self, input_size):
        self.input_size = input_size

    def predict(self, X):
        # Simulate the TimeGPT prediction logic here (replace with actual API call or model inference)
        return X.mean(dim=1, keepdim=True)  # Dummy prediction logic, replace with real inference

# Testing the model (no training)
def test_model(model, X_test, target_scaler):
    with torch.no_grad():
        predictions = model.predict(X_test).squeeze()

        # Ensure non-negative predictions
        predictions = torch.relu(predictions).numpy()

        # Rescale predictions back to original scale
        predictions_rescaled = target_scaler.inverse_transform(predictions.reshape(-1, 1))
        return predictions_rescaled

# Evaluate model and store results
def evaluate_model(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return mse, rmse, mae, mape, r2

# Save results to CSV
def save_results(predictions, actual_values, feature_name, output_dir):
    results = pd.DataFrame({
        "Predictions": predictions.flatten(),
        "Actual": actual_values.flatten()
    })
    file_path = os.path.join(output_dir, f"{feature_name}_predictions.csv")
    results.to_csv(file_path, index=False)
    print(f"Results saved to: {file_path}")

# Main function to load data, test, and evaluate the model
if __name__ == "__main__":
    output_dir = r"C:/Users/SPURGE/Desktop/JVTI/Python Language/TimesFM/RESULTS"
    os.makedirs(output_dir, exist_ok=True)

    file_path = r"C:/Users/SPURGE/Desktop/JVTI/Python Language/TimesFM/pm_sr.csv"
    target_features = ['PM2.5', 'PM10', 'RH', 'SR']
    seq_length = 12

    all_results = []

    for target_feature in target_features:
        print(f"Evaluating TimeGPT model for target: {target_feature}")

        features_scaled, target_scaled, feature_scaler, target_scaler = load_and_preprocess_data(file_path, target_feature)
        X, y = create_dataset(features_scaled, target_scaled, seq_length)

        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

        input_size = X_train.shape[1]
        model = TimeGPTModel(input_size)

        predictions = test_model(model, X_test, target_scaler)
        y_test_rescaled = target_scaler.inverse_transform(y_test.numpy().reshape(-1, 1))

        save_results(predictions, y_test_rescaled, target_feature, output_dir)

        mse, rmse, mae, mape, r2 = evaluate_model(y_test_rescaled, predictions)
        all_results.append({'Target Feature': target_feature, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape, 'R2': r2})

    results_df = pd.DataFrame(all_results)
    results_file_path = os.path.join(output_dir, "model_evaluation_results.csv")
    results_df.to_csv(results_file_path, index=False)
    print(f"Evaluation results saved to: {results_file_path}")


Evaluating TimeGPT model for target: PM2.5
Results saved to: C:/Users/SPURGE/Desktop/JVTI/Python Language/TimesFM/RESULTS\PM2.5_predictions.csv
Evaluating TimeGPT model for target: PM10
Results saved to: C:/Users/SPURGE/Desktop/JVTI/Python Language/TimesFM/RESULTS\PM10_predictions.csv
Evaluating TimeGPT model for target: RH
Results saved to: C:/Users/SPURGE/Desktop/JVTI/Python Language/TimesFM/RESULTS\RH_predictions.csv
Evaluating TimeGPT model for target: SR
Results saved to: C:/Users/SPURGE/Desktop/JVTI/Python Language/TimesFM/RESULTS\SR_predictions.csv
Evaluation results saved to: C:/Users/SPURGE/Desktop/JVTI/Python Language/TimesFM/RESULTS\model_evaluation_results.csv


  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


In [13]:
import torch
print(torch.__version__)

2.5.1+cpu
