In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import os

# Folder path for data
data_folder = r"D:\Downloads\Final Project\datasets"
dataset_files = ["534541.csv", "534546.csv", "549305.csv"]
summary = []

# Number of trailing rows held out from training (validation + test), and
# how many of those trailing rows form the final test window.
HOLDOUT_SIZE = 48
TEST_SIZE = 12


def percentage_accuracy(y_true, y_pred):
    """Return 100 * (1 - mean absolute percentage error).

    Args:
        y_true: pandas Series of actual target values.
        y_pred: array-like of predictions, positionally aligned with y_true.

    Returns:
        The accuracy as a percentage. Rows whose actual value is zero are
        excluded because their relative error is undefined (it would
        otherwise drive the mean to -inf). NaN if no row is usable.
    """
    # Zero targets become NaN so the division yields NaN for those rows,
    # which Series.mean() skips by default.
    safe_true = y_true.where(y_true != 0)
    relative_error = np.abs((y_true - y_pred) / safe_true)
    return 100 * (1 - relative_error).mean()


# Loop through each dataset file
for file_name in dataset_files:
    # Load dataset
    data_path = os.path.join(data_folder, file_name)
    data = pd.read_csv(data_path)

    # Fail early with a clear message instead of an opaque error from fit()
    # when the chronological split would leave no training rows.
    if len(data) <= HOLDOUT_SIZE:
        raise ValueError(
            f"{file_name} has {len(data)} rows; more than {HOLDOUT_SIZE} "
            "are required for the train/validation/test split"
        )

    # Features and target
    X = data.iloc[:, 2:]  # Features: all columns from the third onward
    y = data.iloc[:, 1]   # Target: second column

    # Split data by position: everything before the holdout window trains
    # the model, the first part of the window validates, the last
    # TEST_SIZE rows test.
    X_train, y_train = X.iloc[:-HOLDOUT_SIZE], y.iloc[:-HOLDOUT_SIZE]
    X_val = X.iloc[-HOLDOUT_SIZE:-TEST_SIZE]
    y_val = y.iloc[-HOLDOUT_SIZE:-TEST_SIZE]
    X_test, y_test = X.iloc[-TEST_SIZE:], y.iloc[-TEST_SIZE:]

    # Train model (fixed seed keeps runs reproducible)
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Validate and test model
    y_val_pred = model.predict(X_val)
    val_accuracy = percentage_accuracy(y_val, y_val_pred)
    print(f"Validation Accuracy for {file_name}: {val_accuracy:.2f}%")

    y_test_pred = model.predict(X_test)
    test_accuracy = percentage_accuracy(y_test, y_test_pred)
    print(f"Test Accuracy for {file_name}: {test_accuracy:.2f}%")

    # Store results
    summary.append({
        'Dataset': file_name,
        'Validation Accuracy': val_accuracy,
        'Test Accuracy': test_accuracy,
    })

    # Plot results for the test window; the first column is used as the x-axis
    test_axis = data.iloc[-TEST_SIZE:, 0]
    plt.figure(figsize=(8, 4))
    plt.plot(test_axis, y_test, label='Actual', color='green')
    plt.plot(test_axis, y_test_pred, label='Predicted', color='red')
    plt.xlabel('Time')
    plt.ylabel('Value')
    plt.legend()
    plt.title(f"Actual vs Predicted - {file_name}")
    plt.show()

# Summary DataFrame
summary_df = pd.DataFrame(summary)
print(summary_df)

ModuleNotFoundError: No module named 'pandas'