# In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import matplotlib.pyplot as plt
import seaborn as sns

# --- Load the CSV and prepare the single-column series used for forecasting ---
data_path = r'C:\Users\hamza\Downloads\ML Tasks\AirQualityUCI.csv'
target_column = 'CO(GT)'

air_quality_data = pd.read_csv(data_path, delimiter=';')

# MinMaxScaler requires numeric input. Semicolon-delimited exports often use
# a decimal comma (e.g. "2,6"), which read_csv leaves as text; convert such a
# column explicitly. A column that is already numeric is left untouched.
if not pd.api.types.is_numeric_dtype(air_quality_data[target_column]):
    air_quality_data[target_column] = pd.to_numeric(
        air_quality_data[target_column].astype(str).str.replace(',', '.', regex=False),
        errors='coerce',  # unparseable cells become NaN and are forward-filled below
    )

# Forward-fill gaps. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling.
air_quality_data = air_quality_data.ffill()

# Combine the separate 'Date' and 'Time' text columns into one timestamp and
# use it as the index. to_datetime raises if a value does not match `format`.
# NOTE(review): confirm the file's Time column really is 'HH:MM' — TODO verify.
air_quality_data['Datetime'] = pd.to_datetime(
    air_quality_data['Date'] + ' ' + air_quality_data['Time'],
    format='%d/%m/%Y %H:%M',
)
air_quality_data = air_quality_data.drop(['Date', 'Time'], axis=1).set_index('Datetime')

# Keep a one-column DataFrame (double brackets) so the scaler receives 2-D input.
data = air_quality_data[[target_column]]

# Scale the target into [0, 1]; the same scaler is needed later to map
# predictions back to the original units.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

def create_sequences(data, time_step=30):
    """Build sliding-window samples for one-step-ahead forecasting.

    Only the first column of ``data`` is used. Sample ``i`` consists of the
    ``time_step`` consecutive values starting at row ``i``; its label is the
    value that immediately follows that window.

    Args:
        data: 2-D array-like of shape ``(n_rows, n_columns)``.
        time_step: Number of past values in each input window (>= 1).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays where ``X`` has shape
        ``(n_samples, time_step)`` and ``y`` has shape ``(n_samples,)``, with
        ``n_samples = max(n_rows - time_step, 0)``. ``X`` is always 2-D, even
        when there are too few rows to form a single window.

    Raises:
        ValueError: If ``time_step`` is smaller than 1.
    """
    if time_step < 1:
        raise ValueError("time_step must be a positive integer")

    series = np.asarray(data)[:, 0]
    n_samples = max(len(series) - time_step, 0)

    # Row i of window_idx holds the positions i, i+1, ..., i+time_step-1, so a
    # single fancy-indexing operation gathers every window at once.
    window_idx = np.arange(n_samples)[:, None] + np.arange(time_step)[None, :]
    X = series[window_idx]

    # The label of window i is the value right after it, i.e. row i+time_step.
    y = series[time_step:time_step + n_samples].copy()
    return X, y

# --- Window the series, train the LSTM, evaluate and export the results ---
time_step = 30

# Each window consumes `time_step` rows plus one label row, so the series
# yields len(data) - time_step samples. At least two are needed to obtain a
# non-empty training set and a non-empty test set from the 80/20 split.
n_windows = len(data) - time_step
if n_windows < 2:
    raise ValueError(
        f'Need at least {time_step + 2} rows to build a train/test split, '
        f'got {len(data)}.'
    )
train_size = int(n_windows * 0.8)

# Re-fit the scaler on the rows that feed the training windows only (inputs
# and labels of the first `train_size` windows span the first
# train_size + time_step rows). Fitting on the full series would leak the
# min/max of the test period into training.
scaler.fit(data.iloc[:train_size + time_step])
scaled_data = scaler.transform(data)

X, y = create_sequences(scaled_data, time_step)

# Keras LSTM layers expect input shaped (samples, timesteps, features).
X = X.reshape(X.shape[0], X.shape[1], 1)

# Chronological split: earliest 80% of the windows train, the rest test.
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Single LSTM layer followed by a one-unit dense head (one-step forecast).
model = Sequential()
model.add(LSTM(units=50, return_sequences=False, input_shape=(X_train.shape[1], 1)))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')

model.fit(X_train, y_train, epochs=10, batch_size=32)

y_pred = model.predict(X_test)

# Map scaled values back to the original units before computing the metrics.
y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1))
y_pred_rescaled = scaler.inverse_transform(y_pred)

rmse = np.sqrt(mean_squared_error(y_test_rescaled, y_pred_rescaled))
mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)

print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'Mean Absolute Error (MAE): {mae}')

# The label of window i is row i + time_step, so the test labels line up with
# the index entries from train_size + time_step onwards.
test_index = data.index[train_size + time_step:]

# The plotted quantity is `target_column`, so label the figure with it.
plt.figure(figsize=(10, 6))
plt.plot(test_index, y_test_rescaled, label=f'Actual {target_column}')
plt.plot(test_index, y_pred_rescaled, label=f'Predicted {target_column}')
plt.title(f'Actual vs Predicted {target_column}')
plt.xlabel('Time')
plt.ylabel(target_column)
plt.legend()
plt.show()

# Export actual and predicted values side by side (column layout unchanged).
results_df = pd.DataFrame({'Actual': y_test_rescaled.flatten(), 'Predicted': y_pred_rescaled.flatten()})
results_df.to_csv(r'C:\Users\hamza\Downloads\ML Tasks\AirQuality_Forecasting_Results.csv', index=False)
print("Predicted results saved to CSV.")
