# Imports 

In [None]:
import pandas as pd
import numpy as np 
import time 
import datetime 
import matplotlib.pyplot as plt 
%matplotlib inline  
import plotly.express as px
import plotly.graph_objects as go 
from sklearn.preprocessing import StandardScaler

# Reading Dataset 

In [None]:
# Load the cleaned measurements CSV and work on a copy, so the frame exactly
# as read from disk stays available in `orig_df`.
orig_df = pd.read_csv(
    '/Users/faymajidelhassan/Downloads/Master project /CODE/EDA/cleaned_measurements_data.csv'
)
df = orig_df.copy(deep=True)

# Report the (rows, columns) shape, leave some vertical space, then preview
# the first five rows.
print(f'Size of the dataset: {df.shape} \n')
print()
display(df.head())

# Cleaning the data 

### Renaming the columns

In [None]:
# df.columns = ['Date' , 'Temperature', 'Pressure' , 'Humidity']

### Removing possible outliers

In [None]:
# pandas' default summary statistics for the frame, used to eyeball value ranges before any outlier filtering.
df.describe()

In [None]:
# Minimum, quartiles and maximum of every numeric column (one row per quantile).
numeric_columns = df.select_dtypes(include='number')
display(numeric_columns.quantile([0, 0.25, 0.5, 0.75, 1], axis=0))


In [None]:
# (rows, columns) before the range filters in the next cell, which are currently commented out.
df.shape

In [None]:
# df = df[(df['humidity'] >= 39) & (df['humidity'] <= 78)]
# df = df[(df['pressure'] >= 1002.0) & (df['pressure'] <= 1014.0)]
# df = df[(df['temperature'] >= 19.0) & (df['temperature'] <= 32.0)]

In [None]:
# (rows, columns) after the filter cell above; unchanged for as long as those filters stay commented out.
df.shape

In [None]:
# Observed extremes of the three measurement columns, as a quick sanity check
# of the value ranges.
max_temp, min_temp = df['temperature'].max(), df['temperature'].min()
max_pres, min_pres = df['pressure'].max(), df['pressure'].min()
max_humid, min_humid = df['humidity'].max(), df['humidity'].min()

# Maxima first ...
print(f'Max Temp : {max_temp}')
print(f'Max Pressure : {max_pres}')
print(f'Max Humidity : {max_humid}')

# ... then minima, in the same column order.
print(f'Min Temp : {min_temp}')
print(f'Min Pressure : {min_pres}')
print(f'Min Humidity : {min_humid}')

### Checking for missing values

In [None]:
# Number of missing entries in each column.
df.isna().sum()

In [None]:
# Shape of the frame at this point of the cleaning stage.
print(f'Dataframe shape : {df.shape}')

### Resetting index 

In [None]:
# Hourly timestamps starting at 2018-04-17 05:00, one per row of `df`.
# The length comes from the frame itself instead of a hard-coded row count, so
# using it as the index cannot fail with a length mismatch if the CSV changes.
idx = pd.date_range('2018-04-17 05:00:00', periods=len(df), freq='1h')

In [None]:
# Replace the row index with the generated hourly range and label it.
df = df.set_index(idx)
df.index.name = 'timestamp'

# Keep the original 'timestamp' column aside before it is removed below.
date_column = df['timestamp']

# Columns that are not carried into the modelling steps.
unused_columns = [
    'timestamp',
    'global_irradiance',
    'direct_irradiance',
    'diffuse_irradiance',
    'precipitation',
]
df = df.drop(columns=unused_columns)

In [None]:
# Preview the frame after re-indexing and dropping the unused columns.
df.head()

In [None]:
# Independent NumPy copy of the frame's values (rows x columns, column order
# as in `df`), which is what the scaler and the window builders operate on.
df_array = df.to_numpy(copy=True)
df_array

### Reducing size for faster computation

In [None]:
# Keep only the first 4000 rows so the later steps run faster.
# Note: this rebinds `df_array`, so the full array is only recoverable by re-running the previous cell.
df_array = df_array[:4000]
df_array.shape 

### Scaling features 

In [None]:
# Standardize every column to zero mean / unit variance.
# The mean and standard deviation are estimated on the training rows only
# (the leading 95%, the same fraction used for `train_data_len` further down),
# so no information from the held-out test rows leaks into the preprocessing.
# The fitted transform is then applied to all rows.
n_fit_rows = int(np.ceil(len(df_array) * 0.95))

scaler = StandardScaler()
scaler.fit(df_array[:n_fit_rows])
scaled_df_array = scaler.transform(df_array)

In [None]:
# First ten standardized rows, as a quick visual check of the scaling.
scaled_df_array[:10]

### Segregating features

In [None]:
  # One (n_rows, 1) column vector per scaled feature:
  #   sha -> humidity, spa -> pressure, sta -> temperature.
  # Columns are located by name instead of by fixed position, so each vector
  # holds the feature its name promises regardless of the column order in the
  # CSV. `scaled_df_array` keeps the column order of `df` because it was built
  # from `np.array(df)` and only sliced by rows afterwards.
  humidity_col = df.columns.get_loc('humidity')
  pressure_col = df.columns.get_loc('pressure')
  temperature_col = df.columns.get_loc('temperature')

  sha = scaled_df_array[:, humidity_col].reshape((-1, 1))
  spa = scaled_df_array[:, pressure_col].reshape((-1, 1))
  sta = scaled_df_array[:, temperature_col].reshape((-1, 1))

  sha.shape , spa.shape , sta.shape 

# Preparing training and test set

### Preparing training set

In [None]:
# Number of leading rows assigned to training: 95% of the rows, rounded up.
train_data_len = int(np.ceil(df_array.shape[0] * 0.95))
train_data_len

In [None]:
# Sliding-window training samples.
# Input: the 48 rows preceding the target row, taken from each of the three
# scaled series and laid end to end (sha, then spa, then sta -> 144 values).
# Targets: the value of each series at the row right after the window.
x_train, y_train_temp, y_train_humidity, y_train_pressure = [], [], [], []

for target_row in range(48, train_data_len):
  lookback = slice(target_row - 48, target_row)
  features = np.concatenate((sha[lookback, 0], spa[lookback, 0], sta[lookback, 0]))

  x_train.append(features)
  y_train_temp.append(sta[target_row, 0])
  y_train_humidity.append(sha[target_row, 0])
  y_train_pressure.append(spa[target_row, 0])

  # Debug output for the first two samples only.
  if target_row < 50:
    print(x_train , np.array(x_train).shape)
    print()
    print(y_train_temp , y_train_humidity, y_train_pressure)
    print()
   

In [None]:
# Shapes of the training inputs and of the three target lists (converted to arrays only for this check).
np.array(x_train).shape , np.array(y_train_humidity).shape, np.array(y_train_pressure).shape , np.array(y_train_temp).shape

In [None]:
# Turn the collected lists into arrays. Inputs become (samples, 144, 1):
# 144 steps with a single value per step, the layout the LSTM input expects.
x_train = np.array(x_train).reshape(-1, 144, 1)
y_train_humidity, y_train_pressure, y_train_temp = (
    np.array(targets)
    for targets in (y_train_humidity, y_train_pressure, y_train_temp)
)

### Preparing testing set

In [None]:
# Test span of the `sta` series, extended 48 rows back so that the first test
# target still has a full 48-row window in front of it.
test_start = train_data_len - 48
test_data = sta[test_start:]
test_data.shape

In [None]:
# Test targets: every row from `train_data_len` onwards, kept as (n, 1) slices.
y_test_temp = sta[train_data_len:]
y_test_humidity = sha[train_data_len:]
y_test_pressure = spa[train_data_len:]

# Test inputs: for each target row, the 48 preceding rows of sha, spa and sta
# stacked on top of each other, giving one (144, 1) block per sample.
x_test = []
for i in range(48, len(test_data)):
  stop = train_data_len - 48 + i
  lookback = slice(stop - 48, stop)
  x_test.append(np.concatenate((sha[lookback], spa[lookback], sta[lookback])))

  # Debug output for the first two samples only.
  if i < 50:
    print(x_test , np.array(x_test).shape)
    print()
    print(y_test_temp , y_test_humidity, y_test_pressure)
    print()
    

In [None]:
# Materialize the test inputs and targets as standalone arrays.
x_test = np.array(x_test)
y_test_humidity, y_test_pressure, y_test_temp = (
    np.array(targets)
    for targets in (y_test_humidity, y_test_pressure, y_test_temp)
)

In [None]:
# Shapes of the test inputs and the three test target arrays; the first dimension should agree across all four.
x_test.shape , y_test_humidity.shape , y_test_pressure.shape , y_test_temp.shape

In [None]:
# Compare the training and test input shapes; everything after the sample dimension must match for the model to accept both.
print(x_train.shape )
x_test.shape

In [None]:
# # Preparing training and test set for all parameters

# # Preparing training set for all parameters
# x_train = []
# y_train_temp = [] 
# y_train_humidity = [] 
# y_train_pressure = [] 

# for i in range(48 , train_data_len) : 
#     temp1 = scaled_df_array[i - 48 : i, 0]  # Temperature
#     temp2 = scaled_df_array[i - 48 : i, 1]  # Pressure
#     temp3 = scaled_df_array[i - 48 : i, 2]  # Humidity

#     # Concatenate temperature, pressure, and humidity values
#     temp4 = np.concatenate((temp1 , temp2 , temp3))

#     x_train.append(temp4) 
#     y_train_temp.append(scaled_df_array[i, 0])   # Temperature target
#     y_train_humidity.append(scaled_df_array[i, 1])  # Humidity target
#     y_train_pressure.append(scaled_df_array[i, 2])  # Pressure target

# # Reshape the features and targets
# x_train = np.array(x_train).reshape((-1 , 144 , 3))
# y_train_humidity = np.array(y_train_humidity)
# y_train_pressure = np.array(y_train_pressure)
# y_train_temp = np.array(y_train_temp)

# # Preparing testing set for all parameters
# x_test = []

# for i in range (48 , len(test_data)):
#     temp1 = sha[train_data_len - 48 + i - 48 : train_data_len - 48 + i ]
#     temp2 = spa[train_data_len - 48 + i - 48 : train_data_len - 48 + i ]
#     temp3 = sta[train_data_len - 48 + i - 48 : train_data_len - 48 + i ]

#     temp4 = np.concatenate((temp1 , temp2 , temp3))
#     x_test.append(temp4)

# x_test = np.array(x_test)

# # Ensure the shape of the data
# print(x_train.shape , y_train_temp.shape , y_train_humidity.shape , y_train_pressure.shape)
# print(x_test.shape)

# # Define separate models for temperature, pressure, and humidity


# Creating model 

In [None]:
import keras
from keras.models import Sequential
from keras.layers import LSTM  , Dense
from keras.callbacks import EarlyStopping

### Temperature model

In [None]:
# Empty Keras Sequential container; the layers are added in the next cell.
temperature_model = Sequential()

In [None]:
# Network: two stacked 128-unit LSTM layers (the first returns its full
# sequence so the second can consume it), a 64-unit ReLU dense layer, and a
# single-unit output for the predicted value.
# Note: re-running this cell appends the layers again; re-create the model first.
timesteps = x_train.shape[1]
for layer in (
    LSTM(128, return_sequences=True, input_shape=(timesteps, 1)),
    LSTM(128, return_sequences=False),
    Dense(64, activation="relu"),
    Dense(1),
):
    temperature_model.add(layer)

temperature_model.summary()

In [None]:
# from tensorflow.keras.utils import plot_model
# plot_model(temperature_model , show_shapes = True )

In [None]:
# Regression setup: mean squared error loss, minimized with Adam.
temperature_model.compile(
    loss="mean_squared_error",
    optimizer="adam",
)
temperature_model

#### Training

In [None]:
# Stop training once the validation loss has not improved for 5 consecutive
# epochs, and roll the model back to the weights of its best epoch.
callbacks = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

In [None]:
# Train on the windowed inputs against the `y_train_temp` targets. The last
# 20% of the training samples are held out for validation, which is the
# signal the early-stopping callback monitors.
history = temperature_model.fit(
    x=x_train,
    y=y_train_temp,
    epochs=75,
    batch_size=256,
    validation_split=0.2,
    callbacks=[callbacks],
    verbose=1,
)

In [None]:
# Predict on the test windows and show the first few predictions.
yp = temperature_model.predict(x_test)
print(yp[:5])

# Disabled experiment: padding the predictions and passing them through
# scaler.inverse_transform to get back to original units.
# z = np.zeros(800).reshape((400,2))
# yp = np.concatenate((yp_copy , z) , axis = 1)
# print()
# print(yp[:5])

# ypi = scaler.inverse_transform(yp)
# print()
# print(ypi[:5])


# ypn = ypi[: , 0]
# print()
# print(ypn[:5])

# Root mean squared error between predictions and targets, both still in
# standardized units because the inverse transform above is disabled.
residuals = yp - y_test_temp
rmse = np.sqrt(np.mean(np.square(residuals)))
print(f'RMSE: {rmse}')

In [None]:
# Overlay the first 100 test targets (default colour) and the model's
# predictions for them (red) on a wide figure.
plt.rcParams['figure.figsize'] = (15, 6)
fig, ax = plt.subplots()
ax.plot(y_test_temp[:100])
ax.plot(yp[:100], color="red")

# SARIMAX

In [None]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [None]:


# Fit the SARIMAX baseline on the training part of the scaled temperature
# series. `sta` is the series the temperature targets (y_train_temp /
# y_test_temp) are taken from, and rows before `train_data_len` form the
# training period. The windowed `x_train` must not be used here: flattening it
# repeats every observation across overlapping windows and mixes in the other
# two features, which is not a time series.
endog = sta[:train_data_len, 0]

# Non-seasonal order (p, d, q) = (0, 1, 3); seasonal order (P, D, Q, s) = (0, 1, 1, 12).
# NOTE(review): the index is built with freq='1h', so a daily cycle would be
# s=24 rather than 12 — confirm the intended seasonality.
model = SARIMAX(endog, order=(0, 1, 3), seasonal_order=(0, 1, 1, 12))

# Powell is a derivative-free optimizer, used here instead of the default method.
results = model.fit(method='powell')

# Print summary
print(results.summary())


In [None]:
# Out-of-sample forecast: one value per test target, starting at the first step
# after the data the model was fitted on. Addressing positions with
# predict(start=len(x_train), ...) would select points inside the fitted
# series (in-sample predictions), not the period that follows it.
test_predictions = results.forecast(steps=len(y_test_temp))

# Plot the forecast against the actual test values.
plt.figure(figsize=(10, 6))
plt.plot(test_predictions, label='Test Predictions')
plt.plot(y_test_temp, label='Actual Test Values')
plt.title('SARIMAX Model Test Predictions vs Actual Test Values')
plt.xlabel('Time')
plt.ylabel('Temperature')
plt.legend()
plt.show()


In [None]:
from sklearn.metrics import mean_squared_error

# Root mean squared error between the SARIMAX predictions and the test
# temperature targets.
sarimax_mse = mean_squared_error(test_predictions, y_test_temp)
RMSE = np.sqrt(sarimax_mse)

print(f'Root Mean Squared Error (RMSE): {RMSE}')