In [31]:
from sklearn import linear_model as lm
import numpy as np
import datetime as dt
from DataGeneration import DataGeneration
from sensors.sensor import Sensor
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA


#Create a class that will predict future values of a sensor
#The class will take in a sensor object and a prediction end date
#The class will calculate the prediction intervals and the prediction start date from the sensor object values
#


class DataPrediction:
    def __init__(self, sensor, prediction_end_date, model_name):
        
        self.X = list(sensor.value.keys())
        self.prediction_intervals = self.X[-1] - self.X[-2]
        self.prediction_start_date = self.X[-1] + self.prediction_intervals
        self.prediction_end_date = prediction_end_date

        sensor_values_list = list(sensor.value.values())

        for i in range(len(sensor_values_list)):
            if sensor_values_list[i] is None:
                # Handle edge cases for the first and last elements
                if i == 0:
                    sensor_values_list[i] = sensor_values_list[i + 1]  # Example strategy, adjust as needed
                elif i == len(sensor_values_list) - 1:
                    sensor_values_list[i] = sensor_values_list[i - 1]  # Example strategy, adjust as needed
                else:
                    sensor_values_list[i] = (float(sensor_values_list[i - 1]) + float(sensor_values_list[i + 1])) / 2

            sensor_values_list[i] = float(sensor_values_list[i])

        self.Y = sensor_values_list
        self.X_future = None
        self.forcasted_values = sensor.forcasted_values

        self.model_type = self.select_model(model_name)

        self.model = None
    
    def set_prediction_timeframe(self):

        current_date = self.prediction_start_date

        future_dates = [current_date]

        while current_date + self.prediction_intervals <= self.prediction_end_date:
            current_date += self.prediction_intervals
            future_dates.append(current_date)

        self.X_future = future_dates

    def select_model(self, model_name):
        if model_name == 'linear_regression':
            return lm.LinearRegression()
        # elif model_name == 'random_forest':
        #     return lm.RandomForestRegressor()
        # elif model_name == 'svm':
        #     return lm.SVR()
        else:
            raise ValueError("Invalid model name. Please choose from 'linear_regression', 'random_forest', or 'svm'.")

    def predict(self):

        y_pred = self.model.get_forecast(len(self.X_future))
        y_pred_df = y_pred.conf_int(alpha=0.05)

        y_pred_values = self.model.predict(start = self.X_future[0], end = y_pred_df[-1])

        plt.plot(y_pred_values)
        plt.show()
        # number_past_entries = len(self.X)
        # number_of_future_entries = len(self.X_future)

        # prediction_times = [i for i in range(number_past_entries, number_past_entries + number_of_future_entries)]

        # self.forcasted_values = self.model.predict(np.array(prediction_times).reshape(-1,1))

    def train_model(self):

        print(self.Y)
        ARIMAmodel = ARIMA(self.Y, order=(5,1,0))

        ARIMAmodel = ARIMAmodel.fit()

        self.model = ARIMAmodel

        # train_times = np.array([i for i in range(len(self.X))]).reshape(-1,1)

        # x = np.nan_to_num(train_times, copy=True, nan=0.0, posinf=0.0, neginf=0.0).astype(np.float)

        # y = np.nan_to_num(np.array(self.Y).reshape(-1,1), copy=True, nan=0.0, posinf=0.0, neginf=0.0)

        # for i in range(0,len(y)):
        #     if y[i] == None:
        #         y[i] = (y[i-1].astype(np.float)+y[i+1].astype(np.float))/2

        # self.model.fit(x.reshape(-1,1), y.reshape(-1,1))

    def convert_prediction_to_dict(self):
        self.forcasted_values = dict(zip(self.X_future, self.forcasted_values))


time_series = DataGeneration(9, 30, 1, '2024-03-13 00:00:00').get_time_series()

sensor = Sensor(1, 'Temperature Sensor', 'A sensor that measures temperature', 'float', time_series)

predict = DataPrediction(sensor, dt.datetime(2024, 3, 17, 0, 0, 0), 'linear_regression')

predict.set_prediction_timeframe()
predict.train_model()
predict.predict()
predict.convert_prediction_to_dict()

dates1 = list(sensor.value.keys())
dates2 = list(predict.forcasted_values.keys())

values1 = list(sensor.value.values())

for i in range(0,len(values1)):
    if values1[i] == None:
        values1[i] = (float(values1[i-1])+float(values1[i+1]))/2

values2 = list(predict.forcasted_values.values())
             

plt.figure(figsize=(10, 5))  # Adjust the figure size as needed
plt.plot(dates1, values1, label='Dataset 1')  # Plot the first dataset
plt.plot(dates2, values2, label='Dataset 2', linestyle='--')  # Plot the second dataset with a different style

# Formatting the plot
plt.xlabel('Date')  # Set x-axis label
plt.ylabel('Value')  # Set y-axis label
plt.title('Comparison of Two Datasets')  # Set title
plt.legend()  # Show legend to differentiate the datasets
plt.grid(True)  # Show grid for better readability
plt.xticks(rotation=45)  # Rotate dates for better readability
plt.tight_layout()  # Adjust layout to make room for the rotated date labels

plt.show() 

[63.0, 63.0, 61.0, 61.0, 59.0, 59.0, 58.0, 58.0, 54.0, 54.0, 49.0, 49.0, 49.0, 49.0, 47.0, 47.0, 47.0, 47.0, 47.0, 47.0, 47.0, 47.0, 51.0, 51.0, 51.0, 51.0, 52.0, 52.0, 56.0, 56.0, 60.0, 60.0, 63.0, 63.0, 64.0, 64.0, 67.0, 67.0, 68.0, 68.0, 70.0, 70.0, 71.0, 71.0, 70.0, 70.0, 70.0, 70.0, 67.0, 67.0, 64.0, 64.0, 58.0, 58.0, 56.0, 56.0, 52.0, 52.0, 50.0, 50.0, 52.0, 52.0, 49.333333333333336, 48.0, 47.0, 47.0, 47.0, 47.0, 47.0, 47.0, 46.0, 46.0, 46.0, 46.0, 50.0, 50.0, 57.0, 57.0, 65.0, 65.0, 65.0, 67.5, 70.0, 70.0, 72.0, 73.0, 74.0, 74.0, 73.0, 73.0, 73.0, 70.0, 67.0, 67.0, 65.0, 65.0, 64.0, 64.0, 65.0]


KeyError: 'The `start` argument could not be matched to a location related to the index of the data.'

In [8]:
predict.X_future

[datetime.datetime(2024, 3, 15, 0, 30),
 datetime.datetime(2024, 3, 15, 1, 0),
 datetime.datetime(2024, 3, 15, 1, 30),
 datetime.datetime(2024, 3, 15, 2, 0),
 datetime.datetime(2024, 3, 15, 2, 30),
 datetime.datetime(2024, 3, 15, 3, 0),
 datetime.datetime(2024, 3, 15, 3, 30),
 datetime.datetime(2024, 3, 15, 4, 0),
 datetime.datetime(2024, 3, 15, 4, 30),
 datetime.datetime(2024, 3, 15, 5, 0),
 datetime.datetime(2024, 3, 15, 5, 30),
 datetime.datetime(2024, 3, 15, 6, 0),
 datetime.datetime(2024, 3, 15, 6, 30),
 datetime.datetime(2024, 3, 15, 7, 0),
 datetime.datetime(2024, 3, 15, 7, 30),
 datetime.datetime(2024, 3, 15, 8, 0),
 datetime.datetime(2024, 3, 15, 8, 30),
 datetime.datetime(2024, 3, 15, 9, 0),
 datetime.datetime(2024, 3, 15, 9, 30),
 datetime.datetime(2024, 3, 15, 10, 0),
 datetime.datetime(2024, 3, 15, 10, 30),
 datetime.datetime(2024, 3, 15, 11, 0),
 datetime.datetime(2024, 3, 15, 11, 30),
 datetime.datetime(2024, 3, 15, 12, 0),
 datetime.datetime(2024, 3, 15, 12, 30),
 datet