In [None]:
import pandas as pd
import numpy as np
import pickle
import importlib
from preprocessingtesting import DataPreprocessortesting
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the weight matrix that is handed to Predictor as its `weights` argument.
weights = np.load('weights.npy')

In [None]:
# Deserialize the container of models; entries are looked up by target key in the next cell.
# NOTE: pickle.load can execute arbitrary code from the file, so only load artifacts you produced.
with open('best_models.pkl', 'rb') as models_file:
    best_models = pickle.load(models_file)

In [None]:
# One model per target key, unpacked in the same order as the keys listed below.
first_model, second_model, third_model, fourth_model, fifth_model = (
    best_models[target_key]
    for target_key in ('jjas_Avg', 'Jun', 'Jul', 'Aug', 'Sep')
)

In [None]:
# Deserialize the container holding the sorted correlation rankings (split per method in the next cell).
with open('sorted_correlation_data.pkl', 'rb') as correlation_file:
    sorted_correlation_data = pickle.load(correlation_file)

In [None]:
# Short aliases for the four rankings: p(earson), k(endall), s(pearman), m(utual).
# `p` is what the Predictor calls in this notebook pass as `correlation_file`.
p, k, s, m = (
    sorted_correlation_data[ranking_key]
    for ranking_key in (
        'sorted_correlation_pearson',
        'sorted_correlation_kendall',
        'sorted_correlation_spearman',
        'sorted_correlation_mutual',
    )
)

In [None]:
# Deserialize the container of scalers that accompany the pickled models.
with open('best_scalers.pkl', 'rb') as scalers_file:
    best_scalers = pickle.load(scalers_file)

In [None]:
# Deserialize the long-term means; passed to Predictor as `means`.
with open('long_term_mean.pkl', 'rb') as mean_file:
    long_term_mean = pickle.load(mean_file)

In [None]:
# Deserialize the long-term anomalies; passed to Predictor as `anomalies`.
with open('long_term_anomalies.pkl', 'rb') as anomalies_file:
    long_term_anomalies = pickle.load(anomalies_file)

In [None]:
class Predictor:
    """Predict a target from a NetCDF test file via thresholded weight projections.

    ``predict`` chains the steps: choose an outlier multiplier for the weight
    matrix (``Tweights``), project the preprocessed data onto the outlier
    entries of every weight column (``potential_pred``), pick the first
    ``top_n`` projections listed in ``correlation_file``
    (``get_pred_values``), then scale them with ``best_scaler`` and feed them
    to ``best_model``.
    """

    # A weight column only counts as usable when strictly more than this
    # fraction of its rows lie outside the mean +/- multiplier * std band.
    OUTLIER_FRACTION = 0.1

    def __init__(self, nc_file, weights, means, anomalies, correlation_file, best_model, best_scaler, top_n):
        """Preprocess ``nc_file`` and store everything ``predict`` needs.

        Parameters
        ----------
        nc_file, means, anomalies
            Forwarded unchanged to ``DataPreprocessortesting``.
        weights : 2-D array-like
            Weight matrix; rows line up with the input features and every
            column yields one potential predictor.
        correlation_file : iterable of (idx, month_idx, value)
            Ranked predictor entries; ``idx`` selects a weight column and
            ``month_idx`` a sample (after ``convert_to_april_march``).
        best_model, best_scaler
            Objects exposing ``predict(X)`` and ``transform(X)`` respectively.
        top_n : int
            Number of predictor values handed to the scaler/model.

        Raises
        ------
        ValueError
            If ``weights`` is not 2-D (for example a file path was passed
            instead of the loaded array).
        """
        # Validate before the preprocessing pipeline runs so a bad argument
        # fails immediately instead of deep inside Tweights.
        weights = np.asarray(weights)
        if weights.ndim != 2:
            raise ValueError(
                "weights must be a 2-D array (got ndim=%d); pass the loaded "
                "matrix, not a file path" % weights.ndim
            )

        self.preprocessor = DataPreprocessortesting(nc_file, means, anomalies)
        self.autoencoder_input_data = self.preprocessor.execute_pipeline()
        self.weights = weights
        self.correlation_file = correlation_file
        self.best_model = best_model
        self.best_scaler = best_scaler
        self.top_n = top_n
        self.means = means
        self.anomalies = anomalies

    def convert_to_april_march(self, month_idx):
        """Shift ``month_idx`` back by three positions, wrapping modulo 12.

        NOTE(review): presumably ``month_idx`` is a 0-based calendar month so
        that April maps to 0 and March to 11 -- confirm against the data that
        produced ``correlation_file``.
        """
        return (month_idx - 3) % 12

    @staticmethod
    def _outlier_masks(column, multiplier):
        """Return boolean masks (above, below) of the mean +/- multiplier*std band."""
        center = np.mean(column)
        spread = np.std(column)
        above = column > center + multiplier * spread
        below = column < center - multiplier * spread
        return above, below

    def Tweights(self, weight):
        """Find the largest multiplier for which every column has enough outliers.

        Multipliers 2.0, 1.9, ..., 1.1 are tried in that order. A multiplier
        is accepted when, for every column of ``weight``, the number of
        entries outside mean +/- multiplier*std exceeds
        ``int(OUTLIER_FRACTION * n_rows)``.

        Returns
        -------
        list
            ``[multiplier]`` for the first accepted multiplier, or ``[]`` when
            none qualifies (or ``weight`` has no columns).
        """
        n_rows, n_cols = weight.shape
        if n_cols == 0:
            return []
        min_outliers = int(self.OUTLIER_FRACTION * n_rows)

        for multiplier in np.arange(2, 1, -0.1):
            every_column_ok = True
            for i in range(n_cols):
                above, below = self._outlier_masks(weight[:, i], multiplier)
                if not (np.sum(above) + np.sum(below) > min_outliers):
                    every_column_ok = False
                    break
            if every_column_ok:
                return [multiplier]
        return []

    def potential_pred(self, threshold_values, input_data, weights):
        """Project each sample onto the outlier weights of every weight column.

        Parameters
        ----------
        threshold_values : sequence
            Only the first element is used, as the std-dev multiplier.
        input_data : 2-D array, shape (n_samples, n_features)
        weights : 2-D array, shape (n_features, n_columns)

        Returns
        -------
        numpy.ndarray, shape (n_columns, n_samples)
            ``pred[i, h]`` is the sum over the outlier rows ``j`` of column
            ``i`` of ``weights[j, i] * input_data[h, j]``. Columns without
            enough outliers stay zero.

        Raises
        ------
        ValueError
            If ``threshold_values`` is empty or the feature counts of
            ``input_data`` and ``weights`` disagree.
        """
        if len(threshold_values) == 0:
            raise ValueError(
                "threshold_values is empty: no multiplier gave every weight "
                "column enough outlier nodes"
            )
        multiplier = threshold_values[0]

        weights = np.asarray(weights)
        input_data = np.asarray(input_data)
        n_features, n_columns = weights.shape
        if input_data.ndim != 2 or input_data.shape[1] != n_features:
            raise ValueError(
                "input_data must have shape (n_samples, %d) to match weights; got %r"
                % (n_features, input_data.shape)
            )

        min_outliers = int(self.OUTLIER_FRACTION * n_features)
        # One output row per weight column (a fixed row count would overflow
        # as soon as the weight matrix had more columns than that).
        pred = np.zeros((n_columns, input_data.shape[0]))
        for i in range(n_columns):
            column = weights[:, i]
            above, below = self._outlier_masks(column, multiplier)
            if np.sum(above) + np.sum(below) > min_outliers:
                selected = above | below
                # Weighted sum over the selected features, for all samples at once.
                pred[i, :] = input_data[:, selected] @ column[selected]
        return pred

    def get_pred_values(self):
        """Collect the first ``top_n`` eligible predictor values as a column vector.

        Walks ``correlation_file`` in order and skips entries whose shifted
        month index is 0 or 1. Requires ``self.pred`` (set by ``predict``).

        Returns
        -------
        numpy.ndarray, shape (k, 1) with ``k <= top_n``
        """
        x_test = []
        for idx, month_idx, _value in self.correlation_file:
            if len(x_test) >= self.top_n:
                break
            shifted_month = self.convert_to_april_march(month_idx)
            if shifted_month in (0, 1):
                continue
            x_test.append(self.pred[idx, shifted_month])

        return np.array(x_test).reshape(-1, 1)

    def predict(self):
        """Run the full pipeline and return ``best_model``'s predictions.

        Side effects: stores the flattened input in
        ``self.autoencoder_input_data`` and the projections in ``self.pred``.

        Raises
        ------
        ValueError
            Propagated from ``potential_pred`` when no usable threshold
            exists or the data cannot be matched to the weight matrix.
        """
        th = self.Tweights(self.weights)
        # Flatten to (n_samples, n_features); the feature count is taken from
        # the weight matrix so both always agree.
        self.autoencoder_input_data = self.autoencoder_input_data.reshape(
            -1, self.weights.shape[0]
        )
        self.pred = self.potential_pred(th, self.autoencoder_input_data, self.weights)

        # The scaler/model get a single sample with one feature per selected predictor.
        features = self.get_pred_values().reshape(1, -1)
        X_new_scaled = self.best_scaler.transform(features)
        return self.best_model.predict(X_new_scaled)

In [None]:
# Predictor takes eight arguments; the loaded `weights` array (not the file name)
# plus the long-term statistics and a scaler must be supplied.
# NOTE(review): the variable names say "MAM" but `first_model` is the 'jjas_Avg'
# model; names are kept so later references keep working.
predictor_mam = Predictor(
    'Air_test.nc',
    weights,
    long_term_mean,
    long_term_anomalies,
    p,
    first_model,
    best_scalers['jjas_Avg'],  # NOTE(review): assumes best_scalers uses the same keys as best_models; confirm
    15,
)
prediction_MAMmean = predictor_mam.predict()
prediction_MAMmean

In [None]:
# The scaler is looked up here because no cell defines `second_scalar`.
# The result gets its own name so the predictor object is not overwritten and
# the cell can be re-run.
predictor_june = Predictor(
    'Air_test.nc',
    weights,
    long_term_mean,
    long_term_anomalies,
    p,
    second_model,
    best_scalers['Jun'],  # NOTE(review): assumes best_scalers uses the same keys as best_models; confirm
    20,
)
prediction_june = predictor_june.predict()
prediction_june

In [None]:
# The predictor is bound to the name the next line actually uses, and is given
# the full eight-argument call with the loaded `weights` array.
# NOTE(review): the names say "april" but `third_model` is the 'Jul' model;
# names are kept so later references keep working.
predictor_april = Predictor(
    'Air_test.nc',
    weights,
    long_term_mean,
    long_term_anomalies,
    p,
    third_model,
    best_scalers['Jul'],  # NOTE(review): assumes best_scalers uses the same keys as best_models; confirm
    15,
)
prediction_april = predictor_april.predict()
prediction_april

In [None]:
# Predictor takes eight arguments; the loaded `weights` array (not the file name)
# plus the long-term statistics and a scaler must be supplied.
# NOTE(review): the names say "may" but `fourth_model` is the 'Aug' model;
# names are kept so later references keep working.
predictor_may = Predictor(
    'Air_test.nc',
    weights,
    long_term_mean,
    long_term_anomalies,
    p,
    fourth_model,
    best_scalers['Aug'],  # NOTE(review): assumes best_scalers uses the same keys as best_models; confirm
    15,
)
prediction_may = predictor_may.predict()
prediction_may