In [104]:
import pandas as pd 
from typing import Callable
import numpy as np
from scipy.fft import fft

In [98]:
class FeatureEngine:
    """
    Build window-aggregate features on the training and validation frames.

    Both frames are expected to hold one row per day with one column per
    reading, where each reading column is named by its clock time as
    ``HH:MM:SS`` (e.g. ``06:00:00``). The 70 / 180 thresholds used by
    ``tar`` / ``tir`` / ``tbr`` are presumably glucose values in mg/dL --
    TODO confirm against the data source.

    Features are not added automatically; call ``aggregate_window`` once per
    aggregate, for example::

        engine = FeatureEngine()
        engine.aggregate_window(engine.tir, "tir")
        engine.aggregate_window(engine.rolling_mean, "mean")
        engine.aggregate_window(engine.fft, "fft")
    """

    # Look-back windows (label -> length in minutes), measured backwards from
    # the latest timestamp column found in the training set.
    TIME_WINDOWS = {
        'last_10_minutes': 10,
        'last_30_minutes': 30,
        'last_1_hour': 60,
        'last_3_hours': 180,
        'last_6_hours': 360,
        'last_12_hours': 720
    }

    def __init__(self,
                 train_path: str = "cleaned_data/train_data.csv",
                 validation_path: str = "cleaned_data/validation_data.csv"):
        """
        Load the training and validation frames from CSV.

        Args:
        train_path (str): CSV file read into ``self.training_set``.
        validation_path (str): CSV file read into ``self.validation_set``.
        """
        # NOTE(review): the notebook output shows a warning pointing at the
        # training read (likely a mixed-dtype DtypeWarning) -- consider passing
        # an explicit dtype= once the offending columns are identified.
        self.training_set = pd.read_csv(train_path)
        self.validation_set = pd.read_csv(validation_path)

    def demographic_features(self,) -> None:
        """
        Extract demographic features from the CRFSCreening (HScreening.txt)

        Not implemented yet: currently a no-op that returns None.
        """
        return

    def fft(self, arr: np.ndarray) -> np.ndarray:
        """Return the discrete Fourier transform of arr (complex array, same length)."""
        return fft(arr)

    def rolling_deviation(self, arr: np.ndarray) -> float:
        """Return the standard deviation of arr (np.std default, ddof=0)."""
        return np.std(arr)

    def tar(self, arr: np.ndarray) -> float:
        """
        Return the fraction of values strictly above 180.

        NaN entries compare as False, so they count in the denominator but
        never in the numerator.
        """
        mask = arr > 180
        return np.mean(mask)

    def tir(self, arr: np.ndarray) -> float:
        """
        Return the fraction of values inside the closed interval [70, 180].

        NaN entries compare as False, so they count in the denominator but
        never in the numerator.
        """
        mask = (arr >= 70) & (arr <= 180)
        return np.mean(mask)

    def tbr(self, arr: np.ndarray) -> float:
        """
        Return the fraction of values strictly below 70.

        NaN entries compare as False, so they count in the denominator but
        never in the numerator.
        """
        mask = arr < 70
        return np.mean(mask)

    def rolling_mean(self, arr: np.ndarray) -> float:
        """Return the arithmetic mean of arr."""
        return np.mean(arr)

    @staticmethod
    def _is_clock_label(label) -> bool:
        """Return True when label looks like an ``HH:MM:SS`` column name."""
        parts = str(label).split(":")
        return len(parts) == 3 and all(part.isdigit() for part in parts)

    def _time_columns(self) -> list:
        """Return the training-set columns named like ``HH:MM:SS``, in frame order."""
        return [col for col in self.training_set.columns if self._is_clock_label(col)]

    @staticmethod
    def _spectrum_columns(spectrum: np.ndarray, prefix: str) -> dict:
        """Split a (rows, bins) complex array into ordered real/imag feature columns."""
        columns = {}
        for i in range(spectrum.shape[1]):
            columns[f'{prefix}_real_{i}'] = spectrum[:, i].real
            columns[f'{prefix}_imag_{i}'] = spectrum[:, i].imag
        return columns

    @staticmethod
    def _with_columns(frame: pd.DataFrame, new_columns: dict) -> pd.DataFrame:
        """Return frame with new_columns appended in one concat, replacing same-named columns."""
        block = pd.DataFrame(new_columns, index=frame.index)
        stale = frame.columns.intersection(block.columns)
        return pd.concat([frame.drop(columns=stale), block], axis=1)

    def aggregate_window(self, func: Callable, func_name: str) -> None:
        """
        Apply func over various window sizes and add columns to training and validation sets for the output of these aggregate functions

        Each window ends at the latest timestamp column and covers the
        half-open range (latest - window, latest]. Scalar aggregates create
        one column named ``{func_name}_{window}``. When func is ``self.fft``,
        one ``_real_{i}`` and one ``_imag_{i}`` column is created per
        frequency bin, and ``self.training_set`` / ``self.validation_set``
        are rebound to new DataFrames (the bins are attached in a single
        concat to avoid fragmenting the frames).

        Args:
        func (Callable): A function to apply to each aggregate window.
        func_name (str): The name of the function, used to create new column names.

        Raises:
        ValueError: If the training set has no ``HH:MM:SS`` columns.
        """
        # Select reading columns by name rather than by position, so extra
        # leading columns (e.g. a re-saved "Unnamed: 0" index) cannot shift
        # the selection onto non-time columns.
        stamps = self._time_columns()
        if not stamps:
            raise ValueError("training_set has no HH:MM:SS timestamp columns to aggregate")

        # Minutes since midnight for every reading column, parsed once.
        minute_marks = np.asarray(pd.to_timedelta(stamps).total_seconds() / 60)
        latest = minute_marks.max()
        is_fft = func == self.fft

        for window_name, window_minutes in self.TIME_WINDOWS.items():
            window_start = latest - window_minutes
            in_window = (minute_marks > window_start) & (minute_marks <= latest)
            columns_to_aggregate = [stamp for stamp, keep in zip(stamps, in_window) if keep]
            prefix = f'{func_name}_{window_name}'

            if is_fft:
                # FFT returns one complex value per bin; store real and
                # imaginary parts as separate float columns.
                spectrum_train = np.apply_along_axis(func, 1, self.training_set[columns_to_aggregate].values)
                spectrum_valid = np.apply_along_axis(func, 1, self.validation_set[columns_to_aggregate].values)

                self.training_set = self._with_columns(
                    self.training_set, self._spectrum_columns(spectrum_train, prefix))
                self.validation_set = self._with_columns(
                    self.validation_set, self._spectrum_columns(spectrum_valid, prefix))
            else:
                # Scalar aggregate: one value per row, stored in a single column.
                self.training_set[prefix] = self.training_set[columns_to_aggregate].apply(func, axis=1)
                self.validation_set[prefix] = self.validation_set[columns_to_aggregate].apply(func, axis=1)



In [99]:
# Instantiate the engine; the constructor reads the train and validation CSVs from cleaned_data/.
test = FeatureEngine()

  self.training_set = pd.read_csv("cleaned_data/train_data.csv")


In [100]:
# Preview the first rows of the validation frame (DataFrame.head defaults to five rows).
test.validation_set.head()

Unnamed: 0.1,Unnamed: 0,index,id,corresponding_day,06:00:00,06:05:00,06:10:00,06:15:00,06:20:00,06:25:00,...,23:45:00,23:50:00,23:55:00,hypo,tir_last_10_minutes,tir_last_30_minutes,tir_last_1_hour,tir_last_3_hours,tir_last_6_hours,tir_last_12_hours
0,193,193,3,2015-05-23,163.0,159.0,155.0,152.0,147.0,141.0,...,85.0,86.0,81.0,False,1.0,1.0,1.0,1.0,0.902778,0.770833
1,194,194,3,2015-05-24,214.0,215.0,221.0,231.0,241.0,252.0,...,106.0,100.0,95.0,False,1.0,1.0,0.583333,0.194444,0.111111,0.5
2,195,195,3,2015-05-25,115.0,114.0,116.0,118.0,121.0,122.0,...,107.0,99.0,94.0,True,1.0,1.0,1.0,0.333333,0.263889,0.631944
3,196,196,3,2015-05-26,206.0,207.0,207.0,209.0,211.0,216.75,...,137.0,137.0,137.0,False,1.0,1.0,1.0,0.611111,0.486111,0.354167
4,197,197,3,2015-05-27,169.0,171.0,173.0,173.0,174.0,182.5,...,116.0,115.0,114.0,True,1.0,1.0,1.0,0.75,0.694444,0.541667


In [92]:
"""
TODO:
2. Add Morlet / Mexican-hat wavelet feature columns


3. Begin Adding Demographic Data
"""

''