# In [None]:
# Step 3: Listening for New Data
import os
import json
import logging
import time

class DataListener:
    """Poll an input directory for CSV files and run anomaly detection on them.

    Settings are read from a JSON configuration file with the keys
    ``input-directory``, ``output-directory``, ``img-directory``,
    ``sensor-names`` and ``check-interval``.
    """

    def __init__(self, config_file):
        """Load the JSON configuration and set up logging.

        Args:
            config_file: Path to the JSON configuration file.

        Raises:
            OSError: If the configuration file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
            KeyError: If one of the required keys is missing.
        """
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(config_file, 'r', encoding='utf-8') as f:
            self.config = json.load(f)

        self.input_dir = self.config['input-directory']
        self.output_dir = self.config['output-directory']
        self.img_dir = self.config['img-directory']
        self.sensors_to_plot = self.config['sensor-names']
        # Passed to time.sleep() in listen(), i.e. a number of seconds.
        self.check_interval = self.config['check-interval']

        self.logger = self.setup_logger()

    def setup_logger(self):
        """Configure root logging to write to ``app.log`` and return a logger.

        Note that ``logging.basicConfig`` is a no-op when the root logger
        already has handlers, so only the first configuration in a process
        takes effect.

        Returns:
            The module-level ``logging.Logger``.
        """
        logging.basicConfig(filename='app.log', level=logging.INFO,
                            format='%(asctime)s  %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
        return logging.getLogger(__name__)

    def listen(self):
        """Poll the input directory forever, processing one file per cycle.

        Any exception raised while looking for or processing a file is logged
        (with traceback) and the loop continues after ``check_interval``
        seconds. This method never returns.
        """
        while True:
            try:
                new_file = self.find_new_file()
                if new_file:
                    self.process_file(new_file)
            except Exception as e:
                # Top-level boundary: keep the listener alive, but record the
                # full traceback so failures can be diagnosed from the log.
                # NOTE(review): a file that fails processing is not removed,
                # so it is picked up again on every cycle and blocks any file
                # that sorts after it.
                self.logger.exception('Error occurred: %s', e)
            time.sleep(self.check_interval)

    def find_new_file(self):
        """Return the path of the next CSV file waiting in the input directory.

        Entries are examined in sorted name order so the processing order is
        deterministic; directories whose name happens to end in ``.csv`` are
        skipped.

        Returns:
            The joined path of the first matching file, or ``None`` when no
            CSV file is present.
        """
        for name in sorted(os.listdir(self.input_dir)):
            if not name.endswith('.csv'):
                continue
            candidate = os.path.join(self.input_dir, name)
            if os.path.isfile(candidate):
                return candidate
        return None

    @staticmethod
    def _image_filename(file_path, sensor_name):
        """Build ``<stem>-<sensor>.png`` from the data file's base name."""
        # basename/splitext handle the platform's path separator and keep
        # dots inside the stem, so "run.1.csv" and "run.2.csv" do not collide.
        stem = os.path.splitext(os.path.basename(file_path))[0]
        return f"{stem}-{sensor_name}.png"

    @staticmethod
    def _ensure_dir(directory):
        """Create *directory* if it is missing (an empty path means the cwd)."""
        if directory:
            os.makedirs(directory, exist_ok=True)

    def process_file(self, file_path):
        """Transform one CSV file, predict anomalies, save results and plots.

        The file is loaded, its ``timestamp`` column dropped, values coerced
        to numeric, median-imputed and standardized. The model stored in
        ``trained_model.joblib`` then predicts on the transformed data. The
        transformed data plus a ``prediction`` column is written to the
        output directory under the original file name, one PNG per configured
        sensor is written to the image directory, and the input file is
        deleted.

        Args:
            file_path: Path of the CSV file to process.

        Raises:
            Exception: Anything raised while reading, transforming,
                predicting or writing propagates to the caller; the input
                file is only removed when every step succeeded.
        """
        self.logger.info('Found new data file')
        self.logger.info(f'Loaded the file {file_path}')

        # Load data from file
        data = pd.read_csv(file_path)

        # Preprocess data.
        # NOTE(review): the imputer and scaler are fitted on each incoming
        # file, so their statistics come from this file rather than from the
        # training data — confirm this matches how the model was trained.
        data = data.drop(columns=['timestamp'])
        data = data.apply(pd.to_numeric, errors='coerce')

        imputer = SimpleImputer(strategy='median')
        data = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)

        scaler = StandardScaler()
        data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns)

        # Load trained model
        model_file = "trained_model.joblib"
        model = joblib.load(model_file)

        # Predict anomalies
        predictions = model.predict(data)

        # Save transformed data with predictions. The predictions go into a
        # copy so the frame handed to the plotting helper is unchanged.
        # Assumes model.predict returns one value per row — TODO confirm.
        output = data.copy()
        output['prediction'] = predictions
        self._ensure_dir(self.output_dir)
        output_filename = os.path.join(self.output_dir, os.path.basename(file_path))
        output.to_csv(output_filename, index=False)
        self.logger.info('Received transformed data')

        # Generate and save sensor plots
        self._ensure_dir(self.img_dir)
        for sensor_name in self.sensors_to_plot:
            fig = plot_sensor_anomalies(data, sensor_name)
            img_filename = self._image_filename(file_path, sensor_name)
            img_path = os.path.join(self.img_dir, img_filename)
            fig.savefig(img_path)
            # NOTE(review): the figure is not released here; if the helper
            # creates pyplot-managed figures they will accumulate in this
            # long-running loop — confirm and close them after saving.
            self.logger.info(f'Saving image {img_filename}')

        # Remove original data file
        os.remove(file_path)
        self.logger.info('Resuming listening')
