# Loggning

In [1]:
import logging

# Logger named after the current module; its records propagate to the root
# logger configured below.
logger = logging.getLogger(__name__)

# Write every record of level DEBUG and above to test_log.txt, prefixing each
# line with a timestamp and the severity level. Leaving out ``format=`` would
# fall back to logging's default line layout.
logging.basicConfig(
    level=logging.DEBUG,
    format='[%(asctime)s][%(levelname)s] %(message)s',
    filename='test_log.txt',
)

In [3]:
# Emit one INFO record and one WARNING record through the module logger.
logger.info('Hello')
logger.warning('Look out!')

In [1]:
import datetime
import logging
import pandas as pd

# Send every record of level DEBUG and above that reaches the root logger to
# DataCleaner_log.txt, prefixed with a timestamp and the severity level.
# NOTE(review): basicConfig() does nothing when the root logger already has
# handlers (for example when an earlier cell configured logging in the same
# kernel) -- confirm this cell runs in a fresh session.
logging.basicConfig(
    level=logging.DEBUG,
    format='[%(asctime)s][%(levelname)s] %(message)s',
    filename='DataCleaner_log.txt',
)

class DataCleaner:
    """Normalize the ``'date'`` and ``'temp'`` fields of row dicts in place.

    ``data`` is iterated as a collection of mappings that each have a
    ``'date'`` and a ``'temp'`` key. Values that cannot be interpreted are
    replaced with ``pd.NaT`` (dates) or ``pd.NA`` (temperatures) instead of
    aborting the whole run.
    """

    def __init__(self, data) -> None:
        self.data = data
        self.logger = logging.getLogger(__name__)

    def _parse_date(self, raw):
        """Return a ``datetime.date`` for ``raw``, or ``pd.NaT`` on failure.

        Accepted string layouts are ``Y-M-D``, ``M/D/Y`` (a year below 100 is
        taken to mean 20YY) and the compact eight-digit ``YYYYMMDD``.
        Failures are logged at ERROR level.
        """
        # Already-cleaned values pass through untouched so that running the
        # cleaner twice on the same rows is harmless.
        if raw is pd.NaT or isinstance(raw, datetime.date):
            return raw
        if not isinstance(raw, str):
            self.logger.error('Could not parse date %s', raw)
            return pd.NaT

        text = raw.strip()
        try:
            if '-' in text:
                y, m, d = [int(part) for part in text.split('-')]
            elif '/' in text:
                m, d, y = [int(part) for part in text.split('/')]
                if y < 100:
                    # Two-digit years belong to the 2000s; a four-digit year
                    # is used as written.
                    y += 2000
            elif len(text) == 8 and text.isdigit():
                y, m, d = int(text[:4]), int(text[4:6]), int(text[6:])
            else:
                self.logger.error('Could not parse date %s', raw)
                return pd.NaT
            # Constructed inside the try: datetime.date() raises ValueError
            # for impossible dates such as month 13 or day 45.
            return datetime.date(y, m, d)
        except (ValueError, OverflowError) as e:
            self.logger.error((e, raw))
            return pd.NaT

    def _parse_temp(self, raw):
        """Return ``raw`` as a ``float``, or ``pd.NA`` when that is impossible.

        Strings using a decimal comma (``'8,3'``) are accepted. Values of a
        type that ``float()`` rejects outright (such as ``None``) become
        ``pd.NA`` without a log entry; every other failure is logged at
        ERROR level.
        """
        try:
            return float(raw)
        except TypeError:
            return pd.NA
        except ValueError:
            pass  # retry below with the decimal comma replaced
        except Exception as e:
            self.logger.error((e, raw))
            return pd.NA

        try:
            return float(raw.replace(',', '.'))
        except (ValueError, TypeError, AttributeError) as e:
            # The retry can fail as well (e.g. 'abc'); log it rather than
            # letting the exception escape and abort the run.
            self.logger.error((e, raw))
            return pd.NA

    def clean_dates(self) -> None:
        """Replace every row's ``'date'`` with a ``datetime.date`` or ``pd.NaT``."""
        for row in self.data:
            row['date'] = self._parse_date(row['date'])

    def clean_temps(self) -> None:
        """Replace every row's ``'temp'`` with a ``float`` or ``pd.NA``."""
        for row in self.data:
            row['temp'] = self._parse_temp(row['temp'])

    def clean(self):
        """Clean dates, then temperatures, and return the same ``data`` object."""
        self.clean_dates()
        self.clean_temps()
        return self.data


In [4]:
import json

# Read the raw rows from disk. The context manager closes the file handle
# deterministically, and the explicit encoding keeps decoding independent of
# the platform's locale default.
with open('../lektion_3/data/temps3.json', encoding='utf-8') as f:
    data = json.load(f)

# Clean the rows in place; clean() returns the same list, which is the cell's
# final expression.
dc = DataCleaner(data=data)
dc.clean()

[{'date': datetime.date(2024, 4, 25), 'temp': 8.3},
 {'date': datetime.date(2024, 4, 26), 'temp': 8.1},
 {'date': datetime.date(2024, 4, 27), 'temp': 11.8},
 {'date': datetime.date(2024, 4, 28), 'temp': 17.6},
 {'date': datetime.date(2024, 4, 29), 'temp': <NA>},
 {'date': datetime.date(2024, 4, 30), 'temp': 17.9},
 {'date': datetime.date(2024, 5, 1), 'temp': 17.8},
 {'date': datetime.date(2024, 5, 2), 'temp': 17.7},
 {'date': datetime.date(2024, 5, 3), 'temp': 17.6},
 {'date': datetime.date(2024, 5, 4), 'temp': 16.1},
 {'date': datetime.date(2024, 5, 5), 'temp': 16.9},
 {'date': datetime.date(2024, 5, 6), 'temp': 13.5},
 {'date': datetime.date(2024, 5, 7), 'temp': 11.8},
 {'date': datetime.date(2024, 5, 8), 'temp': 14.3},
 {'date': datetime.date(2024, 5, 9), 'temp': <NA>},
 {'date': datetime.date(2024, 5, 10), 'temp': 13.2},
 {'date': datetime.date(2024, 5, 11), 'temp': 13.8},
 {'date': datetime.date(2024, 5, 12), 'temp': <NA>},
 {'date': datetime.date(2024, 5, 13), 'temp': 17.3},
 {'d