In [2]:
# ------------------------------------------
# IMPORT AND SETUP
# ------------------------------------------

import sys, os
import pandas as pd

sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..', 'src')))

#Import the class RawData
from data_import import RawData
raw_data = RawData()

#Import the class RefinedData
from data_handling import RefinedData
ref_data = RefinedData()

#Import the class VisualTable
from utils import VisualTable
pretty_table = VisualTable()

"""
Functions within the class RawData, with arguments:

    get_met(
        self, 
        weather_station, 
        weather_elements, 
        weather_time, 
        weather_resolution
        )

    get_nilu(
        self, 
        threshold, 
        file_path
        )

Functions within the class RefinedData, with arguments:

    missing_data(
        self, 
        df, 
        strategy, 
        fill_value
        )
        
    show_zeroes(
        self, 
        df
        )
"""

'\nFunctions within the class RawData, with arguments:\n\n    get_met(\n        self, \n        weather_station, \n        weather_elements, \n        weather_time, \n        weather_resolution\n        )\n\n    get_nilu(\n        self, \n        threshold, \n        file_path\n        )\n\nFunctions within the class RefinedData, with arguments:\n\n    missing_data(\n        self, \n        df, \n        strategy, \n        fill_value\n        )\n        \n    show_zeroes(\n        self, \n        df\n        )\n'

In [None]:
# ------------------------------------------
# WEATHER DATA - IMPORT AND PROCESSING
# ------------------------------------------

# Variables that can be changed from run to run for the weather data
weather_station = 'SN68860' # The ID of the weather station to fetch data from, can be multiple stations separated by a comma - 'SN68860' is Voll weatherstation
weather_elements = 'mean(air_temperature P1D),sum(precipitation_amount P1D),mean(wind_speed P1D)' # The measurements to include in the dataset, here: temperautre, precipitation, and wind speed
weather_time = '2024-01-30/2024-03-19' # The stretch of time to fetch weather data for
weather_resolution = 'P1D' # The level of granularity in the weather data. P1D is once per day. P1H is once per hour.

# Run the module for importing the weather data, and save it to a dataframe
df_weather = raw_data.get_met(weather_station, weather_elements, weather_time, weather_resolution)

# Temporary weather data for when the API is down
'''df_weather = pd.DataFrame({
    'referenceTime': ['2024-01-30', '2024-01-31', '2024-02-01'],
    'Temperature': [10, 15, 11],
    'Precipitation': [0, 5, 2],
    'Wind_Speed': [3, 7, 2]
})'''

# Check if data was fetched successfully and check for missing values
if df_weather is None:
    print("Failed to fetch weather data.")
else:
    # Report missing values as a dataframe
    missing_df = ref_data.missing_data(df_weather, strategy='report')
    if missing_df is not None:
        print("Missing values found at these locations:")
        pretty_table.pretty_data(missing_df)

    # Dataframe without the rows containing missing values
    '''cleaned_df = ref_data.missing_data(df_weather, strategy='drop')
    if cleaned_df is not None:
        print("DataFrame after dropping missing values:")
        pretty_table.pretty_data(cleaned_df)'''

    # Fill missing values with chosen fill_value
    '''filled_df = ref_data.missing_data(df_weather, strategy='fill', fill_value=None)
    if filled_df is not None:
        print("DataFrame after filling missing values:")
        pretty_table.pretty_data(filled_df)'''

# Weather Data EDA
'''print("Weather Data Info:")
print(df_weather.info())
print("\nWeather Data Description:")
print(df_weather.describe())'''

# Displaying the weather data as a table
#pretty_table.pretty_data(df_weather)

Data collected from frost.met.no!
There are  245 lines of data in this dataframe.

No missing values found in the data set! 

Weather Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 245 entries, 0 to 244
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   value          245 non-null    float64
 1   unit           245 non-null    object 
 2   timeOffset     245 non-null    object 
 3   referenceTime  245 non-null    object 
dtypes: float64(1), object(3)
memory usage: 7.8+ KB
None

Weather Data Description:
            value
count  245.000000
mean     1.309796
std      3.920238
min    -10.300000
25%      0.000000
50%      0.600000
75%      2.800000
max     27.100000


In [None]:
# ------------------------------------------
# AIR QUALITY DATA - IMPORT AND PROCESSING
# ------------------------------------------

# Variables that can be changed from run to run for the air quality data
threshold = 95 # The threshold below which we will not use the data for analysis (uptime % of day)
file_path = '../Data/luftkvalitet_trondheim_dag.csv' #Choose the file to get air quality data from

# Run the module for importing the air quality data, and save it to a dataframe
df_quality = raw_data.get_nilu(threshold, file_path)

# Check if data was fetched successfully and check for missing values
if df_quality is None:
    print("Failed to fetch weather data.")
else:
    missing_data = ref_data.missing_data(df_quality)
    if missing_data is not None:
        print("The following datapoints are missing:")
        #pretty_table.pretty_data(missing_data)
    else:
        pass

poor_quality_data = ref_data.show_zeroes(df_quality)
print('These datapoints contain no data or data of too poor quality:')
#pretty_table.pretty_data(poor_quality_data)

# Air Quality Data EDA
'''print("\nAir Quality Data Info:")
print(df_quality.info())
print("\nAir Quality Data Description:")
print(df_quality.describe())'''

# Displaying the air quality data as a table
#pretty_table.pretty_data(df_quality)

Data collected from nilu.com!
There are  367 lines of data in this dataframe.

The following datapoints are missing:
These datapoints contain no data or data of too poor quality:

Air Quality Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 367 entries, 0 to 366
Data columns (total 11 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   Tid                        367 non-null    datetime64[ns]
 1   Elgeseter NO µg/m³ Day     295 non-null    float64       
 2   Dekning                    366 non-null    float64       
 3   Elgeseter NO2 µg/m³ Day    295 non-null    float64       
 4   Dekning.1                  366 non-null    float64       
 5   Elgeseter NOx µg/m³ Day    295 non-null    float64       
 6   Dekning.2                  366 non-null    float64       
 7   Elgeseter PM10 µg/m³ Day   364 non-null    float64       
 8   Dekning.3                  366 non-null    float64       