In [None]:
# ------------------------------------------
# Imports and setup
# ------------------------------------------

import os
import sys

import pandas as pd

# Make the project's src/ directory importable. The path is resolved relative
# to the current working directory (one level up, then into 'src'), and it
# must be inserted before the project imports below.
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..', 'src')))

# Import the class RawData and create the instance used by the import cells
from data_import import RawData
raw_data = RawData()

# Import the class RefinedData and create the instance used for data checks
from data_handling import RefinedData
ref_data = RefinedData()

# Reference for the methods called in this notebook.
# (Written as comments rather than a bare string literal: a string left as the
# last expression of a cell is echoed as the cell's output.)
#
# Functions within the class RawData, with arguments:
#     get_met(self, weather_station, weather_elements, weather_time, weather_resolution)
#     get_nilu(self, threshold, file_path)
#
# Functions within the class RefinedData, with arguments:
#     missing_data(self, df)
#     show_zeroes(self, df)

'\nFunctions within the class, with arguments:\n    get_met(self, weather_station, weather_elements, weather_time, weather_resolution)\n    get_nilu(self, threshold, file_path)\n    missing_data(self, df)\n    show_zeroes(self, df)\n'

In [3]:
# ------------------------------------------
# FORMATTING OF DATAFRAME VISUALISATION
# ------------------------------------------

# A function to make pandas show an entire dataframe in a readable manner
def pretty_data(df):
    """Display `df` in full, without truncating rows, columns or line width.

    The pandas display limits are lifted only for the duration of this call
    (via `pd.option_context`), so other cells keep their normal, truncated
    output instead of inheriting unlimited display settings.

    Args:
        df: The object to render, typically a pandas DataFrame.

    Returns:
        None. The object is rendered with the notebook's `display` function.
    """
    with pd.option_context(
        'display.max_rows', None,
        'display.max_columns', None,
        'display.width', None,
    ):
        # `display` is provided by the IPython/Jupyter kernel namespace.
        display(df)

In [4]:
# ------------------------------------------
# WEATHER DATA - IMPORT AND PROCESSING
# ------------------------------------------

# Variables that can be changed from run to run for the weather data
# Run-specific settings for the weather request; change these between runs.
# Station ID(s) to fetch from; several stations can be given separated by a
# comma. 'SN68860' is the Voll weather station.
weather_station = 'SN68860'
# Measurements to include in the dataset: temperature, precipitation and wind speed.
weather_elements = ','.join([
    'mean(air_temperature P1D)',
    'sum(precipitation_amount P1D)',
    'mean(wind_speed P1D)',
])
# The stretch of time to fetch weather data for.
weather_time = '2024-01-30/2024-03-19'
# Granularity of the weather data: P1D is once per day, P1H is once per hour.
weather_resolution = 'P1D'

# Fetch the weather data through the import module and keep it as a dataframe
df_weather = raw_data.get_met(weather_station, weather_elements, weather_time, weather_resolution)

# On success, check for missing values and print whatever the check returns;
# on failure, report that nothing was fetched.
if df_weather is not None:
    missing_data = ref_data.missing_data(df_weather)
    if missing_data is not None:
        print(missing_data)
else:
    print("Failed to fetch weather data.")

# Displaying the weather data as a table
#pretty_data(df_weather)

Data collected from frost.met.no!
There are  245 lines of data in this dataframe.

No missing values found in the data set! 



In [None]:
# ------------------------------------------
# AIR QUALITY DATA - IMPORT AND PROCESSING
# ------------------------------------------

# Variables that can be changed from run to run for the air quality data
# Variables that can be changed from run to run for the air quality data
threshold = 95 # The threshold below which we will not use the data for analysis (uptime % of day)
file_path = '../Data/luftkvalitet_trondheim_dag.csv' # Choose the file to get air quality data from

# Run the module for importing the air quality data, and save it to a dataframe
df_quality = raw_data.get_nilu(threshold, file_path)

# Check if data was fetched successfully; all further checks need a dataframe,
# so they only run when the import succeeded.
if df_quality is None:
    print("Failed to fetch air quality data.")
else:
    # Report missing values, if the check returns any
    missing_data = ref_data.missing_data(df_quality)
    if missing_data is not None:
        print("The following datapoints are missing:\n")
        pretty_data(missing_data)

    # Report the rows flagged by show_zeroes
    poor_quality_data = ref_data.show_zeroes(df_quality)
    print('These rows contain no data or data of too poor quality: \n\n',poor_quality_data, '\n')
    pretty_data(poor_quality_data)

# Displaying the air quality data as a table
#pretty_data(df_quality)

Data collected from nilu.com!
There are  367 lines of data in this dataframe.

These datapoints are missing: 

    index                  column
0    107  Elgeseter NO µg/m³ Day 

These rows contain no data or data of too poor quality: 

 Index([ 18,  29,  50,  57,  79, 107, 108, 109, 110, 111, 112, 113, 114, 115,
       116, 117, 118, 119, 142, 156, 163, 185, 191, 203, 204, 205, 228, 229,
       230, 231, 232, 233, 234, 235, 269, 272, 275, 276, 277, 278, 279, 280,
       281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294,
       295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308,
       309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322,
       323, 324, 343],
      dtype='int64') 

