In [None]:
# ------------------------------------------
# IMPORT AND SETUP
# ------------------------------------------

import sys, os
import pandas as pd

sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..', 'src'))) # This construct was reworked with the assistance of AI (DeepSeek) 

#Import the class RawData
from data_import import RawData
raw_data = RawData()

#Import the class RefinedData
from data_handling import RefinedData
ref_data = RefinedData()

#Import the class VisualTable
from utils import VisualTable
pretty_table = VisualTable()

"""
Functions within the class RawData, with arguments:

    get_met(
        self, 
        weather_station, 
        weather_elements, 
        weather_time, 
        weather_resolution
        )

    get_nilu(
        self, 
        threshold, 
        file_path
        )

Functions within the class RefinedData, with arguments:

    missing_data(
        self, 
        df, 
        strategy, 
        fill_value
        )
        
    show_zeroes(
        self, 
        df
        )
"""

'\nFunctions within the class RawData, with arguments:\n\n    get_met(\n        self, \n        weather_station, \n        weather_elements, \n        weather_time, \n        weather_resolution\n        )\n\n    get_nilu(\n        self, \n        threshold, \n        file_path\n        )\n\nFunctions within the class RefinedData, with arguments:\n\n    missing_data(\n        self, \n        df, \n        strategy, \n        fill_value\n        )\n        \n    show_zeroes(\n        self, \n        df\n        )\n'

In [2]:
# ------------------------------------------
# WEATHER DATA - IMPORT AND PROCESSING
# ------------------------------------------

# Variables that can be changed from run to run for the weather data
weather_station = 'SN68860' # The ID of the weather station to fetch data from, can be multiple stations separated by a comma - 'SN68860' is Voll weatherstation
weather_elements = 'mean(air_temperature P1D),sum(precipitation_amount P1D),mean(wind_speed P1D)' # The measurements to include in the dataset, here: temperautre, precipitation, and wind speed
weather_time = '2024-01-30/2024-03-19' # The stretch of time to fetch weather data for
weather_resolution = 'P1D' # The level of granularity in the weather data. P1D is once per day. P1H is once per hour.

# Run the module for importing the weather data, and save it to a dataframe
df_weather = raw_data.get_met(weather_station, weather_elements, weather_time, weather_resolution)

# The following block of code was generated by AI when the API was down temporarily
# Purpose: Generate sample weather data that enables the program to run if the API is down
# AI Tool: DeepSeek

# Temporary weather data for when the API is down
'''df_weather = pd.DataFrame({
    'referenceTime': ['2024-01-30', '2024-01-31', '2024-02-01'],
    'Temperature': [10, 15, 11],
    'Precipitation': [0, 5, 2],
    'Wind_Speed': [3, 7, 2]
})'''

# Check if data was fetched successfully and check for missing values
if df_weather is None:
    print("Failed to fetch weather data.")
else:
    # Report missing values as a dataframe
    '''missing_df = ref_data.missing_data(df_weather, strategy='report') #The 'strategy' variable was suggested by AI (DeepSeek)
    if missing_df is not None:
        print("Missing values found at these locations:")
        pretty_table.pretty_data(missing_df)'''

    # Dataframe without the rows containing missing values
    '''cleaned_df = ref_data.missing_data(df_weather, strategy='drop')
    if cleaned_df is not None:
        print("DataFrame after dropping missing values:")
        pretty_table.pretty_data(cleaned_df)'''

    # Fill missing values with chosen fill_value
    filled_df = ref_data.missing_data(df_weather, strategy='fill', fill_value='NaN')
    if filled_df is not None:
        print("DataFrame after filling missing values:")
        pretty_table.pretty_data(filled_df)

# The following block of code was generated by AI to provide exploratory data analysis
# Purpose: Visualise and confirm that the program is behaving the way we expect
# AI Tool: DeepSeek

# Weather Data EDA
'''print("Weather Data Info:")
print(df_weather.info())
print("\nWeather Data Description:")
print(df_weather.describe())'''

# Displaying the weather data as a table
print('\nThe entire table:')
pretty_table.pretty_data(df_weather)

Data collected from frost.met.no!
There are  146 lines of data in this dataframe.

No missing values found in the data set! 


The entire table:


Unnamed: 0,referenceTime,temperature,wind_speed,precipitation
0,2024-01-30,-0.3,2.7,0.0
1,2024-01-31,2.8,5.7,0.0
2,2024-02-01,0.9,7.6,4.0
3,2024-02-02,3.1,4.6,6.9
4,2024-02-03,1.6,9.0,27.1
5,2024-02-04,-1.6,3.7,8.8
6,2024-02-05,-2.7,2.7,3.5
7,2024-02-06,-6.8,1.3,0.5
8,2024-02-07,-10.3,2.8,0.0
9,2024-02-08,-7.7,3.7,0.0


In [None]:
# ------------------------------------------
# AIR QUALITY DATA - IMPORT AND PROCESSING
# ------------------------------------------

# Variables that can be changed from run to run for the air quality data
threshold = 95 # The threshold below which we will not use the data for analysis (uptime % of day)
file_path = '../data/luftkvalitet_trondheim_dag.csv' #Choose the file to get air quality data from

# Run the module for importing the air quality data, and save it to a dataframe
df_quality = raw_data.get_nilu(threshold, file_path)

# Check if data was fetched successfully and check for missing values
if df_quality is None:
    print("Failed to fetch weather data.")
else:
    missing_data = ref_data.missing_data(df_quality)
    if missing_data is not None:
        print("\nThe following datapoints are missing:")
        pretty_table.pretty_data(missing_data)
    else:
        pass

# The following block of code was generated by AI to provide exploratory data analysis
# - Purpose: Visualise and confirm that the program is behaving the way we expect
# - AI Tool: DeepSeek

# Air Quality Data EDA
'''print("\nAir Quality Data Info:")
print(df_quality.info())
print("\nAir Quality Data Description:")
print(df_quality.describe())'''

# Displaying the air quality data as a table
print('\nThe entire table:')
pretty_table.pretty_data(df_quality)

Data collected from nilu.com!
There are 367 lines of data in this dataframe.


The following datapoints are missing:


Unnamed: 0,index,column
0,18,NO_µgm³
1,18,NO2_µgm³
2,18,NOx_µgm³
3,18,PM10_µgm³
4,18,PM2.5_µgm³
5,29,PM10_µgm³
6,29,PM2.5_µgm³
7,50,NO_µgm³
8,50,NO2_µgm³
9,50,NOx_µgm³



The entire table:


Unnamed: 0,timestamp,NO_µgm³,NO2_µgm³,NOx_µgm³,PM10_µgm³,PM2.5_µgm³
0,2024-02-19,21.319116,47.910158,80.485768,11.695174,9.119713
1,2024-02-20,36.586944,48.447275,104.352126,20.795971,14.341274
2,2024-02-21,28.709966,43.229089,87.097916,11.257472,5.809501
3,2024-02-22,8.784671,16.79599,30.218967,4.780046,0.898309
4,2024-02-23,4.420641,11.082582,17.837321,1.219465,0.168885
5,2024-02-24,1.92963,7.355437,10.303912,2.44495,0.019941
6,2024-02-25,5.334206,17.168342,25.319009,15.155823,2.120134
7,2024-02-26,5.787199,22.636717,31.479558,6.123895,2.299517
8,2024-02-27,21.400212,30.552252,63.251776,18.65754,3.183995
9,2024-02-28,11.161719,27.314969,44.370076,33.852309,4.013046


In [None]:
# ------------------------------------------
# SAVE DATA TO FILE
# ------------------------------------------

# The following block of code was written with assistance from AI
# - Purpose: The function saves the reworked dataframes to a file. AI was used to pinpoint the correct folder location.
# - AI Tool: DeepSeek

df_weather.to_csv('../data/refined_weather_data.csv', encoding='utf-8', index=False)

df_quality.to_csv('../data/refined_air_qualty_data.csv', encoding='utf-8', index=False)