## Imports and connect to drive

In [None]:
from google.colab import drive
import pandas as pd
import requests
import numpy as np

In [None]:
# Mount Google Drive at /content/gdrive so the CSV files read below
# (under /content/gdrive/MyDrive/...) are accessible from this runtime.
drive.mount('/content/gdrive')

## Read historical data

### Air Quality Data

In [None]:
def complex_function(date):
  """Collapse a '/'-separated date string into one compact string.

  Every component that is exactly one character long is prefixed with
  '0'; all other components are kept unchanged. The components are then
  concatenated without a separator, e.g. '2023/1/2' -> '20230102'.
  """
  padded_parts = [
    '0' + part if len(part) == 1 else part
    for part in date.split('/')
  ]
  return "".join(padded_parts)

In [None]:
def _aqi_reading_to_int(reading):
  """Convert one raw pollutant reading to an int, mapping missing values to 0.

  Missing means None, NaN, or a string that is empty / whitespace only.
  Decimal values (float or decimal string) are truncated toward zero.
  Raises ValueError for strings that are not numeric.
  """
  if reading is None:
    return 0
  if isinstance(reading, str):
    text = reading.strip()
    if not text:
      return 0
    try:
      return int(text)
    except ValueError:
      # Decimal strings such as '12.5' are truncated like a float would be.
      return int(float(text))
  # NaN is the only float value that does not compare equal to itself.
  if isinstance(reading, float) and reading != reading:
    return 0
  return int(reading)


def getAQI(pm25,pm10,no2):
  """Return the largest of the three pollutant readings as an int.

  Args:
    pm25, pm10, no2: readings as numbers or numeric strings. Blank
      strings (e.g. ' ' or ''), None and NaN count as 0.

  Returns:
    The maximum of the three readings after conversion to int.

  Raises:
    ValueError: if a reading is a non-blank string that is not numeric.
  """
  return max(_aqi_reading_to_int(value) for value in (pm25, pm10, no2))

In [None]:
# Quick manual check of complex_function on a date whose month and day
# are single digits.
date = '2023/1/2'
complex_function(date)

In [None]:
# Load the air-quality CSV from the mounted Drive folder.
df_air_quality = pd.read_csv(
  '/content/gdrive/MyDrive/AQI-Predictor/Data/sundsvall-kopmangatan_sweden-air-quality.csv'
)

# Reserve a placeholder 'aqi' column at position 1; the real values are
# filled in once the columns have been renamed.
df_air_quality.insert(loc=1, column="aqi", value=1, allow_duplicates=True)

# Convert the '/'-separated date strings to integers so they sort numerically.
df_air_quality['date'] = (
  df_air_quality['date']
  .apply(complex_function)
  .astype(int)
)
df_air_quality.sort_values(by=['date'], ascending=False, ignore_index=True, inplace=True)

# Replace the header names wholesale; the list must match the frame's
# five columns in order.
df_air_quality.columns = ['date', 'aqi', 'pm25', 'pm10', 'no2']

# Per-row AQI: the largest of the three pollutant readings.
df_air_quality['aqi'] = df_air_quality.apply(
  lambda row: getAQI(row['pm25'], row['pm10'], row['no2']),
  axis=1,
)
df_air_quality.head()

In [None]:
# Call info() so the summary is printed; the bare attribute only displays
# the bound method object.
df_air_quality.info()

### Weather Data

In [None]:
# Load the weather CSV from the mounted Drive folder.
df_weather = pd.read_csv("/content/gdrive/MyDrive/AQI-Predictor/Data/sundsvall-weather-data.csv")

# Header clean-up: rename, lower-case, strip unit punctuation, and turn
# spaces/quotes into underscores.
df_weather.rename(
  columns={
    'time': 'date',
    'shortwave_radiation_sum_mj/m²': 'shortwave_radiation_sum_mj',
  },
  inplace=True,
)
df_weather.columns = df_weather.columns.str.lower()
# regex=True is required: the patterns are character classes, and pandas >= 2.0
# treats the pattern as a literal string by default (making these calls no-ops).
df_weather.columns = df_weather.columns.str.replace('[°,(,)]', '', regex=True)
df_weather.columns = df_weather.columns.str.replace('[" "]', '_', regex=True)

# Final column names are assigned positionally; the list must match the
# CSV's 17 columns in order, and it overrides the clean-up above.
df_weather.columns = ['date', 'weathercode', 'temperature_2m_max', 'temperature_2m_min', 'apparent_temperature_max','apparent_temperature_min',
              'sunrise','sunset','shortwave_radiation_sum','precipitation_sum','rain_sum','snowfall_sum','precipitation_hours',
              'windspeed_10m_max','windgusts_10m_max','winddirection_10m_dominant','et0_fao_evapotranspiration']

# Drop the dashes from the date strings and store them as integers so
# they sort numerically.
df_weather['date'] = df_weather['date'].str.replace('-', '', regex=True)
df_weather['date'] = df_weather['date'].astype(int)
df_weather.sort_values(by = ['date'],inplace = True,ignore_index = True, ascending=False)
df_weather.head()

## Connecting to Hopsworks

In [None]:
# Install the Hopsworks client package via a notebook shell command.
!pip install hopsworks

In [None]:
import hopsworks

# Log in to Hopsworks and keep the returned project handle.
project = hopsworks.login()

# Feature-store handle for the project; used below to create the feature groups.
fs = project.get_feature_store() 

## Creating Feature Groups

### Air Quality Data

In [None]:
# Feature group for the air-quality frame, keyed on 'date', which is also
# used as the event-time column.
air_quality_fg = fs.get_or_create_feature_group(
  name="air_quality_fg",
  version=2,
  description="Air Quality characteristics of each day",
  primary_key=["date"],
  event_time="date",
  online_enabled=True,
)

# Write the prepared air-quality rows into the feature group.
air_quality_fg.insert(df_air_quality)

### Weather Data

In [None]:
# Feature group for the weather frame, keyed on 'date', which is also
# used as the event-time column.
weather_fg = fs.get_or_create_feature_group(
  name="weather_fg",
  version=1,
  description="Weather characteristics of each day",
  primary_key=["date"],
  event_time="date",
  online_enabled=True,
)

# Write the prepared weather rows into the feature group.
weather_fg.insert(df_weather)