# <span style="font-weight:bold; font-size: 3rem; color:#2656a3;">**Data Engineering and Machine Learning Operations in Business** </span> <span style="font-weight:bold; font-size: 3rem; color:#333;">- Part 02: Feature Pipeline</span>

## 🗒️ This notebook is divided into the following sections:
1. Parse new data.
2. Insert new data into the Feature Store.

## <span style='color:#2656a3'> ⚙️ Import of libraries and packages</span>

In [1]:
# Importing of the packages for the needed libraries for the Jupyter notebook
# pandas builds and reshapes the dataframes; requests performs the HTTP calls to the two data APIs
import pandas as pd
import requests

# Ignore warnings
# NOTE: the filter is given no category or module, so it silences every warning raised from this cell onwards
import warnings 
warnings.filterwarnings('ignore')

## <span style='color:#2656a3'> 🪄 Parsing new data</span>

### <span style="color:#2656a3;">💸 Electricity prices per day from Energinet</span>

In [None]:
# URL for the electricity price API call, written one query parameter per line.
# Adjacent string literals are concatenated, so the resulting URL is a single string.
# NOTE(review): no `limit` parameter is sent - confirm that the API's default page size
# returns every record between `start` and `end`.
electricity_api_url = (
    'https://api.energidataservice.dk/dataset/Elspotprices'
    '?offset=0'
    '&start=2024-01-01T00:00'
    '&end=2024-04-08T00:00'
    '&filter=%7B%22PriceArea%22:[%22DK1%22]%7D'  # URL-encoded {"PriceArea":["DK1"]}
    '&sort=HourUTC%20DESC'
)

In [None]:
# Fetch data from the API and make the output to a pandas dataframe
# The timeout keeps the cell from hanging indefinitely when the service does not answer
electricity_data_response = requests.get(electricity_api_url, timeout=30)

# Checking the result of the API call. <Response [200]> means the API call was successful
print(electricity_data_response)

# Fail here with a clear HTTPError on a 4xx/5xx answer, instead of a confusing
# JSON or KeyError further down when the error body is parsed
electricity_data_response.raise_for_status()

# The payload keeps the rows under the 'records' key
electricity_data = electricity_data_response.json()
electricity_df = pd.DataFrame(electricity_data['records'])

In [None]:
# Data preprocessing: express the spot price per kWh instead of per MWh by dividing by 1000
electricity_df['SpotPriceDKK_KWH'] = electricity_df['SpotPriceDKK'].div(1000)

In [None]:
# Data cleaning: remove the columns that are not needed, all in one call
electricity_df.drop(columns=['SpotPriceDKK', 'SpotPriceEUR', 'HourUTC'], inplace=True)

In [None]:
# Rename the 'HourDK' column to 'time'
electricity_df.rename({'HourDK': 'time'}, axis='columns', inplace=True)

In [None]:
# Formatting the time column: cast to string and cut off the last three characters
# (presumably the ':SS' seconds suffix - confirm against the API output)
electricity_df['time'] = electricity_df['time'].astype(str).str.slice(stop=-3)
# The first ten characters of 'time' become the new 'date' column
electricity_df['date'] = electricity_df['time'].str.slice(stop=10)

In [None]:
# Move the 'date' column to the front: take it out of the dataframe into a
# temporary variable, then put it back at position 0
electricity_temporary_date_column = electricity_df.pop(item='date')
electricity_df.insert(loc=0, column='date', value=electricity_temporary_date_column)

In [None]:
# Parse the 'date' strings with the explicit YYYY-MM-DD format and keep only the date part
electricity_df['date'] = pd.to_datetime(arg=electricity_df['date'], format='%Y-%m-%d').dt.date
# Parse the 'time' strings to datetime values, letting pandas infer the format
electricity_df['time'] = pd.to_datetime(arg=electricity_df['time'])

In [None]:
# Preview: show the first 5 rows of the electricity dataframe
electricity_df.head(n=5)

### <span style="color:#2656a3;"> 🌤 Weather measurements from Open Meteo</span>

In [None]:
# URL for the weather measurement API call, written one query parameter per line.
# Adjacent string literals are concatenated, so the resulting URL is a single string.
weather_api_url = (
    'https://archive-api.open-meteo.com/v1/archive'
    '?latitude=57.048'
    '&longitude=9.9187'
    '&start_date=2024-01-01'
    '&end_date=2024-04-08'
    # Hourly variables requested; the comma-separated list continues on the next literal
    '&hourly=temperature_2m,relative_humidity_2m,precipitation,rain,snowfall,'
    'weather_code,cloud_cover,wind_speed_10m,wind_gusts_10m'
    '&timezone=auto'
)

In [20]:
# Fetch data from the API and make the output to a pandas dataframe
# The timeout keeps the cell from hanging indefinitely when the service does not answer
weather_data_response = requests.get(weather_api_url, timeout=30)

# Checking the result of the API call. <Response [200]> means the API call was successful
print(weather_data_response)

# Fail here with a clear HTTPError on a 4xx/5xx answer, instead of a confusing
# JSON or KeyError further down when the error body is parsed
weather_data_response.raise_for_status()

# The payload keeps the hourly series under the 'hourly' key
weather_data = weather_data_response.json()
weather_df = pd.DataFrame(weather_data['hourly'])


<Response [200]>


In [5]:
# Build the 'date' column from the first ten characters of the 'time' strings
weather_df['date'] = weather_df['time'].str.slice(stop=10)

In [6]:
# Move the 'date' column to the front: take it out of the dataframe into a
# temporary variable, then put it back at position 0
weather_temporary_date_column = weather_df.pop(item='date')
weather_df.insert(loc=0, column='date', value=weather_temporary_date_column)

In [7]:
# Parse the 'date' strings with the explicit YYYY-MM-DD format and keep only the date part
weather_df['date'] = pd.to_datetime(arg=weather_df['date'], format='%Y-%m-%d').dt.date
# Parse the 'time' strings to datetime values, letting pandas infer the format
weather_df['time'] = pd.to_datetime(arg=weather_df['time'])

In [8]:
# Preview: show the first 5 rows of the weather dataframe
weather_df.head(n=5)

Unnamed: 0,date,time,temperature_2m,relative_humidity_2m,precipitation,rain,snowfall,weather_code,cloud_cover,wind_speed_10m,wind_gusts_10m
0,2024-01-01,2024-01-01 00:00:00,4.8,95,1.8,1.8,0.0,61,100,23.6,49.0
1,2024-01-01,2024-01-01 01:00:00,4.9,95,1.2,1.2,0.0,55,100,21.6,43.2
2,2024-01-01,2024-01-01 02:00:00,4.8,96,0.6,0.6,0.0,53,100,18.4,39.2
3,2024-01-01,2024-01-01 03:00:00,4.3,96,0.8,0.8,0.0,53,100,16.7,33.8
4,2024-01-01,2024-01-01 04:00:00,4.4,97,0.3,0.3,0.0,51,100,15.4,30.2


## <span style="color:#2656a3;"> 📡 Connecting to Hopsworks Feature Store</span>

In [None]:
# Imported here, right before first use, rather than in the import cell at the top
import hopsworks

# Log in to Hopsworks; the returned project handle is kept in `project`
project = hopsworks.login()

# Feature store handle of that project, used below to look up the feature groups
fs = project.get_feature_store()

In [None]:
# Look up version 1 of both feature groups by name
weather_fg = fs.get_feature_group(name="weather_measurements", version=1)
electricity_fg = fs.get_feature_group(name="electricity_prices", version=1)

### <span style="color:#2656a3;"> ⬆️ Uploading new data to the Feature Store</span>

In [None]:
# Inserting the weather_df into the feature group named weather_fg
# weather_df carries 'date', 'time' and the hourly measurement columns prepared in the cells above
weather_fg.insert(weather_df)

In [None]:
# Inserting the electricity_df into the feature group named electricity_fg
# electricity_df carries 'date', 'time', 'SpotPriceDKK_KWH' and whatever API columns were not dropped above
electricity_fg.insert(electricity_df)

---
## <span style="color:#2656a3;">⏭️ **Next:** Part 03: Training </span>

In the next notebook, you will train a model on the data stored in the Feature Groups.