## Testing the loading of magasinstatistikk into the source database


In [2]:
import os
from typing import Iterable

import pandas as pd
import requests
from sqlalchemy import create_engine, Engine

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
def load_raw_api_data(api_url: str, timeout: float = 30.0) -> pd.DataFrame:
    """Fetch JSON from ``api_url`` with an HTTP GET and load it into a DataFrame.

    Args:
        api_url: URL of the endpoint to query.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame built directly from the decoded JSON payload.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(api_url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error body as data.
    response.raise_for_status()

    return pd.DataFrame(response.json())

### 1. Prepare areas for loading into db table "area":
### api_url='https://biapi.nve.no/magasinstatistikk/api/Magasinstatistikk/HentOmråder'

In [4]:
# Fetch the raw area listing from the HentOmråder endpoint.
area_df = load_raw_api_data(
    api_url='https://biapi.nve.no/magasinstatistikk/api/Magasinstatistikk/HentOmråder'
)

# Preview the raw response as returned by the API.
area_df.head()


Unnamed: 0,land,elspot,vassdrag
0,"[{'navn': 'Norge', 'navn_langt': 'Norge', 'bes...","[{'navn': 'NO 1', 'navn_langt': 'Elspotområde ...","[{'navn': 'VASS1', 'navn_langt': 'Vassdragsomr..."


In [None]:
def prepare_areas(area_df: pd.DataFrame) -> pd.DataFrame:
    """Flatten the raw area response into one row per area.

    Each of the 'land', 'elspot' and 'vassdrag' columns is read from the first
    row of ``area_df``, where it is expected to hold a list of area records.
    The records are expanded into columns, tagged in 'current_area' with the
    column they came from, and stacked into a single frame.

    Args:
        area_df: Raw frame with 'land', 'elspot' and 'vassdrag' columns.

    Returns:
        A new DataFrame with a fresh 0..n-1 index, the source column name in
        'current_area', and 'omrType' renamed to 'omr_type'.
    """
    areas = ['land', 'elspot', 'vassdrag']

    def normalize_and_assign_areas(area: str) -> pd.DataFrame:
        # Only the first row is read; any further rows of area_df are ignored.
        return pd.json_normalize(area_df[area].iloc[0]).assign(current_area=area)

    return (
        pd.concat([normalize_and_assign_areas(area) for area in areas], ignore_index=True)
        .rename(columns={'omrType': 'omr_type'})
    )

# NOTE: this rebinds area_df from the raw API frame to the flattened frame, so
# re-running this cell without first re-running the fetch cell would pass
# prepare_areas its own output.
area_df = prepare_areas(area_df)

area_df

Unnamed: 0,navn,navn_langt,beskrivelse,omr_type,omrnr,current_area
0,Norge,Norge,Hele landet,NO,0,land
1,NO 1,Elspotområde 1,Øst-Norge. Omfatter østlige del av Østlandet f...,EL,1,elspot
2,NO 2,Elspotområde 2,"Sørvest-Norge. Omfatter sørlige del av Viken, ...",EL,2,elspot
3,NO 3,Elspotområde 3,Midt-Norge. Omfatter nordre og vestlige del av...,EL,3,elspot
4,NO 4,Elspotområde 4,Nord-Norge. Omfatter resten av Trøndelag og No...,EL,4,elspot
5,NO 5,Elspotområde 5,Vest-Norge. Omfatter midtre del av Vestland op...,EL,5,elspot
6,VASS1,Vassdragsområde 1,"Sørøst-Norge. Østlandet, Agder-fylkene og dele...",VASS,1,vassdrag
7,VASS2,Vassdragsområde 2,"Vest-landet. Resten av Rogaland, mesteparten a...",VASS,2,vassdrag
8,VASS3,Vassdragsområde 3,"Midt-Norge. Møre og Romsdal, Trøndelag og sørl...",VASS,3,vassdrag
9,VASS4,Vassdragsområde 4,Nord-Norge. Resten av Nordland og nordover.,VASS,4,vassdrag


In [6]:
area_df.dtypes

navn            object
navn_langt      object
beskrivelse     object
omr_type        object
omrnr            int64
current_area    object
dtype: object

### 2. Retrieve magasin statistics data from:
### api_url='https://biapi.nve.no/magasinstatistikk/api/Magasinstatistikk/HentOffentligData'
#### We do this before preparing dates so that we only generate date rows for the years that occur in the magasin statistics

In [7]:
# Fetch the public magasin statistics and parse both date columns in one chain.
# errors='coerce' turns any value that does not match the format into NaT
# instead of raising.
magasin_df = (
    load_raw_api_data(
        api_url='https://biapi.nve.no/magasinstatistikk/api/Magasinstatistikk/HentOffentligData',
    )
    .assign(
        dato_Id=lambda raw: pd.to_datetime(raw['dato_Id'], format='%Y-%m-%d', errors='coerce'),
        neste_Publiseringsdato=lambda raw: pd.to_datetime(
            raw['neste_Publiseringsdato'], format='%Y-%m-%dT%H:%M:%S', errors='coerce'
        ),
    )
)

### 3. Prepare dates for loading into db table "dates": 

In [8]:
def prepare_dates(years: Iterable[int]) -> pd.DataFrame:
    """Build the rows for the "dates" table: one row per calendar day.

    The range runs from 1 January of the smallest year to 31 December of the
    largest year in ``years``; years in between are included even if they are
    not listed.

    Args:
        years: Years to cover. Any iterable works, including one-shot
            iterators, and the order does not matter.

    Returns:
        A DataFrame with a 0..n-1 index and the columns 'iso_dato',
        'iso_aar', 'maaling_uke', 'iso_maaned' and 'iso_dag'.

    Raises:
        ValueError: If ``years`` is empty.
    """
    # Materialize once: min() followed by max() would exhaust a one-shot iterator.
    year_list = list(years)
    if not year_list:
        raise ValueError("years must contain at least one year")

    # Generate all dates for the given years
    date_range = pd.date_range(start=f"{min(year_list)}-01-01", end=f"{max(year_list)}-12-31")

    # NOTE(review): 'iso_aar' is the calendar year while 'maaling_uke' is the ISO
    # week, so days around New Year pair e.g. 1995 with week 52 - confirm that
    # this is what the "dates" table is meant to hold.
    df = pd.DataFrame({
        "iso_dato": date_range,
        "iso_aar": date_range.year,
        "maaling_uke": date_range.isocalendar().week,
        "iso_maaned": date_range.month,
        "iso_dag": date_range.day
    })

    # The isocalendar() week Series is indexed by the dates and the frame picks
    # that index up; replace it with a plain positional index.
    return df.reset_index(drop=True)

# Years present in the fetched statistics; their min and max bound the
# generated date range.
iso_aar = magasin_df['iso_aar'].unique()

dato_df = prepare_dates(iso_aar)
# Sanity check: show both ends of the generated range and the input years.
print(f"First 5: \n {dato_df.head(5)} \n \n Last 5: \n {dato_df.tail(5)} ")
print(iso_aar)

First 5: 
     iso_dato  iso_aar  maaling_uke  iso_maaned  iso_dag
0 1995-01-01     1995       52           1        1
1 1995-01-02     1995        1           1        2
2 1995-01-03     1995        1           1        3
3 1995-01-04     1995        1           1        4
4 1995-01-05     1995        1           1        5 
 
 Last 5: 
         iso_dato  iso_aar  maaling_uke  iso_maaned  iso_dag
11318 2025-12-27     2025       52          12       27
11319 2025-12-28     2025       52          12       28
11320 2025-12-29     2025        1          12       29
11321 2025-12-30     2025        1          12       30
11322 2025-12-31     2025        1          12       31 
[2007 2015 2005 2009 1995 2018 2016 2004 2003 2022 1997 2011 2012 2023
 2000 2020 2008 2002 2017 2024 1999 2019 1996 2010 2021 2001 1998 2013
 2006 2014 2025]


### 4. Drop 'iso_aar' and 'maaling_uke', which take up memory unnecessarily

In [9]:
# errors='ignore' keeps this cell re-runnable: on a second run the columns are
# already gone and a plain drop would raise KeyError.
magasin_df = magasin_df.drop(columns=['iso_aar', 'maaling_uke'], errors='ignore')

### 5. Prepare magasin statistics for loading into db table "magasinstatistikk_model":

In [10]:
magasin_df.head()

Unnamed: 0,dato_Id,omrType,omrnr,fyllingsgrad,kapasitet_TWh,fylling_TWh,neste_Publiseringsdato,fyllingsgrad_forrige_uke,endring_fyllingsgrad
0,2007-03-04,EL,5,0.416593,17.425789,7.259468,NaT,0.450818,-0.034225
1,2015-02-08,EL,5,0.48543,17.425789,8.458999,NaT,0.527302,-0.041872
2,2005-09-11,EL,5,0.875464,17.425789,15.25566,NaT,0.858146,0.017319
3,2009-02-22,EL,5,0.391005,17.425789,6.813575,NaT,0.425169,-0.034164
4,1995-05-07,EL,5,0.172278,17.425789,3.002083,NaT,0.175498,-0.003219


In [11]:
magasin_df.shape

(14139, 9)

In [12]:
magasin_df.dtypes

dato_Id                     datetime64[ns]
omrType                             object
omrnr                                int64
fyllingsgrad                       float64
kapasitet_TWh                      float64
fylling_TWh                        float64
neste_Publiseringsdato      datetime64[ns]
fyllingsgrad_forrige_uke           float64
endring_fyllingsgrad               float64
dtype: object

In [13]:
# Map the API's mixed-case column names to the snake_case names used in the
# rest of the notebook. Columns not listed here keep their name.
column_renames = {
    'dato_Id': 'dato_id',
    'omrType': 'omr_type',
    'kapasitet_TWh': 'kapasitet_twh',
    'fylling_TWh': 'fylling_twh',
    'fyllingsgrad': 'fyllingsgrad',
    'neste_Publiseringsdato': 'neste_publiseringsdato',
}
magasin_df = magasin_df.rename(columns=column_renames)
magasin_df.head()

Unnamed: 0,dato_id,omr_type,omrnr,fyllingsgrad,kapasitet_twh,fylling_twh,neste_publiseringsdato,fyllingsgrad_forrige_uke,endring_fyllingsgrad
0,2007-03-04,EL,5,0.416593,17.425789,7.259468,NaT,0.450818,-0.034225
1,2015-02-08,EL,5,0.48543,17.425789,8.458999,NaT,0.527302,-0.041872
2,2005-09-11,EL,5,0.875464,17.425789,15.25566,NaT,0.858146,0.017319
3,2009-02-22,EL,5,0.391005,17.425789,6.813575,NaT,0.425169,-0.034164
4,1995-05-07,EL,5,0.172278,17.425789,3.002083,NaT,0.175498,-0.003219


In [14]:
# Connection settings for the source database. Each value can be overridden
# through an environment variable of the same name, so real credentials never
# have to be written into the notebook; the fallbacks are the values this
# notebook used before.
POSTGRES_USER = os.environ.get("POSTGRES_USER", "postgres")
POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD", "changethis")

POSTGRES_SERVER = os.environ.get("POSTGRES_SERVER", "localhost")
POSTGRES_PORT = int(os.environ.get("POSTGRES_PORT", "5432"))
POSTGRES_DB = os.environ.get("POSTGRES_DB", "nve_db")
SRC_DB_URL = f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{POSTGRES_SERVER}:{POSTGRES_PORT}/{POSTGRES_DB}"


In [15]:
engine = create_engine(SRC_DB_URL)

### 6. Insert dates into "dates" table in db

In [16]:
def insert_into_db(engine: Engine, *, table_name: str, df: pd.DataFrame) -> pd.DataFrame:
    """Append ``df`` to ``table_name`` and read the whole table back.

    Both steps run on one connection inside a single ``engine.begin()``
    transaction. The frame's index is not written.

    Rows are always appended, so inserting the same frame twice either
    duplicates the rows or fails if the table enforces uniqueness.

    Args:
        engine: SQLAlchemy engine for the target database.
        table_name: Table to append to and then read.
        df: Rows to insert.

    Returns:
        Every row currently in the table (not only the ones inserted here),
        including any columns that the database fills in itself.
    """
    with engine.begin() as connection:
        df.to_sql(name=table_name, con=connection, if_exists='append', index=False)
        table_contents = pd.read_sql_table(table_name=table_name, con=connection)

    return table_contents

In [17]:
dato_dim_table_df = insert_into_db(engine, table_name='dates', df=dato_df)

IntegrityError: (psycopg2.errors.UniqueViolation) duplicate key value violates unique constraint "dates_iso_dato_key"
DETAIL:  Key (iso_dato)=(1995-01-01) already exists.

[SQL: INSERT INTO dates (iso_dato, iso_aar, maaling_uke, iso_maaned, iso_dag) VALUES (%(iso_dato__0)s, %(iso_aar__0)s, %(maaling_uke__0)s, %(iso_maaned__0)s, %(iso_dag__0)s), (%(iso_dato__1)s, %(iso_aar__1)s, %(maaling_uke__1)s, %(iso_maaned__1)s, %(iso_dag__1)s), (%( ... 95173 characters truncated ... 8)s), (%(iso_dato__999)s, %(iso_aar__999)s, %(maaling_uke__999)s, %(iso_maaned__999)s, %(iso_dag__999)s)]
[parameters: {'iso_dato__0': datetime.datetime(1995, 1, 1, 0, 0), 'iso_dag__0': 1, 'maaling_uke__0': 52, 'iso_aar__0': 1995, 'iso_maaned__0': 1, 'iso_dato__1': datetime.datetime(1995, 1, 2, 0, 0), 'iso_dag__1': 2, 'maaling_uke__1': 1, 'iso_aar__1': 1995, 'iso_maaned__1': 1, 'iso_dato__2': datetime.datetime(1995, 1, 3, 0, 0), 'iso_dag__2': 3, 'maaling_uke__2': 1, 'iso_aar__2': 1995, 'iso_maaned__2': 1, 'iso_dato__3': datetime.datetime(1995, 1, 4, 0, 0), 'iso_dag__3': 4, 'maaling_uke__3': 1, 'iso_aar__3': 1995, 'iso_maaned__3': 1, 'iso_dato__4': datetime.datetime(1995, 1, 5, 0, 0), 'iso_dag__4': 5, 'maaling_uke__4': 1, 'iso_aar__4': 1995, 'iso_maaned__4': 1, 'iso_dato__5': datetime.datetime(1995, 1, 6, 0, 0), 'iso_dag__5': 6, 'maaling_uke__5': 1, 'iso_aar__5': 1995, 'iso_maaned__5': 1, 'iso_dato__6': datetime.datetime(1995, 1, 7, 0, 0), 'iso_dag__6': 7, 'maaling_uke__6': 1, 'iso_aar__6': 1995, 'iso_maaned__6': 1, 'iso_dato__7': datetime.datetime(1995, 1, 8, 0, 0), 'iso_dag__7': 8, 'maaling_uke__7': 1, 'iso_aar__7': 1995, 'iso_maaned__7': 1, 'iso_dato__8': datetime.datetime(1995, 1, 9, 0, 0), 'iso_dag__8': 9, 'maaling_uke__8': 2, 'iso_aar__8': 1995, 'iso_maaned__8': 1, 'iso_dato__9': datetime.datetime(1995, 1, 10, 0, 0), 'iso_dag__9': 10, 'maaling_uke__9': 2, 'iso_aar__9': 1995, 'iso_maaned__9': 1 ... 4900 parameters truncated ... 
'iso_dato__990': datetime.datetime(1997, 9, 17, 0, 0), 'iso_dag__990': 17, 'maaling_uke__990': 38, 'iso_aar__990': 1997, 'iso_maaned__990': 9, 'iso_dato__991': datetime.datetime(1997, 9, 18, 0, 0), 'iso_dag__991': 18, 'maaling_uke__991': 38, 'iso_aar__991': 1997, 'iso_maaned__991': 9, 'iso_dato__992': datetime.datetime(1997, 9, 19, 0, 0), 'iso_dag__992': 19, 'maaling_uke__992': 38, 'iso_aar__992': 1997, 'iso_maaned__992': 9, 'iso_dato__993': datetime.datetime(1997, 9, 20, 0, 0), 'iso_dag__993': 20, 'maaling_uke__993': 38, 'iso_aar__993': 1997, 'iso_maaned__993': 9, 'iso_dato__994': datetime.datetime(1997, 9, 21, 0, 0), 'iso_dag__994': 21, 'maaling_uke__994': 38, 'iso_aar__994': 1997, 'iso_maaned__994': 9, 'iso_dato__995': datetime.datetime(1997, 9, 22, 0, 0), 'iso_dag__995': 22, 'maaling_uke__995': 39, 'iso_aar__995': 1997, 'iso_maaned__995': 9, 'iso_dato__996': datetime.datetime(1997, 9, 23, 0, 0), 'iso_dag__996': 23, 'maaling_uke__996': 39, 'iso_aar__996': 1997, 'iso_maaned__996': 9, 'iso_dato__997': datetime.datetime(1997, 9, 24, 0, 0), 'iso_dag__997': 24, 'maaling_uke__997': 39, 'iso_aar__997': 1997, 'iso_maaned__997': 9, 'iso_dato__998': datetime.datetime(1997, 9, 25, 0, 0), 'iso_dag__998': 25, 'maaling_uke__998': 39, 'iso_aar__998': 1997, 'iso_maaned__998': 9, 'iso_dato__999': datetime.datetime(1997, 9, 26, 0, 0), 'iso_dag__999': 26, 'maaling_uke__999': 39, 'iso_aar__999': 1997, 'iso_maaned__999': 9}]
(Background on this error at: https://sqlalche.me/e/20/gkpj)

In [None]:
dato_dim_table_df.head()

Unnamed: 0,id,iso_dato,iso_aar,maaling_uke,iso_maaned,iso_dag
0,1,1995-01-01,1995,52,1,1
1,2,1995-01-02,1995,1,1,2
2,3,1995-01-03,1995,1,1,3
3,4,1995-01-04,1995,1,1,4
4,5,1995-01-05,1995,1,1,5


In [None]:
dato_dim_table_df.dtypes

id                     int64
iso_dato      datetime64[ns]
iso_aar                int64
maaling_uke                int64
iso_maaned             int64
iso_dag                int64
dtype: object

### 7. Insert areas into "area" table in db

In [25]:
elspots_df = pd.read_csv('../elspots/elspots.csv')

In [None]:
area_table_df = insert_into_db(engine, table_name='area', df=area_df)

In [None]:
area_table_df.head()

Unnamed: 0,id,navn,navn_langt,beskrivelse,omr_type,omrnr,current_area
0,1,Norge,Norge,Hele landet,NO,0,land
1,2,NO 1,Elspotområde 1,Øst-Norge. Omfatter østlige del av Østlandet f...,EL,1,elspot
2,3,NO 2,Elspotområde 2,"Sørvest-Norge. Omfatter sørlige del av Viken, ...",EL,2,elspot
3,4,NO 3,Elspotområde 3,Midt-Norge. Omfatter nordre og vestlige del av...,EL,3,elspot
4,5,NO 4,Elspotområde 4,Nord-Norge. Omfatter resten av Trøndelag og No...,EL,4,elspot


In [None]:
area_table_df.dtypes

id               int64
navn            object
navn_langt      object
beskrivelse     object
omr_type        object
omrnr            int64
current_area    object
dtype: object

### 8. Merge magasin_df with the dato_dim_table_df and area_table_df dataframes and adjust columns

In [None]:
magasin_df = (
    magasin_df
    # Look up the "dates" row id for each measurement date.
    .merge(
        right=dato_dim_table_df[['id', 'iso_dato']],
        how='left',
        left_on='dato_id',
        right_on='iso_dato',
        validate='m:1',
    )
    # Replace the date itself with the id of its "dates" row.
    .drop(columns=['dato_id'])
    .rename(columns={'id': 'dato_id'})
    # Look up the "area" row id for each (omrnr, omr_type) pair.
    .merge(
        right=area_table_df[['id', 'omr_type', 'omrnr']],
        how='left',
        left_on=['omrnr', 'omr_type'],
        right_on=['omrnr', 'omr_type'],
        validate='m:1',
    )
    .rename(columns={'id': 'area_id'})
    .drop(columns=['iso_dato', 'omrnr', 'omr_type'])
)

# A left join keeps unmatched rows and leaves their key null, so verify that
# every row found both its date and its area before the frame is loaded.
magasin_unmatched_keys = magasin_df[['dato_id', 'area_id']].isna().sum()
assert not magasin_unmatched_keys.any(), (
    f"rows without a matching key:\n{magasin_unmatched_keys}"
)
    

In [None]:
magasin_df.head()

Unnamed: 0,fyllingsgrad,kapasitet_twh,fylling_twh,neste_publiseringsdato,fyllingsgrad_forrige_uke,endring_fyllingsgrad,dato_id,area_id
0,0.416593,17.425789,7.259468,NaT,0.450818,-0.034225,4446,6
1,0.48543,17.425789,8.458999,NaT,0.527302,-0.041872,7344,6
2,0.875464,17.425789,15.25566,NaT,0.858146,0.017319,3907,6
3,0.391005,17.425789,6.813575,NaT,0.425169,-0.034164,5167,6
4,0.172278,17.425789,3.002083,NaT,0.175498,-0.003219,127,6


In [26]:
elspots_df.head()

Unnamed: 0,X,Y,Z,Name
0,69.565139,21.044306,0,Elspotområde 4
1,63.09231,9.020742,0,Elspotområde 3
2,60.423713,6.093197,0,Elspotområde 5
3,58.77286,7.038022,0,Elspotområde 2
4,60.9011,11.385549,0,Elspotområde 1


### 9. Insert magasin into "magasinstatistikk_model" table in db

In [27]:
magasin_table_df = insert_into_db(engine, table_name='magasinstatistikk_model', df=magasin_df)

ProgrammingError: (psycopg2.errors.UndefinedColumn) column "omr_type" of relation "magasinstatistikk_model" does not exist
LINE 1: INSERT INTO magasinstatistikk_model (dato_id, omr_type, omrn...
                                                      ^

[SQL: INSERT INTO magasinstatistikk_model (dato_id, omr_type, omrnr, fyllingsgrad, kapasitet_twh, fylling_twh, neste_publiseringsdato, fyllingsgrad_forrige_uke, endring_fyllingsgrad) VALUES (%(dato_id__0)s, %(omr_type__0)s, %(omrnr__0)s, %(fyllingsgrad__0) ... 221842 characters truncated ... , %(neste_publiseringsdato__999)s, %(fyllingsgrad_forrige_uke__999)s, %(endring_fyllingsgrad__999)s)]
[parameters: {'fyllingsgrad_forrige_uke__0': 0.45081845, 'omr_type__0': 'EL', 'fylling_twh__0': 7.259468, 'neste_publiseringsdato__0': None, 'kapasitet_twh__0': 17.425789, 'fyllingsgrad__0': 0.41659337, 'omrnr__0': 5, 'dato_id__0': datetime.datetime(2007, 3, 4, 0, 0), 'endring_fyllingsgrad__0': -0.034225076, 'fyllingsgrad_forrige_uke__1': 0.527302, 'omr_type__1': 'EL', 'fylling_twh__1': 8.458999, 'neste_publiseringsdato__1': None, 'kapasitet_twh__1': 17.425789, 'fyllingsgrad__1': 0.48542988, 'omrnr__1': 5, 'dato_id__1': datetime.datetime(2015, 2, 8, 0, 0), 'endring_fyllingsgrad__1': -0.041872144, 'fyllingsgrad_forrige_uke__2': 0.8581455, 'omr_type__2': 'EL', 'fylling_twh__2': 15.25566, 'neste_publiseringsdato__2': None, 'kapasitet_twh__2': 17.425789, 'fyllingsgrad__2': 0.8754645, 'omrnr__2': 5, 'dato_id__2': datetime.datetime(2005, 9, 11, 0, 0), 'endring_fyllingsgrad__2': 0.017319024, 'fyllingsgrad_forrige_uke__3': 0.42516926, 'omr_type__3': 'EL', 'fylling_twh__3': 6.8135753, 'neste_publiseringsdato__3': None, 'kapasitet_twh__3': 17.425789, 'fyllingsgrad__3': 0.39100525, 'omrnr__3': 5, 'dato_id__3': datetime.datetime(2009, 2, 22, 0, 0), 'endring_fyllingsgrad__3': -0.03416401, 'fyllingsgrad_forrige_uke__4': 0.17549753, 'omr_type__4': 'EL', 'fylling_twh__4': 3.0020826, 'neste_publiseringsdato__4': None, 'kapasitet_twh__4': 17.425789, 'fyllingsgrad__4': 0.17227814, 'omrnr__4': 5, 'dato_id__4': datetime.datetime(1995, 5, 7, 0, 0), 'endring_fyllingsgrad__4': -0.0032193959, 'fyllingsgrad_forrige_uke__5': 0.48542988, 'omr_type__5': 'EL', 'fylling_twh__5': 7.821195, 'neste_publiseringsdato__5': None, 'kapasitet_twh__5': 17.425789 ... 8900 parameters truncated ... 
'kapasitet_twh__994': 17.425789, 'fyllingsgrad__994': 0.6704208, 'omrnr__994': 5, 'dato_id__994': datetime.datetime(2023, 7, 9, 0, 0), 'endring_fyllingsgrad__994': 0.039446652, 'fyllingsgrad_forrige_uke__995': 0.6681459, 'omr_type__995': 'EL', 'fylling_twh__995': 11.105072, 'neste_publiseringsdato__995': None, 'kapasitet_twh__995': 17.425789, 'fyllingsgrad__995': 0.637278, 'omrnr__995': 5, 'dato_id__995': datetime.datetime(2015, 1, 11, 0, 0), 'endring_fyllingsgrad__995': -0.030867875, 'fyllingsgrad_forrige_uke__996': 0.39100525, 'omr_type__996': 'EL', 'fylling_twh__996': 6.270085, 'neste_publiseringsdato__996': None, 'kapasitet_twh__996': 17.425789, 'fyllingsgrad__996': 0.3598164, 'omrnr__996': 5, 'dato_id__996': datetime.datetime(2009, 3, 1, 0, 0), 'endring_fyllingsgrad__996': -0.031188846, 'fyllingsgrad_forrige_uke__997': 0.74593896, 'omr_type__997': 'EL', 'fylling_twh__997': 12.958001, 'neste_publiseringsdato__997': None, 'kapasitet_twh__997': 17.425789, 'fyllingsgrad__997': 0.74361056, 'omrnr__997': 5, 'dato_id__997': datetime.datetime(2004, 9, 12, 0, 0), 'endring_fyllingsgrad__997': -0.0023283958, 'fyllingsgrad_forrige_uke__998': 0.9289993, 'omr_type__998': 'EL', 'fylling_twh__998': 15.620541, 'neste_publiseringsdato__998': datetime.datetime(2023, 11, 8, 13, 0), 'kapasitet_twh__998': 17.425789, 'fyllingsgrad__998': 0.8964037, 'omrnr__998': 5, 'dato_id__998': datetime.datetime(2023, 10, 29, 0, 0), 'endring_fyllingsgrad__998': -0.032595634, 'fyllingsgrad_forrige_uke__999': 0.30246642, 'omr_type__999': 'EL', 'fylling_twh__999': 5.041034, 'neste_publiseringsdato__999': None, 'kapasitet_twh__999': 17.425789, 'fyllingsgrad__999': 0.28928584, 'omrnr__999': 5, 'dato_id__999': datetime.datetime(1999, 5, 9, 0, 0), 'endring_fyllingsgrad__999': -0.013180584}]
(Background on this error at: https://sqlalche.me/e/20/f405)

### 10. Retrieve magasin statistics min max median model from:
### api_url: https://biapi.nve.no/magasinstatistikk/api/Magasinstatistikk/HentOffentligDataMinMaxMedian

In [None]:
magasin_min_max_df = load_raw_api_data(
    api_url='https://biapi.nve.no/magasinstatistikk/api/Magasinstatistikk/HentOffentligDataMinMaxMedian'
)

In [None]:
magasin_min_max_df.head()

Unnamed: 0,omrType,omrnr,maaling_uke,minFyllingsgrad,minFyllingTWH,medianFyllingsGrad,medianFylling_TWH,maxFyllingsgrad,maxFyllingTWH
0,EL,1,1,0.456893,2.742963,0.616703,3.70238,0.7596,4.560268
1,EL,1,2,0.422561,2.53685,0.573354,3.442137,0.698279,4.192127
2,EL,1,3,0.389071,2.335794,0.533293,3.201628,0.655389,3.934633
3,EL,1,4,0.350166,2.102222,0.489547,2.939,0.603621,3.623846
4,EL,1,5,0.318725,1.913467,0.449146,2.696455,0.570686,3.426118


### 11. Prepare magasin statistics min max median model for loading into db

In [None]:
magasin_min_max_df = (
    magasin_min_max_df
    # Bring the API's column names in line with the snake_case names used elsewhere.
    .rename(columns={
        'omrType': 'omr_type',
        'minFyllingsgrad': 'min_fyllingsgrad',
        'minFyllingTWH': 'min_fylling_twh',
        'medianFyllingsGrad': 'median_fyllingsgrad',
        'medianFylling_TWH': 'median_fylling_twh',
        'maxFyllingsgrad': 'max_fyllingsgrad',
        'maxFyllingTWH': 'max_fylling_twh',
    })
    # Look up the "area" row id for each (omrnr, omr_type) pair.
    .merge(
        right=area_table_df[['id', 'omr_type', 'omrnr']],
        how='left',
        left_on=['omrnr', 'omr_type'],
        right_on=['omrnr', 'omr_type'],
        validate='m:1',
    )
    .rename(columns={'id': 'area_id'})
    .drop(columns=['omrnr', 'omr_type'])
)

# A left join keeps unmatched rows and leaves their key null, so verify that
# every row found its area before the frame is loaded.
assert magasin_min_max_df['area_id'].notna().all(), "rows without a matching area_id"



In [None]:
magasin_min_max_df.head()

Unnamed: 0,maaling_uke,min_fyllingsgrad,min_fylling_twh,median_fyllingsgrad,median_fylling_twh,max_fyllingsgrad,max_fylling_twh,area_id
0,1,0.456893,2.742963,0.616703,3.70238,0.7596,4.560268,2
1,2,0.422561,2.53685,0.573354,3.442137,0.698279,4.192127,2
2,3,0.389071,2.335794,0.533293,3.201628,0.655389,3.934633,2
3,4,0.350166,2.102222,0.489547,2.939,0.603621,3.623846,2
4,5,0.318725,1.913467,0.449146,2.696455,0.570686,3.426118,2


### 12. Insert magasin statistics min max median into db

In [None]:
magasin_min_max_table_df = insert_into_db(engine, table_name='magasinstatistikk_min_max_median_model', df=magasin_min_max_df)

In [None]:
magasin_min_max_table_df.head()

Unnamed: 0,maaling_uke,min_fyllingsgrad,min_fylling_twh,median_fyllingsgrad,median_fylling_twh,max_fyllingsgrad,max_fylling_twh,area_id
0,1,0.456893,2.742963,0.616703,3.70238,0.7596,4.560268,2
1,2,0.422561,2.53685,0.573354,3.442137,0.698279,4.192127,2
2,3,0.389071,2.335794,0.533293,3.201628,0.655389,3.934633,2
3,4,0.350166,2.102222,0.489547,2.939,0.603621,3.623846,2
4,5,0.318725,1.913467,0.449146,2.696455,0.570686,3.426118,2


# This seems ok for now