In [41]:
import psycopg2
import psycopg2.extras as extras
import config_warehouse as creds
import config_lake as creds_lake
import pandas as pd

In [42]:
# Show Tables

def Show_tables():
    try:
        # DB connection
        conn_string = "host="+ creds_lake.PGHOST +" port="+ "5432" +" dbname="+ creds_lake.PGDATABASE +" user=" + creds_lake.PGUSER +" password="+ creds_lake.PGPASSWORD
        conn = psycopg2.connect(conn_string)
        cur = conn.cursor()
        # Select data
        cur.execute("""SELECT table_name FROM information_schema.tables
        WHERE table_schema = 'public'""")
        records = cur.fetchall()
        for item in records:
            print(item)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()

Show_tables()

('electricityproductionplants',)
('measuringpoints',)
('measuringpointscoordinates',)
('electriccars21',)
('ZIPCode',)
('chargingstations_static',)
('chargingstations_locations',)


In [43]:
# Connection to Warehouse
def connect():

    # Set up a connection to the postgres server.
    conn_string = "host="+ creds.PGHOST +" port="+ "5432" +" dbname="+ creds.PGDATABASE +" user=" + creds.PGUSER \
                  +" password="+ creds.PGPASSWORD

    conn = psycopg2.connect(conn_string)
    print("Connected!")

    # Create a cursor object
    cursor = conn.cursor()

    return conn, cursor

In [44]:
connect()

Connected!


(<connection object at 0x000001B5B5277260; dsn: 'user=TeamX password=xxx dbname=warehouserds host=warehouserds.cbdiqykdx4bl.us-east-1.rds.amazonaws.com port=5432', closed: 0>,
 <cursor object at 0x000001B5B4A9AD60; closed: 0>)

In [45]:
# Create Table Grossregionen / Kantone

def create_table_regions():
    """ create tables in the PostgreSQL database"""
    commands = (
        """
        CREATE TABLE IF NOT EXISTS Regions_and_cantons (
            Canton_abbreviation CHAR(2) PRIMARY KEY,
            Canton_name VARCHAR(255) NOT NULL,
            Region_name VARCHAR(255) NOT NULL
            )
        """,
        """ INSERT INTO Regions_and_cantons(Canton_abbreviation, Canton_name, Region_name)
            VALUES
            ('GE', 'Geneva', 'Lake Geneva region'),
            ('VD', 'Vaud', 'Lake Geneva region'),
            ('VS', 'Valais', 'Lake Geneva region'),

            ('BE','Bern', 'Espace Mitteland'),
            ('SO', 'Solothurn', 'Espace Mitteland'),
            ('FR', 'Fribourg', 'Espace Mitteland'),
            ('NE', 'Neuchatel', 'Espace Mitteland'),
            ('JU', 'Jura', 'Espace Mitteland'),

            ('SG', 'St. Gallen', 'Eastern Switzerland'),
            ('TG', 'Thurgau', 'Eastern Switzerland'),
            ('AI', 'Appenzell Innerrhoden', 'Eastern Switzerland'),
            ('AR', 'Appenzell Ausserrhoden', 'Eastern Switzerland'),
            ('GL', 'Glarus', 'Eastern Switzerland'),
            ('SH', 'Schaffhausen', 'Eastern Switzerland'),
            ('GR', 'Graubünden', 'Eastern Switzerland'),

            ('UR', 'Uri', 'Central Switzerland'),
            ('SZ', 'Schwyz', 'Central Switzerland'),
            ('OW', 'Obwalden', 'Central Switzerland'),
            ('NW', 'Nidwalden', 'Central Switzerland'),
            ('LU', 'Lucerne', 'Central Switzerland'),
            ('ZG', 'Zug', 'Central Switzerland'),

            ('BS', 'Basel-Stadt', 'Northwestern Switzerland'),
            ('BL', 'Basel-Landschaft', 'Northwestern Switzerland'),
            ('AG', 'Aargau', 'Northwestern Switzerland'),

            ('ZH', 'Zurich', 'Zurich'),

            ('TI', 'Ticino', 'Ticino')
        """
    )
    try:
        # Set up a connection to the postgres server.
        conn_string = "host="+ creds.PGHOST +" port="+ "5432" +" dbname="+ creds.PGDATABASE +" user=" + creds.PGUSER \
                  +" password="+ creds.PGPASSWORD

        conn = psycopg2.connect(conn_string)
        print("Connected!")

        # Create a cursor object
        cursor = conn.cursor()
        # Create tables
        for command in commands:
            cursor.execute(command)
        print('Tables created')
        # close communication with the PostgreSQL database server
        cursor.close()
        # commit the changes
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()

In [46]:
create_table_regions()

Connected!
duplicate key value violates unique constraint "regions_and_cantons_pkey"
DETAIL:  Key (canton_abbreviation)=(GE) already exists.



In [47]:
# Dict Grossregionen
regions_dict =  {'Lake Geneva region' : {'GE' : 'Geneva' , 'VD' : 'Vaud', 'VS' : 'Valais'}, \
                 'Espace Mitteland' : {'BE' : 'Bern', 'SO' : 'Solothurn', 'FR' : 'Fribourg',
                                       'NE' : 'Neuchatel', 'JU' : 'Jura'}, \
                 'Eastern Switzerland' : {'SG' : 'St. Gallen', 'TG' : 'Thurgau',
                                          'AI' : 'Appenzell Innerrhoden', 'AR' : 'Appenzell Ausserrhoden',
                                          'GL' : 'Glarus', 'SH' : 'Schaffhausen', 'GR' : 'Graubünden'}, \
                 'Zurich' : {'ZH' : 'Zurich'}, \
                 'Central Switzerland' : {'UR' : 'Uri', 'SZ' : 'Schwyz', 'OW' : 'Obwalden',
                                          'NW' : 'Nidwalden', 'LU' : 'Lucerne', 'ZG' : 'Zug'}, \
                 'Northwestern Switzerland' : {'BS' : 'Basel-Stadt', 'BL' : 'Basel-Landschaft',
                                               'AG' : 'Aargau'}, \
                 'Ticino' : {'TI' : 'Ticino'}}

In [48]:
# Fetch Table plz

def fetch_data_plz():
    try:
        # DB connection
        conn_string = "host="+ creds_lake.PGHOST +" port="+ "5432" +" dbname="+ creds_lake.PGDATABASE +" user=" + creds_lake.PGUSER +" password="+ creds_lake.PGPASSWORD
        conn = psycopg2.connect(conn_string)
        cur = conn.cursor()
        # Select data
        cur.execute(""" SELECT DISTINCT postleitzahl, kanton FROM public."ZIPCode" where kanton not in ('FL', 'DE', 'IT')""") # plz can occur twice with different cantons
        # cur.execute(""" SELECT DISTINCT postleitzahl, kanton FROM public."ZIPCode" where kanton not in ('FL', 'DE', 'IT') GROUP BY postleitzahl, kanton HAVING COUNT(postleitzahl) = 1;""")
        df_records = cur.fetchall()
        df = pd.DataFrame(df_records, columns = ['postal_code', 'canton_abbreviation'])
        print(df.info())
        print(df[df.duplicated()==True]) # check for duplicates
        #df.drop_duplicates(subset=['postalcode'], keep='last', inplace=True)
        return df

    except (Exception, psycopg2.DatabaseError) as error:
        print(error)

    finally:
        if conn is not None:
            conn.close()

fetch_data_plz()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3487 entries, 0 to 3486
Data columns (total 2 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   postal_code          3487 non-null   int64 
 1   canton_abbreviation  3487 non-null   object
dtypes: int64(1), object(1)
memory usage: 54.6+ KB
None
Empty DataFrame
Columns: [postal_code, canton_abbreviation]
Index: []


Unnamed: 0,postal_code,canton_abbreviation
0,5303,AG
1,6452,UR
2,8301,ZH
3,8707,ZH
4,6928,TI
...,...,...
3482,6122,LU
3483,6110,LU
3484,8766,GL
3485,1071,VD


In [49]:
# Inserting dataframe into database
def execute_values(conn, df, table):
    tuples = [tuple(x) for x in df.to_numpy()]
    cols = ','.join(list(df.columns))
    # SQL query to execute
    query = "INSERT INTO %s(%s) VALUES %%s" % (table, cols)
    cur = conn.cursor()
    try:
        extras.execute_values(cur, query, tuples)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cur.close()
        return 1
    print("the dataframe is inserted")
    cur.close()

In [50]:
def create_table_plz():
    """ create tables in the PostgreSQL database"""
    commands = (
        """
        CREATE TABLE IF NOT EXISTS Postal_codes (
            ID_postal_code SERIAL PRIMARY KEY,
            Postal_code INT NOT NULL,
            Canton_abbreviation CHAR(2) NOT NULL,
            FOREIGN KEY (Canton_abbreviation) REFERENCES regions_and_cantons(canton_abbreviation))
        """
    )
    try:
        # Set up a connection to the postgres server.
        conn_string = "host="+ creds.PGHOST +" port="+ "5432" +" dbname="+ creds.PGDATABASE +" user=" + creds.PGUSER \
                  +" password="+ creds.PGPASSWORD

        conn = psycopg2.connect(conn_string)
        print("Connected!")

        # Create a cursor object
        cursor = conn.cursor()

        # Create tables
        #for command in commands:
        cursor.execute(commands)
        print('Tables created')

        # Import data from dataframe
        execute_values(conn, fetch_data_plz(), 'Postal_codes')

        # close communication with the PostgreSQL database server
        cursor.close()
        # commit the changes
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()

create_table_plz()

Connected!
Tables created
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3487 entries, 0 to 3486
Data columns (total 2 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   postal_code          3487 non-null   int64 
 1   canton_abbreviation  3487 non-null   object
dtypes: int64(1), object(1)
memory usage: 54.6+ KB
None
Empty DataFrame
Columns: [postal_code, canton_abbreviation]
Index: []
Error: duplicate key value violates unique constraint "postal_codes_pkey"
DETAIL:  Key (id_postal_code)=(3488) already exists.

