In [1]:
import pandas
import os
import sys
import re
import sqlalchemy

### import the picture table

In [3]:
# get the path to the excel file
xls_path = os.path.join(os.environ['HOME'], 'Dropbox/Icke/Bilder/ClaraPix/ClaraPics.xlsx')
pic_table = pandas.read_excel(xls_path, dtype={'Datum':str})
pic_table['Key'] = pic_table['Key'].str.replace(r' ?, ', ',').str.replace(' ',',').str.replace(',&,',' & ').str.replace('Magische,Tiere', 'Magische Tiere')
pic_table.iloc[0:3]

Unnamed: 0,img #,Wer,Hilfe von,Title,Key,Datum,Datum unbekannt,Note,Set
0,1,Clara,,Muster,Kunst,Aug 18,,,
1,2,Clara,,Haus,"Haus,Sommer",Aug 18,x,Für Mama von Clara,
2,3,Clara,,Muster,Schule,Juli 18,,Was Clara alles schon kann,


# create the engine for connection to the db

In [27]:
from sqlalchemy import Column, ForeignKey, Integer, String, Date, Boolean, Table
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy import create_engine

Base = declarative_base()
engine = create_engine('postgresql://mahtin@localhost:5432/claragallery', echo=False)
engine.connect()

<sqlalchemy.engine.base.Connection at 0x11d5f4518>

## define tables and create them in PostgreSQL

In [5]:
import os
# absolute path to the picture database
pic_db_path = os.path.join(os.environ['HOME'], 'Dropbox/Icke/Bilder')
# folder in db
picture_folder = 'ClaraPix'
    
image_keywords_association = Table('association', Base.metadata, 
                                   Column('img_id', Integer, ForeignKey('images.id')),
                                   Column('key_id', Integer, ForeignKey('keywords.id')))


class Image(Base):
    __tablename__ = 'images'
    id = Column(Integer, primary_key=True)
    title = Column(String(90))
    path = Column(String(80), nullable=False)
    date = Column(Date(), nullable=False)
    date_assumed = Column(Boolean(), nullable=True)
    note = Column(String(120))
    helper = Column(String(80))
    stars = Column(Integer())
    
    col_id = Column(Integer, ForeignKey('collections.id'))
    collection = relationship("Collection", back_populates='images')
    
    user_id = Column(Integer, ForeignKey('users.id'))
    artist = relationship('User', back_populates='images')
    
    key_id = Column(Integer, ForeignKey('keywords.id'))
    # here, backref is used to create an images field in the Keyword table
    keywords = relationship('Keyword', secondary=image_keywords_association, backref='images')
    
    
    def __repr__(self):
        return f'<Image("{self.title}" by {self.artist.name}, ~/{self.path})>'

    
class Collection(Base):
    __tablename__ = 'collections'
    id = Column(Integer, primary_key=True)
    name = Column(String(80), nullable=False)
    images = relationship('Image', back_populates='collection')

    def __repr__(self):
        return f"<Collection(name:{self.name}, images:{len(self.images)})>"

class User(Base):
    __tablename__ = 'users'
    id = Column(Integer, primary_key=True)
    name = Column(String(80), nullable=False)
    types = Column(String(50))
    age = Column(Integer)
    images = relationship('Image', back_populates='artist')
    
    def __repr__(self):
        return f"<User(name:{self.name}, age:{self.age}, images:{len(self.images)})>"
    
    
class Keyword(Base):
    __tablename__ = 'keywords'
    id = Column(Integer, primary_key=True)
    name = Column(String(50), nullable=False)
    
    def __repr__(self):
        return f"<Keyword(name={self.name}, images:{len(self.images)})>"

    
Base.metadata.create_all(engine)

2019-07-31 14:45:17,422 INFO sqlalchemy.engine.base.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2019-07-31 14:45:17,423 INFO sqlalchemy.engine.base.Engine {'name': 'association'}
2019-07-31 14:45:17,471 INFO sqlalchemy.engine.base.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2019-07-31 14:45:17,472 INFO sqlalchemy.engine.base.Engine {'name': 'images'}
2019-07-31 14:45:17,473 INFO sqlalchemy.engine.base.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2019-07-31 14:45:17,474 INFO sqlalchemy.engine.base.Engine {'name': 'collections'}
2019-07-31 14:45:17,475 INFO sqlalchemy.engine.base.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table

### import table data to the database in a session

In [6]:
from sqlalchemy.orm import sessionmaker
Base.metadata.bind = engine
DBSession = sessionmaker(bind=engine)
session = DBSession()

In [6]:
from datetime import date as dt
title_pat = re.compile(r'^\w')

months = ['jan', 'feb', 'mär', 'apr', 'mai', 'jun', 'jul', 'aug', 'sep', 'okt', 'nov', 'dez']
month_dict = {month[1]:month[0] + 1 for month in enumerate(months)}

def str2date(string):
    if pd.isnull(string) or string == '?':
        return dt(2018, 1,1)
    if '.' in string:
        data = [int(num) for num in string.split('.')]
        return dt(data[2], data[1], data[0])
        
    date = string.split(' ')
    year = int(date[-1])
    year = 2000 + year if (year < 2000) else year
    month = date[0][:3].lower() if (len(date) - 1) else 'jan'
    month = month_dict[month] if month in month_dict.keys() else 1
    return dt(year, month, 1)

def add2db(row):
    '''
    adds rows from the df to the database
    '''
    
    # file and file check
    file = f"Clara{str(row['img # ']).zfill(4)}.jpg"
    # file path relative to database location
    path = os.path.join(picture_folder, file)
    abs_file = os.path.join(pic_db_path, path)
    if not os.path.isfile(abs_file):
        return f'File {path} does not exist'
    title = row['Title'] if row['Title'] == row['Title'] else 'ohne Titel'
    if not title_pat.match(title):
        title = 'ohne Titel'
        

    name = row['Wer'] if pd.notnull(row['Wer']) else 'Clara'
    stars = 0
    
    # if user exists, use this user
    artist = session.query(User).filter_by(name = name).first()
    # else create new user/artist
    if not artist:
        artist = User(name=name, age=6)
    
    # check if image already exists in db
    image = session.query(Image).filter_by(title=title).filter_by(path=path).first()
    if image:
        return f'Image {image.title} exists!'
    # create image instance
    image = Image(title=title, path=path, artist=artist, note=row['Note'], date=str2date(row['Datum']))

    if pd.notnull(row['Set']):
        collection = session.query(Collection).filter_by(name = row['Set']).first()
        if not collection:
            collection = Collection(name=row['Set'])
        image.collection = collection
        
    def get_key(key):
        keyword = session.query(Keyword).filter_by(name = key.strip()).first()
        keyword = keyword if keyword else Keyword(name=key.strip())
        return keyword
    
    if pd.notnull(row['Key']):
        image.keywords = [get_key(key) for key in row['Key'].split(',')]
    if pd.notnull(row['Hilfe von']):
        image.helper = row['Hilfe von']
    date_assumed = row['Datum unbekannt'] in ['x',''] or (row['Datum'] in ['', '?'])
    image.date_assumed = date_assumed
    session.add(image)
    return repr(image)

In [7]:
session.rollback()
pic_table.apply(add2db, axis=1)
session.commit()

0      <Image("Muster" by Clara, ~/ClaraPix/Clara0001...
1      <Image("Haus" by Clara, ~/ClaraPix/Clara0002.j...
2      <Image("Muster" by Clara, ~/ClaraPix/Clara0003...
3      <Image("Pythor" by Clara, ~/ClaraPix/Clara0004...
4      <Image("Herzmuster" by Clara, ~/ClaraPix/Clara...
5      <Image("Skizze Tänzerin" by Clara, ~/ClaraPix/...
6      <Image("Clara an der Straße" by Clara, ~/Clara...
7      <Image("Muster" by Clara, ~/ClaraPix/Clara0008...
8      <Image("Ninjago Truppe" by Clara, ~/ClaraPix/C...
9      <Image("Schlangenmuster" by Clara, ~/ClaraPix/...
10     <Image("Clara und Papa | Landschaft" by Clara,...
11     <Image("Predator X wartet auf Plesiosaurier | ...
12     <Image("Clara im Auto" by Clara, ~/ClaraPix/Cl...
13     <Image("Grün" by Clara, ~/ClaraPix/Clara0014.j...
14     <Image("Fröhlicher Schmetterling" by Clara, ~/...
15     <Image("Clara unter dem Regenbogen" by Clara, ...
16     <Image("Durchschlafen für Anna + Elsa" by Clar...
17     <Image("shift" by Clara,

In [31]:
session.query(Collection).first()
session.query(Image)[3:6]

2019-07-31 15:14:44,574 INFO sqlalchemy.engine.base.Engine SELECT collections.id AS collections_id, collections.name AS collections_name 
FROM collections 
 LIMIT %(param_1)s
2019-07-31 15:14:44,575 INFO sqlalchemy.engine.base.Engine {'param_1': 1}


<Collection(name:Dino-Buch, images:4)>

2019-07-31 15:14:44,579 INFO sqlalchemy.engine.base.Engine SELECT images.id AS images_id, images.title AS images_title, images.path AS images_path, images.date AS images_date, images.date_assumed AS images_date_assumed, images.note AS images_note, images.helper AS images_helper, images.stars AS images_stars, images.col_id AS images_col_id, images.user_id AS images_user_id, images.key_id AS images_key_id 
FROM images 
 LIMIT %(param_1)s OFFSET %(param_2)s
2019-07-31 15:14:44,580 INFO sqlalchemy.engine.base.Engine {'param_1': 3, 'param_2': 3}


[<Image("Pythor" by Clara, ~/ClaraPix/Clara0004.jpg)>,
 <Image("Herzmuster" by Clara, ~/ClaraPix/Clara0005.jpg)>,
 <Image("Skizze Tänzerin" by Clara, ~/ClaraPix/Clara0006.jpg)>]