In [1]:
import pandas
import os
import sys
import sqlalchemy
xls_path = os.path.join(os.environ['HOME'], 'Dropbox/Icke/Bilder/ClaraPix/ClaraPics.xlsx')
pic_table = pandas.read_excel(xls_path, dtype={'Datum':str})
pic_table['Key'] = pic_table['Key'].str.replace(r' ?, ', ',').str.replace(' ',',').str.replace(',&,',' & ').str.replace('Magische,Tiere', 'Magische Tiere')
pic_table.iloc[0]

img #                   1
Wer                 Clara
Hilfe von             NaN
Title              Muster
Key                 Kunst
Datum              Aug 18
Datum unbekannt       NaN
Note                  NaN
Set                   NaN
Name: 0, dtype: object

# create the engine for connection to the db

In [2]:
from sqlalchemy import Column, ForeignKey, Integer, String, Date, Boolean, Table
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy import create_engine

Base = declarative_base()
engine = create_engine('postgresql://mahtin@localhost:5432/claragallery', echo=False)
engine.connect()

<sqlalchemy.engine.base.Connection at 0x11262f1d0>

## define tables and create them in PostgreSQL

In [3]:
import os
img_path = 'Dropbox/Icke/Bilder/ClaraPix'

    
image_keywords_association = Table('association', Base.metadata, 
                                   Column('img_id', Integer, ForeignKey('images.id')),
                                   Column('key_id', Integer, ForeignKey('keywords.id')))


class Image(Base):
    __tablename__ = 'images'
    id = Column(Integer, primary_key=True)
    title = Column(String(90))
    path = Column(String(80), nullable=False)
    date = Column(Date(), nullable=False)
    date_assumed = Column(Boolean(), nullable=True)
    note = Column(String(120))
    help = Column(String(80))
    stars = Column(Integer())
    
    col_id = Column(Integer, ForeignKey('collections.id'))
    collection = relationship("Collection", back_populates='images')
    
    user_id = Column(Integer, ForeignKey('users.id'))
    artist = relationship('User', back_populates='images')
    
    key_id = Column(Integer, ForeignKey('keywords.id'))
    keywords = relationship('Keyword', secondary=image_keywords_association, backref='images')
    
    def __repr__(self):
        return f'<Image("{self.title}" by {self.artist.name}, ~/{os.path.join(img_path, self.path)})>'

    
class Collection(Base):
    __tablename__ = 'collections'
    id = Column(Integer, primary_key=True)
    name = Column(String(80), nullable=False)
    images = relationship('Image', back_populates='collection')

    def __repr__(self):
        return f"<Collection(name:{self.name}, images:{len(self.images)})>"

class User(Base):
    __tablename__ = 'users'
    id = Column(Integer, primary_key=True)
    name = Column(String(80), nullable=False)
    types = Column(String(50))
    age = Column(Integer)
    images = relationship('Image', back_populates='artist')
   
    
    def __repr__(self):
        return f"<User(name:{self.name}, age:{self.age}, images:{len(self.images)})>"
    
    
class Keyword(Base):
    __tablename__ = 'keywords'
    id = Column(Integer, primary_key=True)
    name = Column(String(50), nullable=False)
    
    def __repr__(self):
        return f"<Keyword(name={self.name}, images:{len(self.images)})>"

    
Base.metadata.create_all(engine)

## add basic data to the database in a session

In [4]:
from sqlalchemy.orm import sessionmaker
Base.metadata.bind = engine
DBSession = sessionmaker(bind=engine)
session = DBSession()

In [5]:
from datetime import date as dt

months = ['jan', 'feb', 'mär', 'apr', 'mai', 'jun', 'jul', 'aug', 'sep', 'okt', 'nov', 'dez']
month_dict = {month[1]:month[0] + 1 for month in enumerate(months)}

def str2date(string):
    if pd.isnull(string) or string == '?':
        return dt(2018, 1,1)
    if '.' in string:
        data = [int(num) for num in string.split('.')]
        return dt(data[2], data[1], data[0])
        
    date = string.split(' ')
    year = int(date[-1])
    year = 2000 + year if (year < 2000) else year
    month = date[0][:3].lower() if (len(date) - 1) else 'jan'
    month = month_dict[month] if month in month_dict.keys() else 1
    return dt(year, month, 1)

def add2db(row):
    title = row['Title']
    path = f"Clara{str(row['img # ']).zfill(4)}.jpg"
    name = row['Wer'] if pd.notnull(row['Wer']) else 'Clara'
    stars = 0
    
    artist = session.query(User).filter_by(name = name).first()
    if not artist:
        artist = User(name=name, age=6)
    image = Image(title=title, path=path, artist=artist, note=row['Note'], date=str2date(row['Datum']))

    if pd.notnull(row['Set']):
        collection = session.query(Collection).filter_by(name = row['Set']).first()
        if not collection:
            collection = Collection(name=row['Set'])
        image.collection = collection
        
    def get_key(key):
        keyword = session.query(Keyword).filter_by(name = key.strip()).first()
        keyword = keyword if keyword else Keyword(name=key.strip())
        return keyword
    
    if pd.notnull(row['Key']):
        image.keywords = [get_key(key) for key in row['Key'].split(',')]
    if pd.notnull(row['Hilfe von']):
        image.help = row['Hilfe von']
    date_assumed = row['Datum unbekannt'] in ['x',''] or (row['Datum'] in ['', '?'])
    image.date_assumed = date_assumed
    session.add(image)

In [6]:
session.rollback()
pic_table.apply(add2db, axis=1)
session.commit()

0      None
1      None
2      None
3      None
4      None
5      None
6      None
7      None
8      None
9      None
10     None
11     None
12     None
13     None
14     None
15     None
16     None
17     None
18     None
19     None
20     None
21     None
22     None
23     None
24     None
       ... 
414    None
415    None
416    None
417    None
418    None
419    None
420    None
421    None
422    None
423    None
424    None
425    None
426    None
427    None
428    None
429    None
430    None
431    None
432    None
433    None
434    None
435    None
436    None
437    None
438    None
Length: 439, dtype: object

In [62]:
session.query(Keyword.name).all()

[('Kunst'),
 ('Haus'),
 ('Sommer'),
 ('Schule'),
 ('Ninjago'),
 ('Schlange'),
 ('Herz'),
 ('Tanz'),
 ('Clara'),
 ('Schlangen'),
 ('Familie'),
 ('Landschaft'),
 ('Dino'),
 ('Auto'),
 ('Schmetterling'),
 ('Anna & Elsa'),
 ('Planeten'),
 ('Einhorn'),
 ('Mädchen'),
 ('Inventar'),
 ('Pippi'),
 ('Grusel'),
 ('Freunde'),
 ('Tiere'),
 ('Maulwurf'),
 ('Papa'),
 ('Märchen'),
 ('Schema'),
 ('Sticker'),
 ('Pegasus'),
 ('Schwan'),
 ('Dreieck'),
 ('Muster'),
 ('Abdruck'),
 ('Weihnachten'),
 ('Monster'),
 ('Regenbogen'),
 ('Affe'),
 ('Elsa'),
 ('Baum'),
 ('Meer'),
 ('Krokodil'),
 (''),
 ('Spinne'),
 ('Blut'),
 ('Zeitmaschine'),
 ('Stern'),
 ('Wiese'),
 ('Dinos'),
 ('Blumen'),
 ('Skizze'),
 ('Collage'),
 ('Magische Tiere'),
 ('Tier'),
 ('Voldemort'),
 ('Herbst'),
 ('Natur'),
 ('Drachen'),
 ('Essen'),
 ('Kleid'),
 ('Pferde'),
 ('Wasser'),
 ('Ostern'),
 ('Prinzessin'),
 ('Stadt'),
 ('Pferd'),
 ('Kuscheltier'),
 ('Feiern'),
 ('Stillleben'),
 ('Tusche'),
 ('Feen'),
 ('Urzeit'),
 ('Schild'),
 ('Jahreszeite

In [60]:
keyset = ['Urzeit', 'Dinos']
    
session.query(Image).filter('')

[{'title': 'Seite 47 - T-Rex unter Palmen',
  'artist': 'Clara',
  'path': 'Dropbox/Icke/Bilder/ClaraPix/Clara0238.jpg'},
 {'title': 'Spinosaurier',
  'artist': 'Clara',
  'path': 'Dropbox/Icke/Bilder/ClaraPix/Clara0245.jpg'},
 {'title': 'Archeopteryx',
  'artist': 'Clara',
  'path': 'Dropbox/Icke/Bilder/ClaraPix/Clara0250.jpg'},
 {'title': 'Triceratops',
  'artist': 'Clara',
  'path': 'Dropbox/Icke/Bilder/ClaraPix/Clara0256.jpg'},
 {'title': 'Dino mit Kamm',
  'artist': 'Clara',
  'path': 'Dropbox/Icke/Bilder/ClaraPix/Clara0298.jpg'},
 {'title': 'Steindino',
  'artist': 'Clara',
  'path': 'Dropbox/Icke/Bilder/ClaraPix/Clara0299.jpg'},
 {'title': 'Wasserdino',
  'artist': 'Clara',
  'path': 'Dropbox/Icke/Bilder/ClaraPix/Clara0300.jpg'},
 {'title': 'Blitzdino',
  'artist': 'Clara',
  'path': 'Dropbox/Icke/Bilder/ClaraPix/Clara0301.jpg'},
 {'title': 'Diamant-Dino',
  'artist': 'Clara',
  'path': 'Dropbox/Icke/Bilder/ClaraPix/Clara0302.jpg'},
 {'title': 'Ankylo-Stein-Dino',
  'artist': 'C