In [56]:
import sqlalchemy as db
from sqlalchemy import create_engine, inspect, MetaData
import pandas as pd

In [1]:
import os
# Show the kernel's working directory; the relative CSV paths written
# further down resolve against it.
os.getcwd()

'C:\\Users\\peter\\Coding\\Jupyter'

In [58]:
# Engine for the SQLite database file. The raw string spells the same Windows
# path without doubled backslashes; echo=False keeps SQL statement logging off.
engine = create_engine(
    r'sqlite:///C:\Users\peter\Documents\Projekte\Zähler\tc_all_new.db',
    echo=False,
)

In [59]:
# Open one connection on the engine; the query cells below execute through it.
conn = engine.connect()

#### So komme ich an die Tabellen in der DB!

In [60]:
# Inspector object used below to list table names and column metadata.
inspector = db.inspect(engine)

In [61]:
# All table names reported by the inspector, minus the last entry.
# NOTE(review): why the last table is excluded is not documented here - confirm.
tab = inspector.get_table_names()[:-1]

#### Und so die Spaltennamen und Typen in einer Tabelle!

In [62]:
# Fetch the column metadata of one table and print "<name> <type>" per column.
columns_table = inspector.get_columns('tcdata_CH_0084_01')
for col in columns_table:
    print(col['name'], col['type'])

time TEXT
vfr11 REAL
s12 REAL
vfr21 REAL
s22 REAL
vfr1 REAL
s2 REAL


#### Es ist gar nicht so einfach, in SQLAlchemy auf Tabellen einer bestehenden DB zuzugreifen. Bei Verwendung des ORM kann "automap" zur Anwendung kommen. Ohne ORM geht es wie folgt:

In [63]:
# MetaData collection bound to the engine; the Table() calls below register
# their reflected tables in it.
# NOTE(review): the `bind` argument exists only in SQLAlchemy 1.x (removed in 2.0).
metadata = MetaData(bind=engine)

In [64]:
# Reflect the existing table from the database. `autoload_with=engine` replaces
# the deprecated `autoload=True` (removed in SQLAlchemy 2.0) and names the
# engine used for reflection explicitly instead of relying on bound metadata.
table = db.Table('tcdata_CH_0084_01', metadata, autoload_with=engine)

#### Voilà, mit der so definierten Tabelle kann ich die Abfragen starten!

In [65]:
# SELECT of all columns of the reflected table; display the first six rows.
q = db.select(table)
result = conn.execute(q)
row = result.fetchmany(size=6)
row

[('2021-07-27 19:53:00+02:00', 900.0, 68.9000015, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 19:58:00+02:00', 840.0, 65.5, 60.0, 73.0, 0.0, 0.0),
 ('2021-07-27 20:03:00+02:00', 840.0, 67.8000031, 60.0, 72.0, 0.0, 0.0),
 ('2021-07-27 20:08:00+02:00', 600.0, 79.3000031, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:13:00+02:00', 720.0, 72.3000031, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:18:00+02:00', 780.0, 73.9000015, 0.0, 0.0, 0.0, 0.0)]

#### Wichtige Importe

In [66]:
from sqlalchemy.sql import func, and_, desc, alias, text

#### Ab hier mit group_by

In [67]:
# November rows ('%m' == '11'), grouped by weekday ('%w').
# The single-argument and_() wrapper was redundant and has been dropped.
# NOTE: selecting all columns next to GROUP BY relies on SQLite's "bare column"
# behaviour - each group yields the values of one row of that group.
q = (
    db.select(table)
    .where(func.strftime('%m', table.c.time) == '11')
    .group_by(func.strftime('%w', table.c.time))
)
result = conn.execute(q)
row = result.fetchmany(25)
row

[('2021-11-07 04:53:00+01:00', 120.0, 67.0, 0.0, 0.0, 0.0, 0.0),
 ('2021-11-01 01:03:00+01:00', 180.0, 73.6999969, 0.0, 0.0, 0.0, 0.0),
 ('2021-11-02 01:03:00+01:00', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
 ('2021-11-03 01:03:00+01:00', 180.0, 64.6999969, 0.0, 0.0, 0.0, 0.0),
 ('2021-11-04 01:03:00+01:00', 60.0, 93.0, 0.0, 0.0, 0.0, 0.0),
 ('2021-11-05 01:03:00+01:00', 60.0, 71.0, 60.0, 75.0, 0.0, 0.0),
 ('2021-11-06 01:03:00+01:00', 360.0, 71.0, 0.0, 0.0, 0.0, 0.0)]

#### order_by

In [68]:
# Maximum s12 per weekday for November, sorted ascending by that maximum.
# The ORDER BY now names the aggregate that is actually selected instead of the
# bare column s12 (which only sorted by the maximum because of SQLite's
# bare-column handling alongside max()). The redundant single-argument and_()
# wrapper is dropped as well.
q = (
    db.select(func.strftime('%w', table.c.time), func.max(table.c.s12))
    .where(func.strftime('%m', table.c.time) == '11')
    .group_by(func.strftime('%w', table.c.time))
    .order_by(func.max(table.c.s12))
)
result = conn.execute(q)
row = result.fetchmany(25)
row

[('5', 97.0),
 ('2', 102.0),
 ('0', 105.0),
 ('3', 105.0),
 ('4', 108.0),
 ('1', 112.0),
 ('6', 131.0)]

#### Intro eines Alias für table (WICHTIG: *Alias* gilt nur für Tabellen. Spalten kann man mit *Labels* umbenennen.)

In [69]:
# Anonymous alias of the reflected table; the queries below select from it.
t = db.alias(table)

In [70]:
# All columns of the alias plus the month extracted from `time`, labelled 'Monat'.
q = db.select(
    t,
    func.strftime('%m', t.c.time).label('Monat'),
)
result = conn.execute(q)
row = result.fetchmany(size=6)
row

[('2021-07-27 19:53:00+02:00', 900.0, 68.9000015, 0.0, 0.0, 0.0, 0.0, '07'),
 ('2021-07-27 19:58:00+02:00', 840.0, 65.5, 60.0, 73.0, 0.0, 0.0, '07'),
 ('2021-07-27 20:03:00+02:00', 840.0, 67.8000031, 60.0, 72.0, 0.0, 0.0, '07'),
 ('2021-07-27 20:08:00+02:00', 600.0, 79.3000031, 0.0, 0.0, 0.0, 0.0, '07'),
 ('2021-07-27 20:13:00+02:00', 720.0, 72.3000031, 0.0, 0.0, 0.0, 0.0, '07'),
 ('2021-07-27 20:18:00+02:00', 780.0, 73.9000015, 0.0, 0.0, 0.0, 0.0, '07')]

#### Ab hier gucken, was noch gebraucht wird

In [71]:
# Row count per month; GROUP BY refers to the 'Monat' label by name.
q = (
    db.select(
        t,
        func.strftime('%m', t.c.time).label('Monat'),
        func.count(t.c.time),
    )
    .group_by('Monat')
)
result = conn.execute(q)
row = result.fetchmany(size=14)
row

[('2022-01-01 01:03:00+01:00', 540.0, 60.4000015, 0.0, 0.0, 0.0, 0.0, '01', 17500),
 ('2022-02-01 01:03:00+01:00', 180.0, 45.0, 0.0, 0.0, 0.0, 0.0, '02', 15837),
 ('2022-03-01 04:53:00+01:00', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, '03', 17660),
 ('2022-04-01 02:03:00+02:00', 60.0, 62.0, 0.0, 0.0, 0.0, 0.0, '04', 17276),
 ('2022-05-01 02:03:00+02:00', 240.0, 63.2999992, 0.0, 0.0, 0.0, 0.0, '05', 13862),
 ('2021-07-27 19:53:00+02:00', 900.0, 68.9000015, 0.0, 0.0, 0.0, 0.0, '07', 3407),
 ('2021-08-01 20:09:00+02:00', 600.0, 71.5999985, 0.0, 0.0, 0.0, 0.0, '08', 17286),
 ('2021-09-01 02:03:00+02:00', 120.0, 66.5, 0.0, 0.0, 0.0, 0.0, '09', 16613),
 ('2021-10-01 02:03:00+02:00', 120.0, 70.0, 0.0, 0.0, 0.0, 0.0, '10', 17238),
 ('2021-11-01 01:03:00+01:00', 180.0, 73.6999969, 0.0, 0.0, 0.0, 0.0, '11', 16612),
 ('2021-12-01 01:03:00+01:00', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, '12', 17059)]

In [72]:
# Row count per (hour, day-of-month); both group keys are referenced by label.
q = (
    db.select(
        t.c.time,
        func.strftime('%H', t.c.time).label('Stunde'),
        func.strftime('%d', t.c.time).label('Tag'),
        func.count(t.c.time),
    )
    .group_by('Stunde', 'Tag')
)
result = conn.execute(q)
row = result.fetchmany(size=14)
row

[('2021-09-01 02:03:00+02:00', '00', '01', 96),
 ('2021-08-02 02:00:00+02:00', '00', '02', 132),
 ('2021-09-03 02:03:00+02:00', '00', '03', 106),
 ('2021-08-04 02:03:00+02:00', '00', '04', 120),
 ('2021-08-05 02:03:00+02:00', '00', '05', 108),
 ('2021-08-06 02:03:00+02:00', '00', '06', 132),
 ('2021-08-07 02:03:00+02:00', '00', '07', 84),
 ('2021-08-08 02:04:00+02:00', '00', '08', 120),
 ('2021-08-09 02:03:00+02:00', '00', '09', 120),
 ('2021-08-10 02:03:00+02:00', '00', '10', 120),
 ('2021-08-11 02:03:00+02:00', '00', '11', 120),
 ('2021-08-12 02:03:00+02:00', '00', '12', 120),
 ('2021-08-13 02:03:00+02:00', '00', '13', 119),
 ('2021-08-14 02:03:00+02:00', '00', '14', 108)]

In [73]:
# Row count per weekday ('%w'); hour and day come from one row of each group.
# The output shows strftime applying the UTC offset: 19:53+02:00 yields hour '17'.
q = (
    db.select(
        t.c.time,
        func.strftime('%H', t.c.time).label('Stunde'),
        func.strftime('%d', t.c.time).label('Tag'),
        func.strftime('%w', t.c.time),
        func.count(t.c.time),
    )
    .group_by(func.strftime('%w', t.c.time))
)
result = conn.execute(q)
row = result.fetchmany(size=14)
row

[('2021-08-01 20:09:00+02:00', '18', '01', '0', 23336),
 ('2021-08-02 02:00:00+02:00', '00', '02', '1', 24313),
 ('2021-07-27 19:53:00+02:00', '17', '27', '2', 24001),
 ('2021-07-28 02:03:00+02:00', '00', '28', '3', 24658),
 ('2021-07-29 02:03:00+02:00', '00', '29', '4', 25082),
 ('2021-07-30 02:03:00+02:00', '00', '30', '5', 24370),
 ('2021-07-31 02:03:00+02:00', '00', '31', '6', 24590)]

#### Abfragen mit Zeiträumen als Kriterium

In [74]:
# Row count per weekday, restricted to rows at or after a lower time bound.
# NOTE(review): `time` is reflected as TEXT, so this looks like a plain string
# comparison in which the '+02:00' offset is not interpreted - confirm intent.
q = (
    db.select(
        t.c.time,
        func.strftime('%H', t.c.time).label('Stunde'),
        func.strftime('%d', t.c.time).label('Tag'),
        func.strftime('%w', t.c.time),
        func.count(t.c.time),
    )
    .where(t.c.time >= '2021-11-01 20:09:00+02:00')
    .group_by(func.strftime('%w', t.c.time))
)
result = conn.execute(q)
row = result.fetchmany(size=14)
row

[('2021-11-07 04:53:00+01:00', '03', '07', '0', 15566),
 ('2021-11-01 20:13:00+01:00', '19', '01', '1', 16612),
 ('2021-11-02 01:03:00+01:00', '00', '02', '2', 16966),
 ('2021-11-03 01:03:00+01:00', '00', '03', '3', 16394),
 ('2021-11-04 01:03:00+01:00', '00', '04', '4', 17014),
 ('2021-11-05 01:03:00+01:00', '00', '05', '5', 16364),
 ('2021-11-06 01:03:00+01:00', '00', '06', '6', 16374)]

In [75]:
# Row count per weekday inside a closed time window (both bounds inclusive).
q = (
    db.select(
        t.c.time,
        func.strftime('%H', t.c.time).label('Stunde'),
        func.strftime('%d', t.c.time).label('Tag'),
        func.strftime('%w', t.c.time).label('Wochentag'),
        func.count(t.c.time),
    )
    .where(
        and_(
            t.c.time >= '2021-11-01 20:09:00+02:00',
            t.c.time <= '2021-11-09 20:09:00+02:00',
        )
    )
    .group_by(func.strftime('%w', t.c.time))
)
result = conn.execute(q)
row = result.fetchmany(size=14)
row

[('2021-11-07 04:53:00+01:00', '03', '07', '0', 221),
 ('2021-11-01 20:13:00+01:00', '19', '01', '1', 624),
 ('2021-11-02 01:03:00+01:00', '00', '02', '2', 1090),
 ('2021-11-03 01:03:00+01:00', '00', '03', '3', 574),
 ('2021-11-04 01:03:00+01:00', '00', '04', '4', 573),
 ('2021-11-05 01:03:00+01:00', '00', '05', '5', 574),
 ('2021-11-06 01:03:00+01:00', '00', '06', '6', 334)]

In [76]:
# Row count and average vfr11 per (weekday, hour) from the lower time bound on.
q = (
    db.select(
        t.c.time,
        func.strftime('%w', t.c.time).label('Wochentag'),
        func.strftime('%H', t.c.time).label('Stunde'),
        func.count(t.c.time).label('Anzahl'),
        func.avg(t.c.vfr11).label('AVG'),
    )
    .where(t.c.time >= '2021-11-01 20:09:00+02:00')
    .group_by(
        func.strftime('%w', t.c.time),
        func.strftime('%H', t.c.time),
    )
)
result = conn.execute(q)
row = result.fetchmany(size=10)
row

[('2021-11-14 01:03:00+01:00', '0', '00', 336, 293.39285714285717),
 ('2021-11-14 02:03:00+01:00', '0', '01', 336, 200.0),
 ('2021-11-14 03:03:00+01:00', '0', '02', 330, 163.8181818181818),
 ('2021-11-07 04:53:00+01:00', '0', '03', 444, 164.59459459459458),
 ('2021-11-07 05:00:00+01:00', '0', '04', 1571, 218.87969446212603),
 ('2021-11-14 06:00:00+01:00', '0', '05', 1546, 367.7619663648124),
 ('2021-11-07 07:53:00+01:00', '0', '06', 1463, 522.241968557758),
 ('2021-11-07 08:04:00+01:00', '0', '07', 330, 685.2727272727273),
 ('2021-11-07 09:04:00+01:00', '0', '08', 336, 943.9285714285714),
 ('2021-11-07 10:04:00+01:00', '0', '09', 336, 1176.607142857143)]

#### 1. Eine Abfrage, die nach Wochentag und Stunde gruppiert

In [77]:
# Per (weekday, hour): row count, average vfr11 ('AVG_flow') and average s12
# ('AVG_speed'). This query object is also fed to pandas in the next cell.
q = (
    db.select(
        t.c.time,
        func.strftime('%w', t.c.time).label('Wochentag'),
        func.strftime('%H', t.c.time).label('Stunde'),
        func.count(t.c.time).label('Anzahl'),
        func.avg(t.c.vfr11).label('AVG_flow'),
        func.avg(t.c.s12).label('AVG_speed'),
    )
    .where(t.c.time >= '2021-01-01 20:09:00+02:00')
    .group_by(
        func.strftime('%w', t.c.time),
        func.strftime('%H', t.c.time),
    )
)
result = conn.execute(q)
row = result.fetchmany(size=10)
row

[('2021-08-08 02:04:00+02:00', '0', '00', 478, 263.34728033472805, 64.77154843242684),
 ('2021-08-08 03:04:00+02:00', '0', '01', 491, 188.79837067209775, 63.85682304052957),
 ('2021-08-08 04:04:00+02:00', '0', '02', 484, 163.38842975206612, 62.46322318161159),
 ('2021-08-08 05:04:00+02:00', '0', '03', 660, 175.27272727272728, 61.54727290393932),
 ('2021-08-08 06:00:00+02:00', '0', '04', 2337, 267.75353016688064, 64.2570818250321),
 ('2021-08-08 07:00:00+02:00', '0', '05', 2322, 374.21188630490957, 65.19422928062006),
 ('2021-08-08 08:00:00+02:00', '0', '06', 2179, 557.4575493345571, 66.46392860123903),
 ('2021-08-08 09:03:00+02:00', '0', '07', 484, 802.4380165289256, 67.96756213367769),
 ('2021-08-08 10:03:00+02:00', '0', '08', 491, 1063.0142566191446, 67.27739303014255),
 ('2021-08-08 11:03:00+02:00', '0', '09', 490, 1271.7551020408164, 66.52448972510203)]

In [78]:
# Run the current query through pandas and keep the result as a DataFrame.
df = pd.read_sql_query(sql=q, con=conn)

In [79]:
# Preview at most the first 30 rows.
df.head(n=30)

Unnamed: 0,time,Wochentag,Stunde,Anzahl,AVG_flow,AVG_speed
0,2021-08-08 02:04:00+02:00,0,0,478,263.34728,64.771548
1,2021-08-08 03:04:00+02:00,0,1,491,188.798371,63.856823
2,2021-08-08 04:04:00+02:00,0,2,484,163.38843,62.463223
3,2021-08-08 05:04:00+02:00,0,3,660,175.272727,61.547273
4,2021-08-08 06:00:00+02:00,0,4,2337,267.75353,64.257082
5,2021-08-08 07:00:00+02:00,0,5,2322,374.211886,65.194229
6,2021-08-08 08:00:00+02:00,0,6,2179,557.457549,66.463929
7,2021-08-08 09:03:00+02:00,0,7,484,802.438017,67.967562
8,2021-08-08 10:03:00+02:00,0,8,491,1063.014257,67.277393
9,2021-08-08 11:03:00+02:00,0,9,490,1271.755102,66.52449


In [80]:
# Write the frame as CSV to the file '0084_01' (no extension) in the working
# directory; with no `index` argument the row index is written as well.
df.to_csv(path_or_buf='0084_01')

#### Bemerkung: Es wäre natürlich einfacher mit -- df.to_csv('file_name.csv',index=False) -- den Index überhaupt nicht abzuspeichern!

#### 2. die gleiche Abfrage, zusätzlich nach Monat und Tag gruppiert

In [81]:
# Same aggregates as the weekday/hour profile, grouped more finely by
# (weekday, month, day-of-month, hour).
q = (
    db.select(
        t.c.time,
        func.strftime('%w', t.c.time).label('Wochentag'),
        func.strftime('%m', t.c.time).label('Monat'),
        func.strftime('%d', t.c.time).label('Tag'),
        func.strftime('%H', t.c.time).label('Stunde'),
        func.count(t.c.time).label('Anzahl'),
        func.avg(t.c.vfr11).label('AVG_flow'),
        func.avg(t.c.s12).label('AVG_speed'),
    )
    .where(t.c.time >= '2021-01-01 20:09:00+02:00')
    .group_by(
        func.strftime('%w', t.c.time),
        func.strftime('%m', t.c.time),
        func.strftime('%d', t.c.time),
        func.strftime('%H', t.c.time),
    )
)
result = conn.execute(q)
row = result.fetchmany(size=10)
row

[('2022-01-02 01:03:00+01:00', '0', '01', '02', '00', 12, 140.0, 71.48333359166666),
 ('2022-01-02 02:03:00+01:00', '0', '01', '02', '01', 12, 125.0, 52.05000051666667),
 ('2022-01-02 03:03:00+01:00', '0', '01', '02', '02', 12, 105.0, 62.14999993333333),
 ('2022-01-02 04:03:00+01:00', '0', '01', '02', '03', 17, 70.58823529411765, 49.964705970588234),
 ('2022-01-02 05:00:00+01:00', '0', '01', '02', '04', 60, 134.0, 63.10166663833334),
 ('2022-01-02 06:00:00+01:00', '0', '01', '02', '05', 57, 402.10526315789474, 69.3245614017544),
 ('2022-01-02 07:00:00+01:00', '0', '01', '02', '06', 57, 464.2105263157895, 68.54912320000001),
 ('2022-01-02 08:03:00+01:00', '0', '01', '02', '07', 12, 460.0, 70.30833308333332),
 ('2022-01-02 09:03:00+01:00', '0', '01', '02', '08', 12, 600.0, 67.08333365833333),
 ('2022-01-02 10:03:00+01:00', '0', '01', '02', '09', 12, 1015.0, 68.50833383333334)]

In [82]:
# Load the detailed profile into a DataFrame and preview up to 30 rows.
df = pd.read_sql_query(sql=q, con=conn)
df.head(n=30)

Unnamed: 0,time,Wochentag,Monat,Tag,Stunde,Anzahl,AVG_flow,AVG_speed
0,2022-01-02 01:03:00+01:00,0,1,2,0,12,140.0,71.483334
1,2022-01-02 02:03:00+01:00,0,1,2,1,12,125.0,52.050001
2,2022-01-02 03:03:00+01:00,0,1,2,2,12,105.0,62.15
3,2022-01-02 04:03:00+01:00,0,1,2,3,17,70.588235,49.964706
4,2022-01-02 05:00:00+01:00,0,1,2,4,60,134.0,63.101667
5,2022-01-02 06:00:00+01:00,0,1,2,5,57,402.105263,69.324561
6,2022-01-02 07:00:00+01:00,0,1,2,6,57,464.210526,68.549123
7,2022-01-02 08:03:00+01:00,0,1,2,7,12,460.0,70.308333
8,2022-01-02 09:03:00+01:00,0,1,2,8,12,600.0,67.083334
9,2022-01-02 10:03:00+01:00,0,1,2,9,12,1015.0,68.508334


In [83]:
# Write the detailed profile to '0084_01_det' without the row index.
df.to_csv(path_or_buf='0084_01_det', index=False)

#### Example with join

In [85]:
# Reflect the second table. `autoload_with=engine` replaces the deprecated
# `autoload=True` (removed in SQLAlchemy 2.0) and names the engine used for
# reflection explicitly instead of relying on bound metadata.
table2 = db.Table('tcdata_CH_0084_02', metadata, autoload_with=engine)

In [86]:
from sqlalchemy import join


In [87]:
# SELECT of all columns of the second table; display the first six rows.
q = db.select(table2)
result = conn.execute(q)
row = result.fetchmany(size=6)
row

[('2021-07-27 19:53:00+02:00', 180.0, 81.6999969, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 19:58:00+02:00', 420.0, 85.6999969, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:03:00+02:00', 120.0, 82.0, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:08:00+02:00', 240.0, 80.3000031, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:13:00+02:00', 60.0, 75.0, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:18:00+02:00', 60.0, 84.0, 0.0, 0.0, 0.0, 0.0)]

from sqlalchemy.sql import select

In [88]:
# SELECT of all columns through the alias `t`; display the first six rows.
q = db.select(t)
result = conn.execute(q)
row = result.fetchmany(size=6)
row

[('2021-07-27 19:53:00+02:00', 900.0, 68.9000015, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 19:58:00+02:00', 840.0, 65.5, 60.0, 73.0, 0.0, 0.0),
 ('2021-07-27 20:03:00+02:00', 840.0, 67.8000031, 60.0, 72.0, 0.0, 0.0),
 ('2021-07-27 20:08:00+02:00', 600.0, 79.3000031, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:13:00+02:00', 720.0, 72.3000031, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:18:00+02:00', 780.0, 73.9000015, 0.0, 0.0, 0.0, 0.0)]

In [89]:
# Join of the two tables on equal `time` values.
j = db.join(table, table2, onclause=table.c.time == table2.c.time)

In [90]:
# Display the repr of the Join object built above.
j

<sqlalchemy.sql.selectable.Join at 0x1c939157130; Join object on tcdata_CH_0084_01(1963756002992) and tcdata_CH_0084_02(1963754746112)>

In [91]:
# Select every column of both joined tables; display the first six rows.
q = db.select(j)
result = conn.execute(q)
row = result.fetchmany(size=6)
row

[('2021-07-27 19:53:00+02:00', 900.0, 68.9000015, 0.0, 0.0, 0.0, 0.0, '2021-07-27 19:53:00+02:00', 180.0, 81.6999969, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 19:58:00+02:00', 840.0, 65.5, 60.0, 73.0, 0.0, 0.0, '2021-07-27 19:58:00+02:00', 420.0, 85.6999969, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:03:00+02:00', 840.0, 67.8000031, 60.0, 72.0, 0.0, 0.0, '2021-07-27 20:03:00+02:00', 120.0, 82.0, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:08:00+02:00', 600.0, 79.3000031, 0.0, 0.0, 0.0, 0.0, '2021-07-27 20:08:00+02:00', 240.0, 80.3000031, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:13:00+02:00', 720.0, 72.3000031, 0.0, 0.0, 0.0, 0.0, '2021-07-27 20:13:00+02:00', 60.0, 75.0, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:18:00+02:00', 780.0, 73.9000015, 0.0, 0.0, 0.0, 0.0, '2021-07-27 20:18:00+02:00', 60.0, 84.0, 0.0, 0.0, 0.0, 0.0)]

In [92]:
# Run a textual SELECT * over the join (str(j) renders its FROM clause) and
# print the result's column keys.
# The SQL string is wrapped in text(): passing a bare str to
# Connection.execute() is deprecated in SQLAlchemy 1.4 and rejected in 2.0.
result = conn.execute(db.text(f"SELECT * FROM {j}"))

print(result.keys())

RMKeyView(['time', 'vfr11', 's12', 'vfr21', 's22', 'vfr1', 's2', 'time', 'vfr11', 's12', 'vfr21', 's22', 'vfr1', 's2'])


#### Ok, wir sehen also oben das Problem der identischen Spaltennamen!

#### In der nächsten Abfrage sieht man, wie man die Spalte bei der Abfrage neu labeln kann!

In [93]:
# Two columns from the join of alias `t` and table2: vfr11 of `t`, relabelled
# 'huhu', and the time column of table2.
q = (
    db.select(
        t.c.vfr11.label('huhu'),
        table2.c.time,
    )
    .select_from(t.join(table2, onclause=t.c.time == table2.c.time))
)
result = conn.execute(q)
row = result.fetchmany(size=6)
row

[(900.0, '2021-07-27 19:53:00+02:00'),
 (840.0, '2021-07-27 19:58:00+02:00'),
 (840.0, '2021-07-27 20:03:00+02:00'),
 (600.0, '2021-07-27 20:08:00+02:00'),
 (720.0, '2021-07-27 20:13:00+02:00'),
 (780.0, '2021-07-27 20:18:00+02:00')]

In [94]:
# Load the join query into a DataFrame and preview the first five rows.
dfjoin = pd.read_sql_query(sql=q, con=conn)
dfjoin.head(n=5)

Unnamed: 0,huhu,time
0,900.0,2021-07-27 19:53:00+02:00
1,840.0,2021-07-27 19:58:00+02:00
2,840.0,2021-07-27 20:03:00+02:00
3,600.0,2021-07-27 20:08:00+02:00
4,720.0,2021-07-27 20:13:00+02:00


### Hier mal probieren, wie das Neu-Labeln der Spalten von Spur 1 und Spur 2 funktioniert

In [108]:
# vfr11 of both tables side by side, relabelled 'f84_1' and 'f84_2', plus the
# weekday and the time column of each table.
q = (
    db.select(
        t.c.time,
        func.strftime('%w', t.c.time).label('Wochentag'),
        t.c.vfr11.label('f84_1'),
        table2.c.vfr11.label('f84_2'),
        table2.c.time,
    )
    .select_from(t.join(table2, onclause=t.c.time == table2.c.time))
)
result = conn.execute(q)
row = result.fetchmany(size=6)
row

[('2021-07-27 19:53:00+02:00', '2', 900.0, 180.0, '2021-07-27 19:53:00+02:00'),
 ('2021-07-27 19:58:00+02:00', '2', 840.0, 420.0, '2021-07-27 19:58:00+02:00'),
 ('2021-07-27 20:03:00+02:00', '2', 840.0, 120.0, '2021-07-27 20:03:00+02:00'),
 ('2021-07-27 20:08:00+02:00', '2', 600.0, 240.0, '2021-07-27 20:08:00+02:00'),
 ('2021-07-27 20:13:00+02:00', '2', 720.0, 60.0, '2021-07-27 20:13:00+02:00'),
 ('2021-07-27 20:18:00+02:00', '2', 780.0, 60.0, '2021-07-27 20:18:00+02:00')]

In [109]:
# Load the relabelled join into a DataFrame and preview the first five rows.
dfjoin = pd.read_sql_query(sql=q, con=conn)
dfjoin.head(n=5)

Unnamed: 0,time,Wochentag,f84_1,f84_2,time_1
0,2021-07-27 19:53:00+02:00,2,900.0,180.0,2021-07-27 19:53:00+02:00
1,2021-07-27 19:58:00+02:00,2,840.0,420.0,2021-07-27 19:58:00+02:00
2,2021-07-27 20:03:00+02:00,2,840.0,120.0,2021-07-27 20:03:00+02:00
3,2021-07-27 20:08:00+02:00,2,600.0,240.0,2021-07-27 20:08:00+02:00
4,2021-07-27 20:13:00+02:00,2,720.0,60.0,2021-07-27 20:13:00+02:00


#### Die column names, wenn ich nicht neu label

In [98]:
# Full join again without any relabelling, to inspect the resulting column names.
q = db.select(j)
result = conn.execute(q)
row = result.fetchmany(size=6)
row

[('2021-07-27 19:53:00+02:00', 900.0, 68.9000015, 0.0, 0.0, 0.0, 0.0, '2021-07-27 19:53:00+02:00', 180.0, 81.6999969, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 19:58:00+02:00', 840.0, 65.5, 60.0, 73.0, 0.0, 0.0, '2021-07-27 19:58:00+02:00', 420.0, 85.6999969, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:03:00+02:00', 840.0, 67.8000031, 60.0, 72.0, 0.0, 0.0, '2021-07-27 20:03:00+02:00', 120.0, 82.0, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:08:00+02:00', 600.0, 79.3000031, 0.0, 0.0, 0.0, 0.0, '2021-07-27 20:08:00+02:00', 240.0, 80.3000031, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:13:00+02:00', 720.0, 72.3000031, 0.0, 0.0, 0.0, 0.0, '2021-07-27 20:13:00+02:00', 60.0, 75.0, 0.0, 0.0, 0.0, 0.0),
 ('2021-07-27 20:18:00+02:00', 780.0, 73.9000015, 0.0, 0.0, 0.0, 0.0, '2021-07-27 20:18:00+02:00', 60.0, 84.0, 0.0, 0.0, 0.0, 0.0)]

In [99]:
# Load the unlabelled join into a DataFrame and preview the first five rows.
dfjoin = pd.read_sql_query(sql=q, con=conn)
dfjoin.head(n=5)

Unnamed: 0,time,vfr11,s12,vfr21,s22,vfr1,s2,time_1,vfr11_1,s12_1,vfr21_1,s22_1,vfr1_1,s2_1
0,2021-07-27 19:53:00+02:00,900.0,68.900002,0.0,0.0,0.0,0.0,2021-07-27 19:53:00+02:00,180.0,81.699997,0.0,0.0,0.0,0.0
1,2021-07-27 19:58:00+02:00,840.0,65.5,60.0,73.0,0.0,0.0,2021-07-27 19:58:00+02:00,420.0,85.699997,0.0,0.0,0.0,0.0
2,2021-07-27 20:03:00+02:00,840.0,67.800003,60.0,72.0,0.0,0.0,2021-07-27 20:03:00+02:00,120.0,82.0,0.0,0.0,0.0,0.0
3,2021-07-27 20:08:00+02:00,600.0,79.300003,0.0,0.0,0.0,0.0,2021-07-27 20:08:00+02:00,240.0,80.300003,0.0,0.0,0.0,0.0
4,2021-07-27 20:13:00+02:00,720.0,72.300003,0.0,0.0,0.0,0.0,2021-07-27 20:13:00+02:00,60.0,75.0,0.0,0.0,0.0,0.0


In [100]:
# Show the column labels of the joined frame; the duplicated names from the
# second table appear with a '_1' suffix in the output.
dfjoin.columns

Index(['time', 'vfr11', 's12', 'vfr21', 's22', 'vfr1', 's2', 'time_1',
       'vfr11_1', 's12_1', 'vfr21_1', 's22_1', 'vfr1_1', 's2_1'],
      dtype='object')

#### Jetzt eine Abfrage zur Identifikation von Staus

In [112]:
# Count, average vfr11 and average s12 per (weekday, month, day-of-month, hour).
q = (
    db.select(
        t.c.time,
        func.strftime('%w', t.c.time).label('Wochentag'),
        func.strftime('%m', t.c.time).label('Monat'),
        func.strftime('%d', t.c.time).label('Tag'),
        func.strftime('%H', t.c.time).label('Stunde'),
        func.count(t.c.time).label('Anzahl'),
        func.avg(t.c.vfr11).label('AVG_flow'),
        func.avg(t.c.s12).label('AVG_speed'),
    )
    .where(t.c.time >= '2021-01-01 20:09:00+02:00')
    .group_by(
        func.strftime('%w', t.c.time),
        func.strftime('%m', t.c.time),
        func.strftime('%d', t.c.time),
        func.strftime('%H', t.c.time),
    )
)
result = conn.execute(q)
row = result.fetchmany(size=10)
row

[('2022-01-02 01:03:00+01:00', '0', '01', '02', '00', 12, 140.0, 71.48333359166666),
 ('2022-01-02 02:03:00+01:00', '0', '01', '02', '01', 12, 125.0, 52.05000051666667),
 ('2022-01-02 03:03:00+01:00', '0', '01', '02', '02', 12, 105.0, 62.14999993333333),
 ('2022-01-02 04:03:00+01:00', '0', '01', '02', '03', 17, 70.58823529411765, 49.964705970588234),
 ('2022-01-02 05:00:00+01:00', '0', '01', '02', '04', 60, 134.0, 63.10166663833334),
 ('2022-01-02 06:00:00+01:00', '0', '01', '02', '05', 57, 402.10526315789474, 69.3245614017544),
 ('2022-01-02 07:00:00+01:00', '0', '01', '02', '06', 57, 464.2105263157895, 68.54912320000001),
 ('2022-01-02 08:03:00+01:00', '0', '01', '02', '07', 12, 460.0, 70.30833308333332),
 ('2022-01-02 09:03:00+01:00', '0', '01', '02', '08', 12, 600.0, 67.08333365833333),
 ('2022-01-02 10:03:00+01:00', '0', '01', '02', '09', 12, 1015.0, 68.50833383333334)]

In [105]:
# Rows with 0 <= s12 < 40, grouped by (weekday, month, day-of-month, hour).
# Only groups with more than 40 rows are kept, ordered by month descending and
# then by day-of-month.
q = (
    db.select(
        t.c.time,
        func.strftime('%Y', t.c.time).label('Jahr'),
        func.strftime('%m', t.c.time).label('Monat'),
        func.strftime('%d', t.c.time).label('Tag'),
        func.strftime('%H', t.c.time).label('Stunde'),
        func.count(t.c.time).label('Anzahl'),
        func.avg(t.c.vfr11).label('AVG_flow'),
        func.avg(t.c.s12).label('AVG_speed'),
    )
    .where(and_(t.c.s12 >= 0, t.c.s12 < 40))
    .group_by(
        func.strftime('%w', t.c.time),
        func.strftime('%m', t.c.time),
        func.strftime('%d', t.c.time),
        func.strftime('%H', t.c.time),
    )
    .having(func.count(t.c.time) > 40)
    .order_by(
        func.strftime('%m', t.c.time).desc(),
        func.strftime('%d', t.c.time),
    )
)
result = conn.execute(q)
row = result.fetchmany(size=10)
row

[('2021-12-01 17:00:00+01:00', '2021', '12', '01', '16', 55, 1156.3636363636363, 18.894545495272723),
 ('2021-12-02 16:02:00+01:00', '2021', '12', '02', '15', 56, 1320.0, 26.607142805),
 ('2021-12-02 17:01:00+01:00', '2021', '12', '02', '16', 54, 1374.4444444444443, 23.35555557185185),
 ('2021-12-03 17:00:00+01:00', '2021', '12', '03', '16', 47, 1972.340425531915, 34.49148940851064),
 ('2021-12-08 16:00:00+01:00', '2021', '12', '08', '15', 54, 1472.2222222222222, 30.675926038888882),
 ('2021-12-08 17:00:00+01:00', '2021', '12', '08', '16', 55, 1244.7272727272727, 22.232727330727275),
 ('2021-12-17 16:10:00+01:00', '2021', '12', '17', '15', 41, 1813.1707317073171, 31.387804587804883),
 ('2021-11-02 17:02:00+01:00', '2021', '11', '02', '16', 48, 1941.25, 31.937500014583332),
 ('2021-11-03 17:01:00+01:00', '2021', '11', '03', '16', 50, 1959.6, 31.990000040000005),
 ('2021-11-04 17:00:00+01:00', '2021', '11', '04', '16', 46, 1820.8695652173913, 30.752174039130434)]

#### Und nun die gleiche Query unter Verwendung der Label bei HAVING und ORDER_BY und Rundung der Averages

In [106]:
# Same query as before, but HAVING and ORDER BY refer to the labels by name and
# both averages are rounded.
# The 'AVG_speed' label sits INSIDE round(), so the outer round(...) expression
# carries no label of its own; the resulting column shows up under an
# auto-generated name ('round_1' in the DataFrame built from this query).
q = (
    db.select(
        t.c.time,
        func.strftime('%Y', t.c.time).label('Jahr'),
        func.strftime('%m', t.c.time).label('Monat'),
        func.strftime('%d', t.c.time).label('Tag'),
        func.strftime('%H', t.c.time).label('Stunde'),
        func.count(t.c.time).label('Anzahl'),
        func.round(func.avg(t.c.vfr11)).label('AVG_flow'),
        func.round(func.avg(t.c.s12).label('AVG_speed'), 1),
    )
    .where(and_(t.c.s12 >= 0, t.c.s12 < 40))
    .group_by(
        func.strftime('%w', t.c.time),
        func.strftime('%m', t.c.time),
        func.strftime('%d', t.c.time),
        func.strftime('%H', t.c.time),
    )
    .having(text('Anzahl > 40'))
    .order_by(desc('Monat'), 'Tag')
)
result = conn.execute(q)
row = result.fetchmany(size=10)
row

[('2021-12-01 17:00:00+01:00', '2021', '12', '01', '16', 55, 1156.0, 18.9),
 ('2021-12-02 16:02:00+01:00', '2021', '12', '02', '15', 56, 1320.0, 26.6),
 ('2021-12-02 17:01:00+01:00', '2021', '12', '02', '16', 54, 1374.0, 23.4),
 ('2021-12-03 17:00:00+01:00', '2021', '12', '03', '16', 47, 1972.0, 34.5),
 ('2021-12-08 16:00:00+01:00', '2021', '12', '08', '15', 54, 1472.0, 30.7),
 ('2021-12-08 17:00:00+01:00', '2021', '12', '08', '16', 55, 1245.0, 22.2),
 ('2021-12-17 16:10:00+01:00', '2021', '12', '17', '15', 41, 1813.0, 31.4),
 ('2021-11-02 17:02:00+01:00', '2021', '11', '02', '16', 48, 1941.0, 31.9),
 ('2021-11-03 17:01:00+01:00', '2021', '11', '03', '16', 50, 1960.0, 32.0),
 ('2021-11-04 17:00:00+01:00', '2021', '11', '04', '16', 46, 1821.0, 30.8)]

In [107]:
# Load the rounded query into a DataFrame and preview the first five rows.
dfjam = pd.read_sql_query(sql=q, con=conn)
dfjam.head(n=5)

Unnamed: 0,time,Jahr,Monat,Tag,Stunde,Anzahl,AVG_flow,round_1
0,2021-12-01 17:00:00+01:00,2021,12,1,16,55,1156.0,18.9
1,2021-12-02 16:02:00+01:00,2021,12,2,15,56,1320.0,26.6
2,2021-12-02 17:01:00+01:00,2021,12,2,16,54,1374.0,23.4
3,2021-12-03 17:00:00+01:00,2021,12,3,16,47,1972.0,34.5
4,2021-12-08 16:00:00+01:00,2021,12,8,15,54,1472.0,30.7


#### Man beachte den Effekt der Positionierung der Labels!!