## Pivoting Start/End OD Data
***

The following function `pivot_se` pivots a start/end table: it takes the binary matrix values and translates them into a more readable dataframe whose cells contain the names of the start and end intersections instead of 1's and 0's.

In [7]:
## import relevant modules

import configparser
from psycopg2 import connect
import psycopg2.sql as pg
import pandas.io.sql as pandasql
import pandas as pd
from sqlalchemy import create_engine
import io

# Read the database settings from the local config file and open a psycopg2
# connection that the query cells below reuse as `con`.
CONFIG = configparser.ConfigParser()
CONFIG.read(r'C:\Users\alouis2\Documents\Python Scripts\db.cfg')
dbset = CONFIG['DBSETTINGS']  # section whose entries are passed to connect() as keyword arguments
con = connect(**dbset)

In [8]:
# load the whole alouis2.startend_path table into a DataFrame over the open connection
string = 'SELECT * FROM alouis2.startend_path'
df = pandasql.read_sql(pg.SQL(string), con)

# preview the first 15 rows
df.head(15)

Unnamed: 0,datetime_bin,userid,s_co_ba,s_co_un,s_co_pa,s_du_ro,s_du_df,s_du_ba,s_du_sp,s_du_un,...,e_fr_ba,e_fr_sp,e_fr_un,e_fr_yo,e_fr_ja,e_fr_pa,e_fb_ba,e_fb_sp,e_fb_yk,e_ea_bv
0,2018-01-01 00:00:17,-4109236477481291089,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2018-01-01 00:01:08,-1672501126208601408,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2018-01-01 00:01:11,-5833274664015405017,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2018-01-01 00:02:08,8147950039115731659,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2018-01-01 00:02:11,2538540623439968621,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,2018-01-01 00:02:47,1200906276903573645,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,2018-01-01 00:02:59,-4164760066955224419,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,2018-01-01 00:02:59,8620276987661645868,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,2018-01-01 00:03:16,6853760512054168715,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,2018-01-01 00:03:22,-7601830370391951161,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
def pivot_se(startend_df):
    """Collapse a 0/1 start/end table into named ``start`` and ``end`` columns.

    Parameters:
        startend_df: DataFrame with ``datetime_bin`` and ``userid`` columns.
            Every column from the third position onwards is treated as a 0/1
            flag whose name starts with a two-character prefix (``s_`` or
            ``e_`` in the source table). Each row is expected to have exactly
            two flags set.

    Returns:
        A new DataFrame with columns ``datetime_bin``, ``userid``, ``start``
        and ``end``. The flagged column names are emitted in column order with
        the two-character prefix stripped, so the first flagged column of a
        row lands in ``start`` and the second in ``end``.
    """
    # Read the flag columns from the frame that was passed in (not from a
    # notebook-level variable), so the function works on any input frame.
    flag_cols = list(startend_df.columns)[2:]

    # One boolean matrix for the whole table; rows are walked positionally, so
    # the result does not depend on the frame having a 0..n-1 index.
    flags = startend_df[flag_cols].values == 1

    reformatted = []
    for stamp, user, row in zip(startend_df['datetime_bin'],
                                startend_df['userid'],
                                flags):
        # col[2:] drops the leading "s_" / "e_" marker from the column name.
        hits = [col[2:] for col, is_set in zip(flag_cols, row) if is_set]
        reformatted.append([stamp, user, *hits])

    labels = ['datetime_bin', 'userid', 'start', 'end']
    return pd.DataFrame.from_records(reformatted, columns=labels)

# Pivot the start/end table held in `df` and preview the first 15 rows.
newdf = pivot_se(df)
newdf.head(15)

Unnamed: 0,datetime_bin,userid,start,end
0,2018-01-01 00:00:17,-4109236477481291089,du_yo,du_ja
1,2018-01-01 00:01:08,-1672501126208601408,du_pa,qu_bv
2,2018-01-01 00:01:11,-5833274664015405017,du_ja,du_yo
3,2018-01-01 00:02:08,8147950039115731659,du_pa,du_ja
4,2018-01-01 00:02:11,2538540623439968621,kn_df,du_df
5,2018-01-01 00:02:47,1200906276903573645,fr_ja,du_ja
6,2018-01-01 00:02:59,-4164760066955224419,du_df,qu_ro
7,2018-01-01 00:02:59,8620276987661645868,ad_ba,qu_ba
8,2018-01-01 00:03:16,6853760512054168715,co_un,du_un
9,2018-01-01 00:03:22,-7601830370391951161,qu_sp,co_un


In [16]:
# Bulk-load the pivoted start/end table into alouis2.startend_pivot via COPY.
CONFIG.read(r'C:\Users\alouis2\Documents\Python Scripts\engine.cfg')
engine = create_engine(CONFIG['engine']['e'])
conn = engine.raw_connection()
try:
    cur = conn.cursor()
    try:
        # Serialise the frame to an in-memory tab-separated buffer (no header,
        # no index) so it can be streamed straight into COPY.
        output = io.StringIO()
        newdf.to_csv(output, sep='\t', header=False, index=False)
        output.seek(0)  # rewind so COPY reads from the beginning of the buffer
        # NOTE(review): newer psycopg2 releases quote the table name given to
        # copy_from, which may reject a schema-qualified name - confirm against
        # the installed version (copy_expert is the alternative).
        cur.copy_from(output, 'alouis2.startend_pivot', null="")  # empty fields are loaded as NULL
        conn.commit()
    finally:
        cur.close()
finally:
    # This connection is not used again (a later cell opens its own), so
    # release it here instead of leaving it open.
    conn.close()



In [23]:
# Load the whole alouis2.complete table; note this rebinds `df`, replacing the
# start/end table loaded earlier.
string = 'SELECT * FROM alouis2.complete'
df = pandasql.read_sql(pg.SQL(string), con)
df

Unnamed: 0,datetime_bin,userid,start,end,co_ba,co_un,co_pa,du_ro,du_df,du_ba,...,fr_sp,fr_un,fr_yo,fr_ja,fr_pa,fb_ba,fb_sp,fb_yk,ea_bv,path_total
0,2018-01-01 00:00:17,-4109236477481291089,du_yo,du_ja,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
1,2018-01-01 00:00:17,-4109236477481291089,du_yo,du_ja,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
2,2018-01-01 00:01:08,-1672501126208601408,du_pa,qu_bv,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
3,2018-01-01 00:01:08,-1672501126208601408,du_pa,qu_bv,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
4,2018-01-01 00:01:11,-5833274664015405017,du_ja,du_yo,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
5,2018-01-01 00:01:11,-5833274664015405017,du_ja,du_yo,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
6,2018-01-01 00:02:08,8147950039115731659,du_pa,du_ja,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
7,2018-01-01 00:02:08,8147950039115731659,du_pa,du_ja,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
8,2018-01-01 00:02:11,2538540623439968621,kn_df,du_df,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,2
9,2018-01-01 00:02:11,2538540623439968621,kn_df,du_df,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,2


The function `pivot_path` takes a path table from SQL and converts its binary matrix into a single `path` column, where each row holds a list of the intersections on the corresponding path.

In [24]:
def pivot_path(path_df):
    """Collapse a 0/1 path table into a single ``path`` column of name lists.

    Parameters:
        path_df: DataFrame with ``datetime_bin``, ``userid``, ``start`` and
            ``end`` columns. Every column from the fifth position onwards is
            treated as a 0/1 flag named after a location, except
            ``path_total``, which is skipped.

    Returns:
        A new DataFrame with columns ``datetime_bin``, ``userid``, ``start``,
        ``end`` and ``path``. Each ``path`` cell is a list of the flagged
        column names for that row, in column order.
    """
    # Read the flag columns from the frame that was passed in (not from a
    # notebook-level variable). `path_total` is a per-row count rather than a
    # flag, so it is left out: otherwise a row whose count is 1 would get the
    # literal name 'path_total' appended to its path.
    flag_cols = [col for col in list(path_df.columns)[4:] if col != 'path_total']

    # One boolean matrix for the whole table; rows are walked positionally, so
    # the result does not depend on the frame having a 0..n-1 index.
    flags = path_df[flag_cols].values == 1

    reformatted = []
    for stamp, user, first, last, row in zip(path_df['datetime_bin'],
                                             path_df['userid'],
                                             path_df['start'],
                                             path_df['end'],
                                             flags):
        visited = [col for col, is_set in zip(flag_cols, row) if is_set]
        reformatted.append([stamp, user, first, last, visited])

    labels = ['datetime_bin', 'userid', 'start', 'end', 'path']
    return pd.DataFrame.from_records(reformatted, columns=labels)
    
# Pivot the path table held in `df`; this rebinds `newdf`.
newdf = pivot_path(df)

In [25]:
# Bulk-load the pivoted path table into alouis2.others_pivot via COPY.
CONFIG.read(r'C:\Users\alouis2\Documents\Python Scripts\engine.cfg')
engine = create_engine(CONFIG['engine']['e'])
conn = engine.raw_connection()
cur = conn.cursor()
try:
    # Serialise the frame to an in-memory tab-separated buffer (no header,
    # no index) so it can be streamed straight into COPY.
    output = io.StringIO()
    newdf.to_csv(output, sep='\t', header=False, index=False)
    output.seek(0)  # rewind so COPY reads from the beginning of the buffer
    # NOTE(review): newer psycopg2 releases quote the table name given to
    # copy_from, which may reject a schema-qualified name - confirm against
    # the installed version (copy_expert is the alternative).
    cur.copy_from(output, 'alouis2.others_pivot', null="")  # empty fields are loaded as NULL
    # commit must be *called*; a bare `conn.commit` only references the method
    # and leaves the COPY uncommitted.
    conn.commit()
finally:
    cur.close()

In [27]:
# Release the raw connection used for the upload, then preview the first 10
# rows of the pivoted path table.
conn.close()
newdf.head(10)

Unnamed: 0,datetime_bin,userid,start,end,path
0,2018-01-01 00:00:17,-4109236477481291089,du_yo,du_ja,"[du_yo, du_ja]"
1,2018-01-01 00:00:17,-4109236477481291089,du_yo,du_ja,"[du_yo, du_ja]"
2,2018-01-01 00:01:08,-1672501126208601408,du_pa,qu_bv,"[du_pa, qu_bv]"
3,2018-01-01 00:01:08,-1672501126208601408,du_pa,qu_bv,"[du_pa, qu_bv]"
4,2018-01-01 00:01:11,-5833274664015405017,du_ja,du_yo,"[du_yo, du_ja]"
5,2018-01-01 00:01:11,-5833274664015405017,du_ja,du_yo,"[du_yo, du_ja]"
6,2018-01-01 00:02:08,8147950039115731659,du_pa,du_ja,"[du_ja, du_pa]"
7,2018-01-01 00:02:08,8147950039115731659,du_pa,du_ja,"[du_ja, du_pa]"
8,2018-01-01 00:02:11,2538540623439968621,kn_df,du_df,"[du_df, kn_df]"
9,2018-01-01 00:02:11,2538540623439968621,kn_df,du_df,"[du_df, kn_df]"
