![raptorQube](./images/raptorqube.jpg)

# Andreas Francois Vermeulen

![Vermeulen](./images/vermeulen.png)

### Supervisors: Dr Juliana Kuster Filipe Bowles / Dr Vladimir Janjic - University of St Andrews

![St Andrews](./images/standrews.jpg)

![raptorQube](./images/raptorqube2.jpg)

# Crawler-Chromosome Builder

![Crawler](./images/BasicCrawlerSmall.png)

## DNA Builder

In [1]:
import itertools as it
import numpy as np
import sqlite3 as sq
from sqlite3 import Error
import os
import pandas as pd

In [2]:
# True removes any existing master-service-definitions database and recreates
# it; False reuses whatever file is already on disk.
newstart = False

![Crawler](./images/crawler.png)

### Crawler Build

In [3]:
# Identifiers for the 20 input chromosomes: IC-000001 .. IC-000020.
i = list(range(1, 21))
inputChromosomeList = ['IC-%06d' % number for number in i]

In [4]:
# Identifiers for the 30 process chromosomes: PC-000001 .. PC-000030.
p = list(range(1, 31))
processChromosomeList = ['PC-%06d' % number for number in p]

In [5]:
# Identifiers for the 10 output chromosomes: OC-000001 .. OC-000010.
o = list(range(1, 11))
outputChromosomeList = ['OC-%06d' % number for number in o]

In [6]:
# Every (input, process, output) chromosome combination, one row each.
cw = np.array(
    list(
        it.product(
            inputChromosomeList,
            processChromosomeList,
            outputChromosomeList,
        )
    )
)
cw.shape

(6000, 3)

In [7]:
# 1-based sequence numbers, one per row of the combination matrix.
a = list(range(1, len(cw) + 1))

In [8]:
# One crawler identifier (CW-000000001, ...) per sequence number.
crawlerChromosomeList = np.array(['CW-%09d' % seq for seq in a])
crawlerChromosomeList.shape

(6000,)

In [9]:
# One control-yoke identifier (CCY-000000001, ...) per sequence number.
yokeControlChromosomeList = np.array(['CCY-%09d' % seq for seq in a])
yokeControlChromosomeList.shape

(6000,)

In [10]:
# Prefix every combination row with its crawler and control-yoke identifiers.
crawler = np.column_stack(
    [crawlerChromosomeList, yokeControlChromosomeList, cw]
)
crawler.shape

(6000, 5)

In [11]:
# Spot-check the second crawler row.
crawler[1]

array(['CW-000000002', 'CCY-000000002', 'IC-000001', 'PC-000001',
       'OC-000002'], dtype='<U13')

### Yoke Build

In [12]:
# The identifier is formatted from the constant 1, so every entry carries the
# same master yoke (CMY-000000001); only the length follows `a`.
yokeMasterChromosomeList = np.array(['CMY-%09d' % 1] * len(a))
yokeMasterChromosomeList.shape

(6000,)

In [13]:
# One slave-yoke identifier (CSY-000000001, ...) per sequence number.
yokeSlaveChromosomeList = np.array(['CSY-%09d' % seq for seq in a])
yokeSlaveChromosomeList.shape

(6000,)

In [14]:
# Two-column yoke table: master identifier, then slave identifier.
yoke = np.column_stack(
    [yokeMasterChromosomeList, yokeSlaveChromosomeList]
)
yoke.shape

(6000, 2)

In [15]:
# Spot-check the second yoke row.
yoke[1]

array(['CMY-000000001', 'CSY-000000002'], dtype='<U13')

## DNA Build

In [16]:
# Full DNA record per row: the yoke columns followed by the crawler columns.
dna = np.column_stack([yoke, crawler])

In [17]:
# Confirm the row and column counts of the combined array.
dna.shape

(6000, 7)

In [18]:
# Spot-check the second DNA row.
dna[1]

array(['CMY-000000001', 'CSY-000000002', 'CW-000000002', 'CCY-000000002',
       'IC-000001', 'PC-000001', 'OC-000002'], dtype='<U13')

## Store Crawlers

In [19]:
# Root folder of the RIF tree, resolved against the current working directory.
rifDirName = os.path.abspath('./000-RIF')
if os.path.exists(rifDirName):
    print('Using RIF directory')
    print(rifDirName)
else:
    print('Creating new RIF directory')
    print(rifDirName)
    os.makedirs(rifDirName)

Using RIF directory
C:\Users\sirve\Documents\My PhD\jupyter\Crawler-Chromosome\000-RIF


In [20]:
# Functional-layer folder directly beneath the RIF root.
funcDirName = os.path.join(rifDirName, '100-Functional-Layer')
if os.path.exists(funcDirName):
    print('Using RIF:Functional-Layer directory')
    print(funcDirName)
else:
    print('Creating new RIF:Functional-Layer directory')
    print(funcDirName)
    os.makedirs(funcDirName)

Using RIF:Functional-Layer directory
C:\Users\sirve\Documents\My PhD\jupyter\Crawler-Chromosome\000-RIF\100-Functional-Layer


In [21]:
# Definition folders kept under
# 200-Operational-Management-Layer/100-Crawler-Definitions.
opsDirList = [
    '100-Crawler-Master-Service-Definitions',
    '200-Crawler-Yoke-Definitions',
    '300-Crawler-Input-Definitions',
    '400-Crawler-Process-Definitions',
    '500-Crawler-Output-Definitions',
]
for opsDir in opsDirList:
    opsDirName = os.path.join(
        rifDirName,
        '200-Operational-Management-Layer',
        '100-Crawler-Definitions',
        opsDir,
    )
    if os.path.exists(opsDirName):
        print('\nUsing RIF:Operational-Management-Layer directory - %s' % opsDir)
        print(opsDirName)
    else:
        print('\nCreating new RIF:Operational-Management-Layer directory - %s' % opsDir)
        print(opsDirName)
        # makedirs also creates any missing intermediate folders.
        os.makedirs(opsDirName)


Using RIF:Operational-Management-Layer directory - 100-Crawler-Master-Service-Definitions
C:\Users\sirve\Documents\My PhD\jupyter\Crawler-Chromosome\000-RIF\200-Operational-Management-Layer\100-Crawler-Definitions\100-Crawler-Master-Service-Definitions

Using RIF:Operational-Management-Layer directory - 200-Crawler-Yoke-Definitions
C:\Users\sirve\Documents\My PhD\jupyter\Crawler-Chromosome\000-RIF\200-Operational-Management-Layer\100-Crawler-Definitions\200-Crawler-Yoke-Definitions

Using RIF:Operational-Management-Layer directory - 300-Crawler-Input-Definitions
C:\Users\sirve\Documents\My PhD\jupyter\Crawler-Chromosome\000-RIF\200-Operational-Management-Layer\100-Crawler-Definitions\300-Crawler-Input-Definitions

Using RIF:Operational-Management-Layer directory - 400-Crawler-Process-Definitions
C:\Users\sirve\Documents\My PhD\jupyter\Crawler-Chromosome\000-RIF\200-Operational-Management-Layer\100-Crawler-Definitions\400-Crawler-Process-Definitions

Using RIF:Operational-Management-La

## Generate Base Master-Service-Definitions

In [22]:
def create_connection(db_file):
    """Open the SQLite database at ``db_file`` and close it again.

    Opens a connection to ``db_file`` and closes it before returning, so the
    call serves as a check that the database can be opened.

    Parameters
    ----------
    db_file : str
        Path of the database file (or ``':memory:'``).

    Returns
    -------
    str or None
        The version string of the SQLite library on success; ``None`` when
        the database could not be opened (the error is printed).
    """
    # Pre-bind so the ``finally`` clause is safe when ``connect`` itself fails;
    # otherwise the cleanup raised UnboundLocalError and hid the real error.
    conn = None
    try:
        conn = sq.connect(db_file)
        # ``sq.version`` is deprecated (Python 3.12) and slated for removal;
        # ``sqlite_version`` reports the SQLite library actually in use.
        return sq.sqlite_version
    except Error as e:
        print(e)
        return None
    finally:
        if conn is not None:
            conn.close()

In [23]:
# SQLite file for the master service definitions; the folder is given relative
# to the current working directory.
dbname = 'Crawler-Master-Service-Definitions.db'
dbnamedir = './000-RIF/200-Operational-Management-Layer/100-Crawler-Definitions/100-Crawler-Master-Service-Definitions/'
dbfullname = os.path.join(dbnamedir, dbname)
print(dbfullname)

./000-RIF/200-Operational-Management-Layer/100-Crawler-Definitions/100-Crawler-Master-Service-Definitions/Crawler-Master-Service-Definitions.db


In [24]:
if not newstart:
    print('Using DB: %s' % dbfullname)
else:
    # Fresh start: remove any existing database file, then create a new one.
    if os.path.exists(dbfullname):
        print('Remove DB: %s' % dbfullname)
        os.remove(dbfullname)
    print('Create DB: %s - %s' % (dbfullname, create_connection(dbfullname)))

Using DB: ./000-RIF/200-Operational-Management-Layer/100-Crawler-Definitions/100-Crawler-Master-Service-Definitions/Crawler-Master-Service-Definitions.db


In [25]:
# Column labels, in the same left-to-right order as the columns of `dna`.
columnnames = [
    'CrawlerMasterYoke',
    'CrawlerSlaveYoke',
    'CrawlerName',
    'CrawlerControlYoke',
    'CrawlerInputChromosome',
    'CrawlerProcessChromosome',
    'CrawlerOutputChromosome',
]

# Wrap the DNA array in a labelled DataFrame.
df = pd.DataFrame(dna, columns=columnnames)

print(df.shape)
print(df.columns)

(6000, 7)
Index(['CrawlerMasterYoke', 'CrawlerSlaveYoke', 'CrawlerName',
       'CrawlerControlYoke', 'CrawlerInputChromosome',
       'CrawlerProcessChromosome', 'CrawlerOutputChromosome'],
      dtype='object')


In [26]:
# Preview the first rows of the labelled DNA table.
df.head()

Unnamed: 0,CrawlerMasterYoke,CrawlerSlaveYoke,CrawlerName,CrawlerControlYoke,CrawlerInputChromosome,CrawlerProcessChromosome,CrawlerOutputChromosome
0,CMY-000000001,CSY-000000001,CW-000000001,CCY-000000001,IC-000001,PC-000001,OC-000001
1,CMY-000000001,CSY-000000002,CW-000000002,CCY-000000002,IC-000001,PC-000001,OC-000002
2,CMY-000000001,CSY-000000003,CW-000000003,CCY-000000003,IC-000001,PC-000001,OC-000003
3,CMY-000000001,CSY-000000004,CW-000000004,CCY-000000004,IC-000001,PC-000001,OC-000004
4,CMY-000000001,CSY-000000005,CW-000000005,CCY-000000005,IC-000001,PC-000001,OC-000005


In [27]:
# Open the connection shared by the following cells; it stays open until
# conn.close() is called.
conn = sq.connect(dbfullname)

In [28]:
# Persist the complete DNA table, replacing any previous copy.
tablename = 'DNAData'
df.to_sql(
    name=tablename,
    con=conn,
    schema='RIF0001',
    if_exists='replace',
    index=True,
)
conn.commit()

In [29]:
# List the column labels; each one gets its own lookup table below.
df.columns

Index(['CrawlerMasterYoke', 'CrawlerSlaveYoke', 'CrawlerName',
       'CrawlerControlYoke', 'CrawlerInputChromosome',
       'CrawlerProcessChromosome', 'CrawlerOutputChromosome'],
      dtype='object')

In [30]:
# Store the distinct values of every column in its own '<column>Data' table.
for f in df.columns:
    # First occurrence of each value, renumbered from 0.
    df2 = df[f].drop_duplicates(keep='first').reset_index(drop=True)
    tablename = '%sData' % f
    print(tablename, df2.shape)
    df2.to_sql(
        name=tablename,
        con=conn,
        schema='RIF0001',
        if_exists='replace',
        index=True,
    )
    conn.commit()

CrawlerMasterYokeData (1,)
CrawlerSlaveYokeData (6000,)
CrawlerNameData (6000,)
CrawlerControlYokeData (6000,)
CrawlerInputChromosomeData (20,)
CrawlerProcessChromosomeData (30,)
CrawlerOutputChromosomeData (10,)


In [31]:
# Read one lookup table back from the database and preview it.
sSql = 'select * from CrawlerInputChromosomeData'
df3 = pd.read_sql(sql=sSql, con=conn)
df3.head()

Unnamed: 0,index,CrawlerInputChromosome
0,0,IC-000001
1,1,IC-000002
2,2,IC-000003
3,3,IC-000004
4,4,IC-000005


In [32]:
# Release the SQLite connection opened earlier in the notebook.
conn.close()