# Database: sample table

**Set environment**

In [8]:
### basic
import sys
sys.path.append('../')
from config_sing import *

### specific tools
import sqlite3

# sqlite3 does not know NumPy integer types and would store them as BLOBs;
# convert them to plain Python ints on the way into the database.
# https://stackoverflow.com/questions/49456158/integer-in-python-pandas-becomes-blob-binary-in-sqlite
for np_int_type in (np.int64, np.int32):
    sqlite3.register_adapter(np_int_type, int)

### global variables
# Location of the SQLite database file this notebook writes to.
fdiry = os.path.join(FD_RES, 'database')
fname = "fragment_chr17.db"
FPATH_DB = os.path.join(fdiry, fname)

## Sample information

In [2]:
# List the entries directly under FD_RES to see which result folders exist.
# NOTE(review): FD_RES is not defined in this notebook; presumably it comes
# from the `from config_sing import *` in the setup cell - confirm.
fdiry = FD_RES
os.listdir(fdiry)

['input_score',
 'database',
 '.ipynb_checkpoints',
 'nuc',
 'count_fragment',
 'coverage',
 'model_linear',
 'log',
 'annotation_fragment',
 'peak',
 'source',
 'data']

In [3]:
# Locate the library-size files in the 'source' folder with a glob pattern.
fdiry = os.path.join(FD_RES, 'source')
fname = "library_size*"
fglob = os.path.join(fdiry, fname)

# Returns the list of paths matching the pattern (displayed as the cell output).
glob.glob(fglob)

['/mount/work/out/proj_combeffect/source/library_size.tsv',
 '/mount/work/out/proj_combeffect/source/library_size.txt']

In [4]:
!head /mount/work/out/proj_combeffect/source/library_size.tsv

Sample	Group	Size
Input1_20x	Input_20x	371718546
Input1	Input	18666630
Input2_20x	Input_20x	347635732
Input2	Input	20167924
Input3_20x	Input_20x	349994051
Input3	Input	23280988
Input4_20x	Input_20x	413508358
Input4	Input	19003938
Input5_20x	Input_20x	341110487


## Import sample information

In [5]:
# Load the library-size table from the 'source' folder into a DataFrame.
fdiry = os.path.join(FD_RES, 'source')
fname = "library_size.tsv"
fpath = os.path.join(fdiry, fname)

# pd.read_table parses tab-separated text by default.
dat_sam = pd.read_table(fpath)
# Show the column dtypes, then preview the first rows.
print(dat_sam.dtypes)
dat_sam.head()

Sample    object
Group     object
Size       int64
dtype: object


Unnamed: 0,Sample,Group,Size
0,Input1_20x,Input_20x,371718546
1,Input1,Input,18666630
2,Input2_20x,Input_20x,347635732
3,Input2,Input,20167924
4,Input3_20x,Input_20x,349994051


In [6]:
# Convert the DataFrame to a NumPy record array (one record per row, index dropped).
dat_sam.to_records(index=False)

rec.array([('Input1_20x', 'Input_20x', 371718546),
           ('Input1', 'Input',  18666630),
           ('Input2_20x', 'Input_20x', 347635732),
           ('Input2', 'Input',  20167924),
           ('Input3_20x', 'Input_20x', 349994051),
           ('Input3', 'Input',  23280988),
           ('Input4_20x', 'Input_20x', 413508358),
           ('Input4', 'Input',  19003938),
           ('Input5_20x', 'Input_20x', 341110487),
           ('Input5', 'Input',  15325016),
           ('TFX2_AZD2906', 'TFX_AZD2906',  48376253),
           ('TFX2_AZD9567', 'TFX_AZD9567',  52542517),
           ('TFX2_CORT108297', 'TFX_CORT108297',  43646484),
           ('TFX2_CpdA', 'TFX_CpdA',  41732268),
           ('TFX2_Dex', 'TFX_Dex',  45413539),
           ('TFX2_DMSO', 'TFX_DMSO',  43844606),
           ('TFX2_GW870086', 'TFX_GW870086',  56207769),
           ('TFX2_Hydrocortisone', 'TFX_Hydrocortisone',  47829936),
           ('TFX2_Mapracorat', 'TFX_Mapracorat',  38120420),
           ('TFX2_RU486', '

In [7]:
# Look at a single record: three fields, matching the three placeholders
# of the INSERT statement used when filling the Sample table.
row = dat_sam.to_records(index=False)[0]
print(row)

('Input1_20x', 'Input_20x', 371718546)


## Create table and schema

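Note: NumPy integers must be adapted to Python `int` before insertion, otherwise SQLite stores them as BLOBs. The adapters are registered in the setup cell at the top of this notebook:

```
# https://stackoverflow.com/questions/49456158/integer-in-python-pandas-becomes-blob-binary-in-sqlite
sqlite3.register_adapter(np.int64, lambda val: int(val))
sqlite3.register_adapter(np.int32, lambda val: int(val))
```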

In [9]:
# SQL statements used to (re)build the Sample table.

# Remove any previous version of the table so the notebook can be re-run.
query_reset = "DROP TABLE IF EXISTS Sample"

# Schema: one row per sample, keyed by the sample name.
query_table = (
    "CREATE TABLE IF NOT EXISTS Sample(\n"
    "    sample    TEXT PRIMARY KEY, \n"
    "    treatment TEXT,\n"
    "    size      INTEGER\n"
    ");"
)

# Parameterised insert; one value per column, in schema order.
query_insert = "INSERT INTO Sample (sample, treatment, size) VALUES (?, ?, ?)"

In [10]:
# Rebuild the Sample table in the SQLite database and print its contents.
fpath_db = FPATH_DB

# A sqlite3 connection used as a context manager only commits (or rolls back
# on error); it does NOT close the connection. Close it explicitly so the
# database file handle is released even if a statement fails.
conn = sqlite3.connect(fpath_db)
try:
    with conn:
        ### reset
        cursor = conn.cursor()
        query  = query_reset
        cursor.execute(query)

        ### create table
        query  = query_table
        cursor.execute(query)

        ### insert values
        # Each record supplies (Sample, Group, Size) for the three placeholders;
        # the NumPy integer in Size relies on the adapters registered in the
        # setup cell.
        rows   = dat_sam.to_records(index=False)
        query  = query_insert
        cursor.executemany(query, rows)

        ### show that the table is created
        cursor.execute("SELECT * FROM Sample")
        for row in cursor.fetchall():
            print(row)
finally:
    conn.close()

('Input1_20x', 'Input_20x', 371718546)
('Input1', 'Input', 18666630)
('Input2_20x', 'Input_20x', 347635732)
('Input2', 'Input', 20167924)
('Input3_20x', 'Input_20x', 349994051)
('Input3', 'Input', 23280988)
('Input4_20x', 'Input_20x', 413508358)
('Input4', 'Input', 19003938)
('Input5_20x', 'Input_20x', 341110487)
('Input5', 'Input', 15325016)
('TFX2_AZD2906', 'TFX_AZD2906', 48376253)
('TFX2_AZD9567', 'TFX_AZD9567', 52542517)
('TFX2_CORT108297', 'TFX_CORT108297', 43646484)
('TFX2_CpdA', 'TFX_CpdA', 41732268)
('TFX2_Dex', 'TFX_Dex', 45413539)
('TFX2_DMSO', 'TFX_DMSO', 43844606)
('TFX2_GW870086', 'TFX_GW870086', 56207769)
('TFX2_Hydrocortisone', 'TFX_Hydrocortisone', 47829936)
('TFX2_Mapracorat', 'TFX_Mapracorat', 38120420)
('TFX2_RU486', 'TFX_RU486', 47687319)
('TFX2_ZK216348', 'TFX_ZK216348', 45455167)
('TFX3_AZD2906', 'TFX_AZD2906', 26840149)
('TFX3_AZD9567', 'TFX_AZD9567', 44435989)
('TFX3_CORT108297', 'TFX_CORT108297', 32701206)
('TFX3_CpdA', 'TFX_CpdA', 26352610)
('TFX3_Dex', 'TFX_D