In [1]:
import sqlite3 as sq1
import pandas as pd

In [14]:
# Build a small demo database of RSV (respiratory syncytial virus) records
# from scratch, load sample rows, and print the results of several queries.
conn = sq1.connect("RSV.db")

try:
    # Drop any tables left over from a previous run so the cell is re-runnable:
    # `sequence` has a primary key, so re-inserting the same rows would fail.
    for table_name in ("sequence", "metadata", "phylogenetic", "ref", "epitope"):
        conn.execute(f"DROP TABLE IF EXISTS {table_name}")

    # NOTE(review): this connection never issues `PRAGMA foreign_keys = ON`,
    # and SQLite does not enforce foreign keys by default, so the FOREIGN KEY /
    # ON DELETE / ON UPDATE clauses below are declarative only. Turning
    # enforcement on would also require `phylogenetic.genotype` (the parent key
    # of `epitope`) to be UNIQUE or a PRIMARY KEY -- confirm intent before
    # enabling it.
    conn.execute("""
    CREATE TABLE IF NOT EXISTS sequence (assession TEXT, seq TEXT, PRIMARY KEY ('assession'));
    """)
    conn.execute("""
    CREATE TABLE IF NOT EXISTS metadata (assession TEXT, country TEXT, Year INT,
                    FOREIGN KEY('assession')
                    REFERENCES sequence ('assession')
                    ON DELETE CASCADE
                    ON UPDATE CASCADE);
    """)
    conn.execute("""
    CREATE TABLE IF NOT EXISTS ref (id TEXT, title TEXT, journal TEXT,
                    FOREIGN KEY('id')
                    REFERENCES sequence ('assession')
                    ON DELETE CASCADE
                    ON UPDATE CASCADE);
    """)
    conn.execute("""
    CREATE TABLE IF NOT EXISTS phylogenetic (assession TEXT, genotype TEXT, clade TEXT,
                    FOREIGN KEY('assession')
                    REFERENCES sequence ('assession')
                    ON DELETE CASCADE
                    ON UPDATE CASCADE);
    """)
    conn.execute("""
    CREATE TABLE IF NOT EXISTS epitope (genotype TEXT, epitope TEXT,
                    FOREIGN KEY('genotype')
                    REFERENCES phylogenetic ('genotype')
                    ON DELETE CASCADE
                    ON UPDATE CASCADE);
    """)

    # Sample data; `?` placeholders let sqlite3 bind the values safely.
    conn.executemany("""
    INSERT INTO sequence VALUES (?, ?)""", [('AY911262', 'ATGGACTCTTACT'),
                                           ('FJ614813', 'CTGGACGGTTTCC'),
                                           ('FJ948820', 'GCTTTCGGGATAC'),
                                           ('JF920046', 'ATGGACTCTTACT'),
                                           ('JF920047', 'GCTTTCGGGATAC')])
    conn.executemany("""
    INSERT INTO metadata VALUES (?,?,?)""", [('AY911262', 'USA', 2007),
                                            ('FJ614813', 'USA', 2009),
                                            ('FJ948820', 'Netherland', 2010),
                                            ('JF920046', 'USA', 2012),
                                            ('JF920047', 'USA', 2009)])
    conn.executemany("""
    INSERT INTO ref VALUES (?,?,?)""", [('AY911262', 'Respiratory syncytial virus nonstructural proteins NS1 and NS2', 'J. Virol'),
                                       ('FJ614813', 'A chimeric A2 strain of respiratory syncytial virus', 'J. Virol'),
                                       ('JF920047', 'Whole Genome Sequencing and Evolutionary', 'PLoS ONE 6')])
    conn.executemany("""
    INSERT INTO phylogenetic VALUES (?,?,?)""", [('AY911262', 'ON1', 'IV'),
                                                ('FJ614813', 'ON1', 'II'),
                                                ('FJ948820', 'GA2', 'I'),
                                                ('JF920046', 'BA', 'I'),
                                                ('JF920047', 'GB3', 'III')])
    conn.executemany("""
    INSERT INTO epitope VALUES (?, ?)""", [('ON1', 'ATGGAC'),
                                          ('GA2', 'CTGGAC'),
                                          ('BA', 'AGATAC'),
                                          ('GB3', 'GCTTTCG')])

    # Query 1: every stored sequence.
    cursor1 = conn.execute("select * from sequence")
    df1 = pd.DataFrame(cursor1.fetchall(), columns=['assession', 'seq'])
    print(df1)

    # Query 2: sequences that have a literature reference, with the journal.
    cursor2 = conn.execute('''select sequence.assession, sequence.seq, ref.journal from
                           sequence join ref
                           on sequence.assession = ref.id''')
    df2 = pd.DataFrame(cursor2.fetchall(), columns=['assession', 'seq', 'journal'])
    print(df2)

    # Query 3: sequences whose metadata country is 'USA'.
    cursor3 = conn.execute('''select sequence.assession, sequence.seq, metadata.country from
                           sequence join metadata
                           on sequence.assession = metadata.assession
                           where country = 'USA' ''')
    df3 = pd.DataFrame(cursor3.fetchall(), columns=['assession', 'seq', 'country'])
    print(df3)

    # Query 4: sequences with their genotype, ordered by accession.
    cursor4 = conn.execute('''select sequence.assession, sequence.seq, phylogenetic.genotype from
                           sequence join phylogenetic
                           on sequence.assession = phylogenetic.assession
                           order by sequence.assession asc''')
    df4 = pd.DataFrame(cursor4.fetchall(), columns=['assession', 'seq', 'genotype'])
    print(df4)

    # Query 5: relabel every ON1 genotype as GA2, then join to the epitopes.
    # After the update no phylogenetic row carries 'ON1', so the 'ON1' epitope
    # row drops out of the inner join.
    cursor5 = conn.execute('''update phylogenetic set genotype = 'GA2' where genotype = 'ON1';''')
    cursor5 = conn.execute('''select phylogenetic.assession, phylogenetic.clade, phylogenetic.genotype,epitope.epitope from
                           epitope join phylogenetic
                           on phylogenetic.genotype = epitope.genotype''')
    df5 = pd.DataFrame(cursor5.fetchall(), columns=['assession', 'clade', 'genotype', 'epitope'])
    print(df5)

    # Persist the inserts and the genotype update.
    conn.commit()

# The module is imported as `sq1`; the bare name `sqlite3` is not bound, so
# referring to `sqlite3.Error` here would raise NameError instead of reporting
# the database error.
except sq1.Error as e:
    print("There was error:"+str(e))

finally:
    # Always release the connection, whether or not the queries succeeded.
    print("this excuted")
    conn.close()
 

  assession            seq
0  AY911262  ATGGACTCTTACT
1  FJ614813  CTGGACGGTTTCC
2  FJ948820  GCTTTCGGGATAC
3  JF920046  ATGGACTCTTACT
4  JF920047  GCTTTCGGGATAC
  assession            seq     journal
0  AY911262  ATGGACTCTTACT    J. Virol
1  FJ614813  CTGGACGGTTTCC    J. Virol
2  JF920047  GCTTTCGGGATAC  PLoS ONE 6
  assession            seq country
0  AY911262  ATGGACTCTTACT     USA
1  FJ614813  CTGGACGGTTTCC     USA
2  JF920046  ATGGACTCTTACT     USA
3  JF920047  GCTTTCGGGATAC     USA
  assession            seq genotype
0  AY911262  ATGGACTCTTACT      ON1
1  FJ614813  CTGGACGGTTTCC      ON1
2  FJ948820  GCTTTCGGGATAC      GA2
3  JF920046  ATGGACTCTTACT       BA
4  JF920047  GCTTTCGGGATAC      GB3
  assession clade genotype  epitope
0  AY911262    IV      GA2   CTGGAC
1  FJ614813    II      GA2   CTGGAC
2  FJ948820     I      GA2   CTGGAC
3  JF920046     I       BA   AGATAC
4  JF920047   III      GB3  GCTTTCG
this excuted
