In [1]:
from flask import Flask, request, render_template
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

from io import BytesIO
import base64
sns.set()


In [2]:

app = Flask("iris_online_example",template_folder='templates')
classifier = None
cov19_dna = None
df=None

In [3]:
#userful function that gets the current figure as a base 64 image for embedding into websites
def getCurrFigAsBase64HTML():
    im_buf_arr = BytesIO()
    plt.gcf().savefig(im_buf_arr,format='png')
    im_buf_arr.seek(0)
    b64data = base64.b64encode(im_buf_arr.read()).decode('utf8');
    return render_template('img.html',img_data=b64data) 


In [4]:
#convert a sequence of characters into k-mer words, default size = 6 (hexamers)
def Kmers_funct(seq, size=6):
    return [seq[x:x+size].lower() for x in range(len(seq) - size + 1)]


In [5]:
def train():
    global df,cov19_dna,cov19_texts,cv,classifier
    df = pd.read_csv('CoV19seq_country.csv')
    cov19_dna=df
    #convert our training data sequences into short overlapping k-mers of length 6. 
    cov19_dna['words'] = cov19_dna.apply(lambda x: Kmers_funct(x['seq']), axis=1)
    cov19_dna=df.drop("seq",axis=1)
    cov19_texts = list(cov19_dna['words'])
    
    y = cov19_dna.iloc[:, 0].values


    for item in range(len(cov19_texts)):
        cov19_texts[item] = ' '.join(cov19_texts[item])

    
    # convert k-mer words into numerical vectors that represent counts for every k-mer in the vocabulary
    cv = CountVectorizer(ngram_range=(4,4)) #The n-gram size of 4 is previously determined by testing
    X = cv.fit_transform(cov19_texts)
    
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=3) 
    classifier = MultinomialNB(alpha=0.1)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    

    pickle.dump(classifier,open("model","wb"))
    pickle.dump(cov19_dna,open("data","wb"))
    return accuracy_score(y_test, y_pred)

In [6]:
def init():
    global df
    global cov19_dna
    
    df = pd.read_csv('CoV19seq_country.csv')
    cov19_dna=df
   

    
    train()
    

In [7]:
try:
    classifier = pickle.load(open("model","rb"))
    cov19_dna = pickle.load(open("data","rb"))
except:
    init()

In [8]:
#this method resets/initializes everything (database, model) (should probably password protect this)
@app.route("/reset")
def reset():
    init()
    return "reset model"
    

In [9]:
@app.route("/")
def main():
    return render_template("main.html")


In [10]:
@app.route("/run_observation",methods=["POST"])
def add_data():
    global df
    global cov19_dna
    global classifier
    global d
    
    try:
        seq = request.values.get('seq')
        country = request.values.get('country')

        is_add = request.values.get("add","no")
        is_test = request.values.get("test","no")
    except: 
        return "Error parsing entries"
    
       
    if is_add != "no":
        obs = pd.DataFrame([[seq]],
                           columns=["seq"])
        cov19_dna = pd.concat([df,obs],ignore_index=True)
        
        s = train()

        
        return "Added new sample " + "<pre>"+ cov19_dna.to_string()             + "</pre><br> ... <br> and retrained. <br>  Score is now: " + str(s) + "<br>" 
    
    
    if is_test != "no":
        obs2 = pd.DataFrame([[seq,country]],
                           columns=["seq","country"])
        
        ## traslate this data
        obs2['words'] = obs2.apply(lambda x: Kmers_funct(x['seq']), axis=1)
        obs2=obs2.drop("seq",axis=1)
    
        obs2_texts = list(obs2['words'])
        
        for item in range(len(obs2_texts)):
            obs2_texts[item] = ' '.join(obs2_texts[item])

       
        
        df = pd.read_csv('CoV19seq_country.csv')
        cov19_dna=df
        #convert our training data sequences into short overlapping k-mers of length 6. 
        cov19_dna['words'] = cov19_dna.apply(lambda x: Kmers_funct(x['seq']), axis=1)
        cov19_dna=df.drop("seq",axis=1)
        cov19_texts = list(cov19_dna['words'])
    
        y = cov19_dna.iloc[:, 0].values


        for item in range(len(cov19_texts)):
            cov19_texts[item] = ' '.join(cov19_texts[item])

    
        # convert k-mer words into numerical vectors that represent counts for every k-mer in the vocabulary
        cv = CountVectorizer(ngram_range=(4,4)) #The n-gram size of 4 is previously determined by testing
        x_perdict = cv.fit_transform(obs2_texts)
        
    
        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=3) 
        classifier = MultinomialNB(alpha=0.1)
        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(x_perdict)
        
        return y_pred
        
    return "not implemented"
    

In [11]:
@app.route("/deploy",methods=['GET','POST'])
def deploy():
    global cov19_dna
    global classifier
    
    
    s = train()

    return "accuracy score for the model is " + str(s) 
    


In [None]:
if __name__ == "__main__":
    app.run()

 * Serving Flask app "iris_online_example" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [22/Apr/2020 21:56:33] "[37mGET / HTTP/1.1[0m" 200 -
[2020-04-22 21:56:45,427] ERROR in app: Exception on /run_observation [POST]
Traceback (most recent call last):
  File "/home/hl46161/miniconda3/lib/python3.7/site-packages/flask/app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "/home/hl46161/miniconda3/lib/python3.7/site-packages/flask/app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/home/hl46161/miniconda3/lib/python3.7/site-packages/flask/app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "/home/hl46161/miniconda3/lib/python3.7/site-packages/flask/_compat.py", line 39, in reraise
    raise value
  File "/home/hl46161/miniconda3/lib/python3.7/site-packages/flask/app.py", line 1950, in full_dispatch_request
    rv = self.dispatch_request()
  File "/home/hl46161/miniconda3/lib/python3.7/site-pac