In [1]:
import re
import sqlite3
import pandas as pd
import numpy as np
import joblib

#For Flask
from flask import Flask, jsonify

In [2]:
#Import library for tokeize, stemming, and stopwords
import nltk
from nltk import word_tokenize
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords as stopwords_scratch

#Import library for sklearn model sentiment analysis
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import PredefinedSplit
from sklearn.model_selection import train_test_split

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
# Import library for Tensorflow Model Sentiment Analysis
from keras import optimizers
from keras.preprocessing.text import Tokenizer
from keras_preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import LSTM,Dense
from keras.layers import Embedding
from keras.callbacks import EarlyStopping
from tensorflow import keras
from keras.models import load_model

In [4]:
# Import library for Flask
from flask import Flask, request, jsonify,render_template
from flasgger import Swagger, LazyString, LazyJSONEncoder, swag_from
from unidecode import unidecode

In [8]:
#Swagger UI Definition
app = Flask(__name__)

app.json_encoder = LazyJSONEncoder
swagger_template = dict(
info = {
    'title' : LazyString(lambda: 'Sental (Sentiment Analysis)'),
    'version' : LazyString(lambda : '1.0.0'),
    'description': LazyString(lambda : 'Data menganalisa suatu sentiment'),
},
    host = LazyString(lambda: request.host)
)

swagger_config = {
    'headers': [],
    'specs': [
        {
        'endpoint': 'docs',
        'route': '/docs.json',
        }
    ],
    'static_url_path': '/flasgger_static',
    'swagger_ui': True,
    'specs_route': '/docs/'
}
swagger = Swagger(app, template=swagger_template,
                 config = swagger_config)

#Connect db and csv
conn = sqlite3.connect('data/output.db', check_same_thread=False)
df_alay = pd.read_csv('data/new_kamusalay.csv', names=['alay', 'cleaned'], encoding= 'latin-1')
df_raw = pd.read_csv('data/train_preprocess.tsv', sep='\t', names=['Text', 'Sentiment'])
df_raw.drop_duplicates()

#Define and execute query for unexistence data tables
#Tables will contain fields with dirty text (text & file) and cleaned text (text & file)
conn.execute('''CREATE TABLE IF NOT EXISTS data_text_sk (text_id INTEGER PRIMARY KEY AUTOINCREMENT, Text varchar(255), Sentiment varchar(255));''')
conn.execute('''CREATE TABLE IF NOT EXISTS data_file_sk (text_id INTEGER PRIMARY KEY AUTOINCREMENT, Text varchar(255), Sentiment varchar(255));''')
conn.execute('''CREATE TABLE IF NOT EXISTS data_text_tf (text_id INTEGER PRIMARY KEY AUTOINCREMENT, Text varchar(255), Sentiment varchar(255));''')
conn.execute('''CREATE TABLE IF NOT EXISTS data_file_tf (text_id INTEGER PRIMARY KEY AUTOINCREMENT, Text varchar(255), Sentiment varchar(255));''')

list_stopwords = stopwords_scratch.words('indonesian')
list_stopwords_en = stopwords_scratch.words('english')
list_stopwords.extend(list_stopwords_en)
list_stopwords.extend(['ya', 'yg', 'ga', 'yuk', 'dah', 'baiknya', 'berkali', 'kali', 'kurangnya', 'mata', 'olah', 'sekurang', 'setidak', 'tama', 'tidaknya'])

#Add External Stopwords
f = open("stopwords/tala-stopwords-indonesia.txt", "r")
stopword_external = []
for line in f:
    stripped_line = line.strip()
    line_list = stripped_line.split()
    stopword_external.append(line_list[0])
f.close()
list_stopwords.extend(stopword_external)
stopwords = list_stopwords

#Creating function for Cleansing Process
def lowercase(text): # Change uppercase characters to lowercase
    return text.lower()

def special(text):
    text = re.sub(r'\W', ' ', str(text), flags=re.MULTILINE)
    return text

def single(text):
    text = re.sub(r'\s+[a-zA-Z]\s+', ' ', text, flags=re.MULTILINE)
    return text

def singlestart(text):
    text = re.sub(r'\^[a-zA-Z]\s+', ' ', text, flags=re.MULTILINE)
    return text

def mulspace(text):
    text = re.sub(r'\s+', ' ', text, flags=re.MULTILINE)
    return text

#Removing RT
def rt(text):
    text = re.sub(r'rt @\w+: ', ' ', text, flags=re.MULTILINE)
    return text

#Removing prefixed 'b'
def prefixedb(text):
    text = re.sub(r'^b\s+', '', text, flags=re.MULTILINE)
    return text

def misc(text):
    text = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+)|(http?://[^\s]+))|([#@]\S+)|user|\n|\t', ' ', text, flags=re.MULTILINE)
    return text

#Mapping for kamusalay
alay_mapping = dict(zip(df_alay['alay'], df_alay['cleaned']))
def alay(text):
    wordlist = text.split()
    text_alay = [alay_mapping.get(x,x) for x in wordlist]
    clean_alay = ' '.join(text_alay)
    return clean_alay

def stopwords(text):
    text_tokens = word_tokenize(text)
    tokens_without_sw = [word for word in text_tokens if not word in stopwords]
    output_sw = ' '.join(tokens_without_sw)
    return output_sw

#Function for text cleansing
def cleansing(text):
    text = lowercase(text)
    text = special(text)
    text = single(text)
    text = singlestart(text)
    text = mulspace(text)
    text = rt(text)
    text = prefixedb(text)
    text = misc(text)
    text = alay(text)
    text = stopwords(text)
    return text

#Sklearn Neural Network Analysis Sentiment
#Load the sklearn Model
f1 = joblib.load('data/score.pkl')
clf = joblib.load('data/model.pkl')
vectorizer = joblib.load('data/vectorizer.pkl')

#Function for CSV Sklearn Analysis
def sentiment_csv_nn(input_file):
    column = input_file.iloc[:, 0]
    print(column)
    
    #Define and execute query for insert cleaned text and sentiment to sqlite database
    for data_file in column:
        data_clean = cleansing(data_file)
        sent = clf.predict(vectorizer.transform([data_clean]).toarray())
        query = "insert into data_file_sk ('Text', 'Sentiment') values (?, ?)"
        val = (data_clean,str(sent))
        conn.execute(query, val)
        conn.commit()
        print(data_file)

#Create Homepage
@swag_from('C:/Users/User/Binar/Platinum Binar Academy/docs/welcome_pages.yml', methods=['GET'])
@app.route('/', methods=['GET'])
def get():
    return "Welcome to Sental Dashboard"

#Text Analysis Sklearn
#Input text to analyze
@swag_from('C:/Users/User/Binar/Platinum Binar Academy/docs/text_sklearn.yml', methods=['POST'])
@app.route('/text_sklearn', methods=['POST'])
def text_sentiment_sklearn():
    #Get text from user
    input_text = str(request.form['text'])
    
    #Cleaning text
    output_text = cleansing(input_text)
    
    #Model Prediction for Sentiment Analysis
    sent = clf.predict(vectorizer.transform([output_text]).toarray())
    
    # Define and execute query for insert cleaned text and sentiment to sqlite database
    query = "insert into data_text_sk (text,sentiment) values (?, ?)"
    val = (output_text,str(sent))
    conn.execute(query, val)
    conn.commit()
    
    #Define API Response
    json_response = {
        'description': "Analysis Sentiment Success!",
        'F1 on test set': f1,
        'text' : output_text,
        'sentiment' : str(sent)
    }
    response_data = jsonify(json_response)
    return response_data

#Endpoint for File Analysis SKLearn
@swag_from('C:/Users/User/Binar/Platinum Binar Academy/docs/file_sklearn.yml', methods=['POST'])
@app.route('/file_sklearn', methods=['POST'])
def file_sentiment_sk():
    #Get File
    file = request.files['file']
    try:
            datacsv = pd.read_csv(file, encoding='iso-8859-1')
    except:
            datacsv = pd.read_csv(file, encoding='utf-8')
    
    #Cleaning file
    sentiment_csv_nn(datacsv)
    
    #Define API response
    select_data = conn.execute("SELECT * FROM data_file_sk")
    conn.commit
    data = [
        dict(text_id=row[0], text=row[1], sentiment=row[2])
    for row in select_data.fetchall()
    ]
    
    return jsonify(data)

#Tensorflow LSTM Model Analysis Sentimen
#Load the Tensorflow Model
model = load_model('data/model.h5')
tokenizer = joblib.load('data/tokenizer.pkl')

#Model Prediction
#Create Function for Sentiment Prediction
def predict_sentiment(text):
    sentiment_tf = ['negative', 'neutral', 'positive']
    text = cleansing(text)
    tw = tokenizer.texts_to_sequences([text])
    tw = pad_sequences(tw, maxlen=200)
    prediction = model.predict(tw)
    polarity = np.argmax(prediction[0])
    return sentiment_tf[polarity]

def sentiment_csv_tf(input_file):
    column = input_file.iloc[:, 0]
    print(column)
    
    # Define and execute query for insert cleaned text and sentiment to sqlite database
    for data_file in column:
        data_clean = cleansing(data_file)
        sent = predict_sentiment(data_clean)
        query = "insert into data_file_tf ('Text', 'Sentiment') values (?, ?)"
        val = (data_clean,sent)
        conn.execute(query, val)
        conn.commit()
        print(data_file)

#Endpoint for Text Analysis Tensorflow
#Input text to analyze
@swag_from('C:/Users/User/Binar/Platinum Binar Academy/docs/text_tensorflow.yml', methods=['POST'])
@app.route('/text_tensorflow', methods=['POST'])
def text_sentiment_tf():
    #Get text from user
    input_text = str(request.form['text'])
    
    #Cleansing text
    output_text = cleansing(input_text)
    
    #Model Prediction for Sentiment Analysis
    output_sent = predict_sentiment(output_text)
    
    #Define and execute query for insert cleaned text and sentiment to sqlite database
    query = "insert into data_text_tf (Text,Sentiment) values (?, ?)"
    val = (output_text,output_sent)
    conn.execute(query, val)
    conn.commit()
    
    #Define API response
    json_response = {
        'description': "Analysis Sentiment Success!",
        'text' : output_text,
        'sentiment' : output_sent
    }
    response_data = jsonify(json_response)
    return response_data

#Endpoint for File Analysis Tensorflow
@swag_from('C:/Users/User/Binar/Platinum Binar Academy/docs/file_tensorflow.yml', methods=['POST'])
@app.route('/file_tensorflow', methods=['POST'])
def file_sentiment_tf():
    #Get file
    file = request.files['file']
    try:
            datacsv = pd.read_csv(file, encoding='iso-8859-1')
    except:
            datacsv = pd.read_csv(file, encoding='utf-8')
    
    #Cleaning file
    sentiment_csv_tf(datacsv)
    
    #Define API response
    select_data = conn.execute("SELECT * FROM data_file_tf")
    conn.commit
    data = [
        dict(text_id=row[0], text=row[1], sentiment=row[2])
    for row in select_data.fetchall()
    ]
    
    return jsonify(data)

    

if __name__ == '__main__':
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [04/Mar/2023 08:39:25] "[37mGET /docs/ HTTP/1.1[0m" 200 -
127.0.0.1 - - [04/Mar/2023 08:39:26] "[37mGET /docs.json HTTP/1.1[0m" 200 -


0       - disaat semua cowok berusaha melacak perhatia...
1       RT USER: USER siapa yang telat ngasih tau elu?...
2       41. Kadang aku berfikir, kenapa aku tetap perc...
3       USER USER AKU ITU AKU\n\nKU TAU MATAMU SIPIT T...
4       USER USER Kaum cebong kapir udah keliatan dong...
                              ...                        
2567    USER Pak USER tujuan hidup Muslim beribadat ke...
2568             Beritanya BOHONG MULU nih pak jokowi URL
2569    Info : Warga Baduy setelah selesai memperingat...
2570                                USER Sitip apa siapa'
2571    USER USER USER USER Coba ente kroscek soal tka...
Name: Tweet, Length: 2572, dtype: object
- disaat semua cowok berusaha melacak perhatian gue. loe lantas remehkan perhatian yg gue kasih khusus ke elo. basic elo cowok bego ! ! !'
RT USER: USER siapa yang telat ngasih tau elu?edan sarap gue bergaul dengan cigax jifla calis sama siapa noh licew juga'
41. Kadang aku berfikir, kenapa aku tetap percaya pada Tuhan 

AB: pribadi yg senang berkompetisi dalam hal yang baik dan positif
USER USER Terus menurut lo kitab suci agAma apa yg di bilang om USER ; ; #Mendadakkalap
USER Suka nutup mata. Kaga ditutup pun udah terlihat tertutup gara2 sipit /?'
USER Sy sbg muslim tdk merasa pak USER menistakan kitab suci Alqur'an, krn dlm Alqur'an sndri ada bbrp suku kata yg smpai dtik ini blm diketahui mkna faktualnya olh umat islam, tp kami yakini kata tsb ada makna yg tersirat (fiksi) yg blm
USER eh kasar kau kampang'
BENTAR LAGI GUA RILIS ALBUM THE ONTA'S WKWKWKWKWK"
USER Ada cebong bang dia merasa paling benar'
Najis banget gw ngetwit apa barusan wey'
Kamu transgender atau gmn anjing :( URL
USER awas bintitan lu ngintipin titit kucing'
USER USER Bawel, baper, bolot, baik, bodor , gaje wkwk apa ya udah itu aja deh'
USER USER USER USER Gue saranin gk perlu bnyk bacot maling ayat, langsung to the point aja lu jadi orang... Yg tdk sepaham lu teriakin aja kafir sesat, liberal, pemuja dukun, bla blaaa.. pasti hatim

Foto memang bisu ,tpi memiliki banyak arti yg sush diartikan,sma seperti kamu susah diartikan,mau putus atau lanjut ?'
USER wakakakaka aku sudah mengalaminya \xf0\x9f\x98\x82\xf0\x9f\x98\x82\xf0\x9f\x98\x82\xf0\x9f\x98\x82 tetiba muncul grub wa a.n Bani Abd Hadi a.k.a mbahku dan seketika aku mek kenal beberapa orang saja \xf0\x9f\xa4\xa3\xf0\x9f\xa4\xa3\xf0\x9f\xa4\
abam pres badar n nuqaba pling boekkkð???, dia ni hebat dlm bidang agama, x tkut nk tgur member sndiri, n selalu wat lawak hambar (yg pling aku ingt smpai skrg lawak dia klau org ckp jom gerak skrg dia akn gerakkn badan dia yg chubby tu)ð???,
â??Seorang anak Pekan telah mengilhamkan Dasar Ekonomi Baru (DEB) dan menjadi penggerak utama kepada kemajuan ekonomi luar bandar. Beliau adalah Allahyarham Tun Abdul Razakâ?? - DS USER #MalaysiaMemilih #HogohUndi
USER USER Hahaha sialaaan nih si USER,; ; ternyata klo lg laper, ; presiden juga mau disikat sama doið???ð??½; ;
USER USER USER mau menasehati kali supaya ga ugal2an nih rezi

KLHK Upayakan Pemerataan Ekonomi Melalui TORA di Kawasan Hutan dan Perhutanan Sosial - Tribun Medan
USER Pak USER Mahfud MD sudah berpaling dari Allah SWT Pemberang, demi bangsa ajak semua Buang Islam
USER USER Orang2 bego kebanyakan minum &amp; makan kotoran onta ...otak nya somplak'
Apa yang terkuak semuanya sudah jelas sejak awal memang itu strategi kampanye kami di tim pemenangan anies sandi URL
"Warga Baduy setelah selesai memperingati tradisi Seba di Pendopo Pemkab Lebak akan dilanjutkan kembali bertemu dengan Gubernur Banten Wahidin Halim hari ini, Sabtu (21/4) #SebaBaduy2018"
Lembaga2 anti-korupsi masa lalu bubar krn sentuh kekuasaan. Giliran KPK kenapa PDIP partai politik penguasa yg plg ngotot ingin bubarkan KPK?
USER USER USER Justru mas klo mnyudutkan salah satu agama trtentu malah bahaya,itulah pak rocky ini cerdik. Dia bisa nyusun kalimat yg tepat &amp;pantas dgunakan,Inti dari ini dia mau kasih tau pngertian fiksi sbenarnya,ayolah mas nya i
Para Pendukung Capres gagal Mo

USER Tu yang saya katakan sejak dulu lagi kita tak takut dgn cina atau parti Dap tapi kita bimbang fahaman yang dibawa oleh mereka....'
"Cermin pun pecah saat dirinya berkaca"; ;
USER Lansung tag partainya...\nGa berani ya, berarti kau BANCI....'
Awwww comel nya asing kan ikut warna'
Islam menjadikan manusia yang idealis namun tetap realistis. Mempersiapkan kehidupan masa kini, namun tak lupa masa depan. Diajak bekerja keras, dan diajak pula beristirahat.
USER KRISTUS ANJING ASU JIAMPUT KEPARAT PICEK TELEK PEJUH SIWUK KOTANG SIWUK IDIOT KEPARAT PICEK TELEK PEJUH SIWUK KOTANG SIWUK IDIOT KEPARAT PICEK TELEK PEJUH SIWUK KOTANG SIWUK IDIOT KEPARAT PICEK TELEK PEJUH SIWUK KOTANG SIWUK IDIOT KEPARAT PICEK TELEK PEJUH SIWUK K
USER Agama katholik ngak ada yg menyebut khatolik nusantara, agama kristen protestan ngak ada yg menyebut kristen protestan nusantara, agama hindu ngak ada yg menyebut hindu nusantara, agama budha ngak ada yg menyebut budha nusantara, ...lha kok ini
3. Karena berbasis t

USER USER USER USER USER USER USER USER USER USER USER Coba ngaca n introspeksi,apakah keluarga anda yg gak pake hijab komunis jga?,otak dipakai jgn cmn jd property or pa
muhammadiyahinggris jalin kerjasama pengendalian ekstrimisme
USER Kalo neraka terkena bom , maka surga akan selalu ada untuk semua org dimuka bumi ini termasuk goku.'
BOT mau nanya. 'ich liebe dich' artinya apa? Yg jawab ntar BOT kasih aipet yg barusan nyemplung di jamban kuda (\xe2\x80\xa2\xcc\xaf\xcd\xa1_\xe2\x80\xa2\xcc\xaf\xcd\xa1 )"
COPOT Darmin Nasution Menko Perekonomian BODOH yang turunkan daya beli sejak menjabat USER USER
RT USER: Bodo amat anjir bodo ngakak gua ngakak astagfirullah humor bet URL
USER USER Matamu picek, cukk..\nOra delok prestasli pak jokowi..\n\nGuoblokkk..!!\nTolol....!!'
Berhasil Penjarakan Ahok, Alumni 212 Pendukung Rizieq Akan Lengserkan Jokowi Usai Lebaran
USER Coba dedengkot JIN terus terang ke para ulama Saudi atau rilis ke publik tentang akidah JIN. Contoh, JIN tidak anggap kafir pe

USER Ha ha kocak ...kalabong emang dungu'
Sehingga catatan itu yang kami buka kembali. Tidak boleh dilupakan pengalaman Pilkada DKI Jakarta 2017, USER dan USER yang diusung Partai Gerindra dan USER berhasil menumbangkan petahana yang diusung oleh koalisi besar yang dimotori
Mengenal Walpri Perempuan dalam Pilkada Bondowoso â?? Radar Jember
USER USER USER Disana mayoritas Budha. Ada jg pendetanya yg ikut Demo. Kau googling lah video demo di Thailand biar tau.'
USER USER Begitulah cara komunis bermain.'
suka kesel sama orang yang pas gua jadian confess ke gua giliran gua jomlo malah pada kemana tau NGOMONG SAMA SILIT'
USER Ehhhhh BANDOT USER LU MAU KRITIK PAK JOKOWI AMPE MENCRET DARAH IS OK....\nTPI KLO LU HINA, FITNAH... KAMI AKAN AMBIL TINDAKAN.... \nPAK JOKOWI PRESIDEN !!! PEMIMPIN NEGARA... \nLU PEMIMPIN AGAMA TPI SALAH KAPRAH LUUUU'
Kita balas di sini gima kita usir budha "USER: Kurang Ajar, Etnis Muslim Rohingya Disingkirkan dari Data Sensus Penduduk Myanmar
Astagfirullah mata gw t