First we import the libraries. These are the libraries we use:

*Note: aiogram is a Telegram bot framework.

In [61]:
import logging
import numpy
import pandas
import random
import math
import time
import datetime
import operator
import requests
import pytz

import aiogram.utils.markdown as md
from aiogram.types import message
from aiogram import Bot, Dispatcher, executor, types
from aiogram.contrib.fsm_storage.memory import MemoryStorage
from aiogram.dispatcher import FSMContext
from aiogram.dispatcher.filters import Text
from aiogram.dispatcher.filters.state import State, StatesGroup
from aiogram.types import ParseMode
from aiogram.utils import executor

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import GridSearchCV

from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

Then we create the Sastrawi stop-word remover and stemmer objects, and read the response, sentence, and synonym CSV files with pandas.

In [62]:
# Sastrawi stop-word remover, created once at import time and reused by
# get_covid_info.
sw_factory = StopWordRemoverFactory()
sw_remover = sw_factory.create_stop_word_remover()

# Sastrawi stemmer, created once at import time and reused by echo,
# get_responses and get_covid_info.
stem_factory = StemmerFactory()
stemmer = stem_factory.create_stemmer()

# Lookup tables, read from CSV files addressed relative to the working
# directory:
#   rsp_list - filtered on its "Intent" column by add_respon
#   snt_list - rows expose .Sentence and .Intent (see get_responses)
#   syn_list - read positionally by synonymize: column 0 is the replacement
#              word, column 1 a comma-separated list of variants
rsp_list = pandas.read_csv("response.csv")
snt_list = pandas.read_csv("sentence.csv")
syn_list = pandas.read_csv("sinonim.csv")

## === Main echo functions ===
We're going to skip most of the aiogram code and go straight to the main text and data processing.

Basically, the main code that handles the user's text input looks like this:

In [None]:
# Emit log records at INFO level and above.
logging.basicConfig(level=logging.INFO)
# Storage backend handed to the dispatcher (aiogram's MemoryStorage).
storage = MemoryStorage()

# NOTE: the token below is a placeholder; supply the real bot token before
# running.
bot = Bot(token="Bot_token_here..")
# Dispatcher that routes incoming updates to the handlers registered on it.
dp = Dispatcher(bot, storage=storage)

@dp.message_handler()
async def echo(message: types.Message):
    """Answer an incoming message with every response the bot can find.

    The lower-cased message text is matched word by word against the known
    sentences (get_responses), then the whole synonymized and stemmed text
    is matched against the article paragraphs (get_covid_info). Each
    non-None result is sent back as a separate answer.
    """
    text = message.text.lower()

    replies = []       # filled in place by get_responses
    seen_intents = []  # intents already answered, also filled in place
    get_responses(text.split(), seen_intents, replies)

    # get_covid_info returns a one-element list whose item may be None.
    replies.extend(get_covid_info(stemmer.stem(synonymize(text))))

    for reply in replies:
        if reply is None:
            continue
        await message.answer(reply)

There are several helper functions used here, so let's go back and look at their code!

First we make a function to get responses

In [57]:
def get_responses(words, intents, current_responses):
    """Collect one response per intent matched by the user's words.

    Each word is stemmed and compared with the stemmed ``Sentence`` of every
    row in ``snt_list``. On a match whose ``Intent`` is not yet in
    ``intents``, a response for that intent is added through ``add_respon``
    and the intent is recorded.

    Args:
        words: the user's message split into words.
        intents: list of intents already handled; extended in place.
        current_responses: list of responses; extended in place.

    Returns:
        None. Results are delivered by mutating ``intents`` and
        ``current_responses``.
    """
    if not words:
        return

    # Stem every known sentence once per call instead of once per
    # (word, sentence) pair; stemming is the expensive step here.
    stemmed_sentences = [
        (stemmer.stem(row.Sentence), row.Intent)
        for row in snt_list.itertuples()
    ]

    for word in words:
        stemmed_word = stemmer.stem(word)  # invariant for the inner loop
        for stemmed_sentence, intent in stemmed_sentences:
            if stemmed_sentence == stemmed_word and intent not in intents:
                current_responses = add_respon(current_responses, intent)
                intents.append(intent)

Now this is the add_respon function

In [58]:

def add_respon(responses, key):
    """Append a randomly chosen response for intent ``key`` to ``responses``.

    Rows of ``rsp_list`` whose "Intent" column equals ``key`` are the
    candidates; the value at position 1 of one randomly picked row is added.
    ``responses`` is extended in place and also returned. When no row
    matches, it is returned unchanged.
    """
    candidates = rsp_list[rsp_list["Intent"] == key].to_records(index=False)

    # No response registered for this intent: hand the list back untouched.
    if len(candidates) == 0:
        return responses

    picked = random.choice(candidates)
    responses += [picked[1]]
    return responses

The `empty` function simply checks whether the array's length is 0.

In [59]:
def empty(lists):
    """Return True when ``lists`` has length 0, otherwise False."""
    # len() is used on purpose instead of truthiness so that array-like
    # arguments are handled by their length.
    return len(lists) == 0

We also have `synonymize`, which replaces words that share the same meaning with a single common word.

In [60]:
def synonymize(words):
    """Replace known variants in ``words`` with their canonical word.

    The text is lower-cased first. For every row of ``syn_list``, column 1
    holds a comma-separated list of variants and column 0 the word that
    replaces them. Only the first occurrence of each variant is replaced,
    and the replacement is followed by a single space.

    Args:
        words: the text to normalize.

    Returns:
        The lower-cased text with the replacements applied.
    """
    result = words.lower()

    for record in syn_list.to_records(index=False):
        canonical, variants = record[0], record[1]

        # A blank cell is loaded as NaN; str(NaN) is "nan", which would be
        # treated as a variant and rewrite any "nan" inside the text.
        if pandas.isna(variants):
            continue

        for variant in str(variants).split(','):
            # Empty pieces come from trailing or doubled commas. Replacing
            # "" once would insert the canonical word at the very start of
            # the text, so every message would appear to contain it.
            if not variant:
                continue
            result = result.replace(variant, f"{canonical} ", 1)

    return result

## === Covid Info ===

Next is the `get_covid_info` function.

The goal of this function is to predict which COVID-related topic the user is asking about and return the matching information.

In [None]:
def get_covid_info(user_input, threshold=0.2, article_path="article.txt"):
    """Return the article paragraph most similar to ``user_input``.

    The article is split into paragraphs on triple newlines. Each paragraph
    is synonymized, stripped of stop words and stemmed, then turned into a
    bag-of-words vector together with the query. The paragraph with the
    highest cosine similarity to the query is returned when that similarity
    exceeds ``threshold``.

    Args:
        user_input: the already synonymized and stemmed user text.
        threshold: minimum cosine similarity a paragraph must exceed.
        article_path: path of the article file to search.

    Returns:
        A one-element list holding the matching paragraph followed by two
        newlines, or ``[None]`` when no paragraph is similar enough.
    """
    # The context manager closes the file even if reading fails.
    with open(article_path, "r") as article:
        paragraph = article.read().split("\n\n\n")

    tokens = [
        stemmer.stem(sw_remover.remove(synonymize(text))) for text in paragraph
    ]

    # Fit on paragraphs + query so both share one vocabulary; the query is
    # the last row of the matrix.
    vectorized = CountVectorizer().fit_transform(tokens + [user_input])

    # Score the query against the paragraph rows only. Ranking the query
    # together with the paragraphs and dropping the first entry is unsafe:
    # a paragraph that ties with the query can take first place, leaving the
    # query's own index to be used as an out-of-range paragraph index.
    scores = cosine_similarity(vectorized[-1], vectorized[:-1]).flatten()

    response = None
    best = int(scores.argmax())
    if scores[best] > threshold:
        response = f"{paragraph[best]}\n\n"

    return [response]

A function that returns the indices of the similarity list, sorted from the highest to the lowest similarity:

In [None]:
def get_similarity_index(lists):
    """Return the indices of ``lists`` ordered by descending value.

    Args:
        lists: an indexable sequence of comparable values (for example a
            list or a one-dimensional NumPy array of similarity scores).

    Returns:
        A list of integer positions; the first entry is the position of the
        largest value. Positions holding equal values keep their original
        relative order.
    """
    # The previous hand-written swap loop indexed ``lists[list_index][y]``,
    # which only works through NumPy fancy indexing and fails on plain
    # sequences. ``sorted`` gives the same descending order for any
    # indexable input.
    return sorted(range(len(lists)), key=lambda i: lists[i], reverse=True)

## === Predict covid-19 case ===

This function fetches last week's COVID-19 case numbers, predicts the next few days, and builds a chart of both.

In [None]:
def get_covid_stats(days_in_future=3):
    """Fetch last week's daily case totals and forecast the following days.

    One request per day is made for the seven days starting on the Monday of
    the previous week (Asia/Jakarta time). An SVR model, tuned with a
    randomized hyper-parameter search, is fitted on the first part of that
    week and used to predict the whole week plus ``days_in_future`` days.

    Args:
        days_in_future: number of days to predict beyond the fetched week.

    Returns:
        A dict with the keys:
            "img_url": URL of an image-charts.com line chart,
            "confirmed": column array of the fetched totals,
            "prediction": array of predicted values, one per chart date,
            "date": list of '%d-%b' labels for the week and forecast days.

    Raises:
        requests.RequestException: when a request fails, times out or
            returns an error status.
    """
    now = datetime.datetime.now(pytz.timezone('Asia/Jakarta'))
    # weekday() is 0 on Monday, so this is the Monday of the previous week.
    lastweek = now - datetime.timedelta(days=now.weekday(), weeks=1)

    head = []
    data = []
    for offset in range(7):
        day = lastweek + datetime.timedelta(days=offset)
        date_part = day.strftime('%d')
        month_part = day.strftime('%B')
        year_part = day.strftime('%Y')

        # A timeout keeps a stalled API from blocking the caller forever,
        # and raise_for_status reports HTTP errors at their source.
        get_covid = requests.get(
            f"https://apicovid19indonesia-v2.vercel.app/api/indonesia/provinsi/harian?year={year_part}&date={date_part}&month={month_part}",
            timeout=10,
        )
        get_covid.raise_for_status()
        covid_info = get_covid.json()

        head.append(day.strftime('%d-%b'))
        data.append(covid_info['data'][0]['cur_total'])

    # One-row frame: each column sum is simply that day's fetched value.
    confirmed = pandas.DataFrame([data], columns=head)
    dates = confirmed.keys()
    cases = numpy.array([confirmed[label].sum() for label in dates]).reshape(-1, 1)

    days_since = numpy.arange(len(dates)).reshape(-1, 1)
    future_forcast = numpy.arange(len(dates) + days_in_future).reshape(-1, 1)

    # Build the labels from the real start date. Re-parsing a '%d-%b' string
    # drops the year (strptime then assumes 1900), which fails on 29-Feb and
    # mislabels any week that spans a leap day.
    future_forcast_dates = [
        (lastweek + datetime.timedelta(days=i)).strftime('%d-%b')
        for i in range(len(future_forcast))
    ]

    # shuffle=False keeps chronological order: train on the earliest days.
    X_train_confirmed, _, y_train_confirmed, _ = train_test_split(
        days_since, cases, test_size=0.15, shuffle=False
    )

    svm_parameters = {
        'kernel': ['poly', 'sigmoid', 'rbf'],
        'C': [0.01, 0.1, 1, 10, 100],
        'gamma': [0.01, 0.1, 10],
        'epsilon': [0.01, 0.1, 10],
        'shrinking': [True, False],
    }

    svm_search = RandomizedSearchCV(
        SVR(),
        svm_parameters,
        scoring='neg_mean_squared_error',
        cv=3,
        return_train_score=True,
        n_jobs=1,
        n_iter=45,
        verbose=2,
    )
    svm_search.fit(X_train_confirmed, y_train_confirmed.ravel())

    svm_confirmed = svm_search.best_estimator_
    svm_pred = svm_confirmed.predict(future_forcast)

    confirmed_series = ','.join(f'{round(row[0])}' for row in cases)
    predicted_series = ','.join(f'{round(value)}' for value in svm_pred)

    # NOTE(review): the data series are sent as confirmed then predicted,
    # while the legend (chdl) lists "Prediksi" before "Terkonfirmasi" -
    # confirm the legend matches the intended series. The y-axis labels are
    # fixed at 1000..5000 regardless of the data.
    url = f"https://image-charts.com/chart?cht=lc&chd=a:|{ confirmed_series }|{ predicted_series }&chdl=Prediksi|Terkonfirmasi&chxl=0:|{ '|'.join(future_forcast_dates) }|1:||1000|2000|3000|4000|5000|&chs=900x500&chco=3072F3,ff0000&chdlp=t&chls=2,4,1&chm=s,000000,0,-1,5|s,000000,1,-1,5&chxt=x,y"

    return {
        "img_url": url,
        "confirmed": cases,
        "prediction": svm_pred,
        "date": future_forcast_dates,
    }