# Installation of dependencies

In [1]:
# streamlit and ngrok and sentence bert dependencies
!pip install streamlit
!pip install streamlit-chat
!pip install nest-asyncio
!pip install pyngrok
!pip install protobuf==3.20
!pip install sentence-transformers
!pip install h5py
!pip install BeautifulSoup4

# questgen dependencies
!python -m nltk.downloader universal_tagset
!python -m spacy download en 
!pip install git+https://github.com/ramsrigouthamg/Questgen.ai
!pip install git+https://github.com/boudinfl/pke.git@69337af9f9e72a25af6d7991eaa9869f1322dd72
!wget https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz
!tar -xvf  s2v_reddit_2015_md.tar.gz

# transformers dependencies
!pip install transformers==3
!pip install torchinfo
!pip install detectlanguage
!pip install translators

!pip install fastapi nest-asyncio pyngrok uvicorn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting streamlit
  Downloading streamlit-1.16.0-py2.py3-none-any.whl (9.2 MB)
[K     |████████████████████████████████| 9.2 MB 3.2 MB/s 
[?25hCollecting blinker>=1.0.0
  Downloading blinker-1.5-py2.py3-none-any.whl (12 kB)
Collecting validators>=0.2
  Downloading validators-0.20.0.tar.gz (30 kB)
Collecting gitpython!=3.1.19
  Downloading GitPython-3.1.29-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 55.8 MB/s 
Collecting watchdog
  Downloading watchdog-2.2.0-py3-none-manylinux2014_x86_64.whl (78 kB)
[K     |████████████████████████████████| 78 kB 4.0 MB/s 
Collecting pydeck>=0.1.dev5
  Downloading pydeck-0.8.0-py2.py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 44.9 MB/s 
Collecting rich>=10.11.0
  Downloading rich-12.6.0-py3-none-any.whl (237 kB)
[K     |████████████████████████████████| 237 kB 50.6 MB/s 
[?25hCollecting 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sentence-transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[K     |████████████████████████████████| 85 kB 5.0 MB/s 
[?25hCollecting transformers<5.0.0,>=4.6.0
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 68.9 MB/s 
Collecting sentencepiece
  Downloading sentencepiece-0.1.97-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 68.1 MB/s 
[?25hCollecting huggingface-hub>=0.4.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 80.9 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 58.7 MB/s 
Building wheels for collected pa

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/boudinfl/pke.git@69337af9f9e72a25af6d7991eaa9869f1322dd72
  Cloning https://github.com/boudinfl/pke.git (to revision 69337af9f9e72a25af6d7991eaa9869f1322dd72) to /tmp/pip-req-build-if1v4gl6
  Running command git clone -q https://github.com/boudinfl/pke.git /tmp/pip-req-build-if1v4gl6
  Running command git rev-parse -q --verify 'sha^69337af9f9e72a25af6d7991eaa9869f1322dd72'
  Running command git fetch -q https://github.com/boudinfl/pke.git 69337af9f9e72a25af6d7991eaa9869f1322dd72
  Running command git checkout -q 69337af9f9e72a25af6d7991eaa9869f1322dd72
Collecting sklearn
  Downloading sklearn-0.0.post1.tar.gz (3.6 kB)
Building wheels for collected packages: pke, sklearn
  Building wheel for pke (setup.py) ... [?25l[?25hdone
  Created wheel for pke: filename=pke-2.0.0-py3-none-any.whl size=6160198 sha256=137cd30f0388b17e1bac3614aa3d36153d6ca2a9025c8df011

# Data Extraction from RURA and REMA
## Tools used


*   BeautifulSoup and requests for extracting titles and links
*   regex for cleaning titles

## After Collecting


*   save the titles and links in a csv file called data_titles.csv

## Actions performed


*   checking for links in a page
*   getting full link for all links
*   verifying if there is title
*   cleaning titles
*   combining links from RURA and REMA
*   saving csv file







In [2]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import re

import warnings
warnings.filterwarnings('ignore')

def verify_full_link(link, main, main_link=None):
  if link=='#':
    return main_link
  if link[-2]=='.':
    link = link[:-2]+link[-1]
  if re.search('http', str(link)):
    return str(link)
  else:
    if link[0]=='?' or '/':
      return main+link
    else:
      return main+link

def verify_title(title):
  if len(str(title))>0: return str(title)
  else: return None

# cleaning titles
def preprocessing(titles : list):
  cleaned_titles = []
  for title in titles:
    title = title.lower()
    url_pattern = re.compile(r'https?://\S+|www\.\S+')
    html_pattern = re.compile('<.*?>')
    title = url_pattern.sub(r'', title)
    title = html_pattern.sub(r'', title)
    title = re.sub(r"[^\w\d'\s]+", ' ', title)
    cleaned_titles.append(title)
  return cleaned_titles

def get_links(link, main=None):
  content = requests.get(link, verify=False, timeout=10).content
  page = bs(content, 'html.parser')
  links = page.find_all('a')
  titles = [verify_title(item.text) for item in links]
  links_href = [verify_full_link(item['href'],main, link) for item in links]
  # descriptions = [get_description(item) for item in links_href]
  
  # create dataframe
  df = pd.DataFrame()
  df['title'] = titles
  df['links'] = links_href
  # df['description'] = descriptions

  # drop empty titles
  df = df.dropna(subset=['title']).reset_index(drop=True)

  # drop duplicate 
  df = df.drop_duplicates().reset_index(drop=True)
  df['title'] = preprocessing(df['title'])
  return df

df1 = get_links('https://rura.rw/index.php','https://rura.rw/')
df2 = get_links('https://www.rema.gov.rw/home','https://rema.gov.rw/')

frames = [df1, df2]
# combine dataframes
df = pd.concat(
    frames, 
    ignore_index=True
    )

# save csv file
df.to_csv('data_titles.csv')
df

Unnamed: 0,title,links
0,business plan to buy an apartment building wri...,https://rura.rw/index.php
1,business plan to buy an apartment building,http://rura.rw/?business-plan-to-buy-an-apartm...
2,writing a good abstract for research paper,http://rura.rw/?writing-a-good-abstract-for-re...
3,statement of admission,http://rura.rw/?statement-of-admission
4,home,https://rura.rw/index.php?id=23
...,...,...
216,conventions and protocols,https://rema.gov.rw//resources/conventions-and...
217,projects,https://rema.gov.rw//our-work/projects
218,partnerships,https://rema.gov.rw//our-work/partnerships
219,report an environmental crime,https://rema.gov.rw//get-involved/report-a-crime


# Generating Intents, Questions and Question.csv
## Tools used


*   pandas
*   questgen: a model for generating question from a text. effective for text longer 45 characters

## After generating
*   save intents to intents.json where each intent has a tag(link) and response(title)
*   save questions to questions.csv with columns questions(as text) and tag(label)

## Actions performed
*   create general questions for getting links
*   generate questions using questgen model




In [3]:
# quesgen setup and nltk
import nltk
nltk.download('stopwords')
from Questgen import main

qe = main.BoolQGen()

# quesgen
def questgen_generate(answer, n=2, qe=qe):
  payload = {
      'input_text':str(answer)
  }
  output = qe.predict_boolq(payload)
  return output['Boolean Questions'][:n]

import json
import pandas as pd

question = """Guide me to {}?
Where do you get {}?
Where can I find information about {}?
What is the link to the {}?
What can you say about {}?
How do you get {}?
What can you tell me about {}?"""

def generate_intents_and_df(df, 
                            qe_n=45, 
                            intents_json='intents.json', 
                            questions_df='questions.csv'):
  
  # generate intents
  intents = {
      'intents':[]
  }
  questions_df_ls = [] 
  labels_df_ls = []
  for index, row in df.iterrows():
    intent = {}
    intent['tag'] = row[1]
    intent['response'] = row[0]
    questions = []
    for i in range(len(question.split('\n'))):
      questions.append(question.split('\n')[i].format(intent['response']))
    # generate more questions using questgen
    # string longer than 45 brings accurate questions
    if len(str(intent['tag']))>qe_n:
      questions.extend(questgen_generate(intent['tag']))
    else:
      pass
    intents['intents'].append(intent)
    labels_df_ls.extend([intent['tag']] * len(questions))
    questions_df_ls.extend(questions)

  # save intents into json file
  with open(intents_json, 'w+') as f:
    intents = json.dumps(intents)
    f.write(intents)
  # save questions and intent tag as labels into csv file 
  df = pd.DataFrame()
  df['text'] = questions_df_ls
  df['label'] = labels_df_ls
  df.to_csv(questions_df)

  return intents, df


intents, questions = generate_intents_and_df(df)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading collection 'popular'
[nltk_data]    | 
[nltk_data]    | Downloading package cmudict to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/cmudict.zip.
[nltk_data]    | Downloading package gazetteers to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/gazetteers.zip.
[nltk_data]    | Downloading package genesis to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/genesis.zip.
[nltk_data]    | Downloading package gutenberg to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/gutenberg.zip.
[nltk_data]    | Downloading package inaugural to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/inaugural.zip.
[nltk

Downloading:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/892M [00:00<?, ?B/s]

# Training Our model
## Tools used
*   transformers: for increasing our classification model accuracy 
*   pytorch
*   sklearn: for labelencoding and class weight computing

## After Training
*   save best model to model.pth

## Actions Performed
*   encoding labels
*   setting up bert with facebook/bart-large
*   tokenizing our data and creating a dataloader
*   creating our bert architecture model
*   balancing our weights
*   defining our training loss function
*   training our model
*   evaluating our model over 200 epochs
*   saving our model

In [4]:
# setup
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [5]:
# import
# import libraries
import numpy as np
import pandas as pd
import re
import torch
import torch.nn as nn
import transformers
import matplotlib.pyplot as plt
import json
from transformers import RobertaTokenizer, RobertaModel, DistilBertTokenizer, DistilBertModel
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import AdamW
from sklearn.utils.class_weight import compute_class_weight
from torch.optim import lr_scheduler
import random
from sklearn.preprocessing import LabelEncoder

device = torch.device('cuda')

encoder

In [6]:
le = LabelEncoder()
questions['label'] = le.fit_transform(questions['label'])

train_text, train_labels = questions['text'], questions['label']

# calculating number of unique labels
unique_labels = len(set(questions['label']))

Setting up Bert

In [7]:
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

bert = DistilBertModel.from_pretrained('distilbert-base-uncased')

max_seq_len = 16

# tokenize and encode sequences in training set
tokens_train = tokenizer(
    train_text.tolist(),
    max_length = max_seq_len,
    pad_to_max_length = True,
    truncation = True,
    return_token_type_ids = False
)

# train data to tensors
train_seq = torch.tensor(tokens_train['input_ids'])
train_mask = torch.tensor(tokens_train['attention_mask'])
train_y = torch.tensor(train_labels.tolist())

# dataloaders
batch_size = 20
train_data = TensorDataset(train_seq, train_mask, train_y)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/442 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/268M [00:00<?, ?B/s]

Creating Bert model

In [8]:
# model

class BERT_Arch(nn.Module):
  def __init__(self, bert, dropout=0.2, hl1=768, hl2=512, hl3=512, out=3):
    super(BERT_Arch, self).__init__()
    self.bert = bert
    self.dropout = nn.Dropout(dropout)
    self.relu = nn.ReLU()
    self.fc1 = nn.Linear(hl1, hl2)
    self.fc2 = nn.Linear(hl2, hl3)
    self.fc3 = nn.Linear(hl3, out)
    self.softmax = nn.LogSoftmax(dim=1)

  def forward(self, sent_id, mask):
    cls_hs = self.bert(sent_id, attention_mask=mask)[0][:,0]
    x = self.fc1(cls_hs)
    x = self.relu(x)
    x = self.dropout(x)
    x = self.fc2(x)
    x = self.relu(x)
    x = self.dropout(x)
    x = self.fc3(x)
    x = self.softmax(x)
    return x

Calling model

In [9]:
# freezing parameters
for param in bert.parameters():
  param.requires_grad = False

model = BERT_Arch(bert, out=unique_labels)
model = model.to(device)

Optimizing model

In [10]:
optimizer = AdamW(model.parameters(), lr = 1e-3)

Checking class weights

In [11]:
class_wts = compute_class_weight('balanced', classes=np.unique(train_labels), y=train_labels)

Convert weights to tensor

In [12]:
# convert class weights to tensor
weights= torch.tensor(class_wts,dtype=torch.float)
weights = weights.to(device)
# loss function
cross_entropy = nn.NLLLoss(weight=weights) 

Training loss initialization

In [13]:
train_losses=[]
# number of training epochs
epochs = 400
# We can also use learning rate scheduler to achieve better results
lr_sch = lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)

Training model

In [14]:
# function to train the model
def train():
  
  model.train()
  total_loss = 0
  
  # empty list to save model predictions
  total_preds=[]
  
  # iterate over batches
  for step,batch in enumerate(train_dataloader):
    
    # progress update after every 50 batches.
    if step % 50 == 0 and not step == 0:
      print('\tBatch {:>5,} out of {:>5,}.'.format(step,len(train_dataloader)))
      print(f'\tloss: {total_loss/len(train_dataloader):.3f}')
    # push the batch to gpu
    batch = [r.to(device) for r in batch] 
    sent_id, mask, labels = batch
    # get model predictions for the current batch
    preds = model(sent_id, mask)
    # compute the loss between actual and predicted values
    loss = cross_entropy(preds, labels)
    # add on to the total loss
    total_loss = total_loss + loss.item()
    # backward pass to calculate the gradients
    loss.backward()
    # clip the the gradients to 1.0. It helps in preventing the    exploding gradient problem
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    # update parameters
    optimizer.step()
    # clear calculated gradients
    optimizer.zero_grad()
  
    # We are not using learning rate scheduler as of now
    # lr_sch.step()
    # model predictions are stored on GPU. So, push it to CPU
    preds=preds.detach().cpu().numpy()
    # append the model predictions
    total_preds.append(preds)
  # compute the training loss of the epoch
  avg_loss = total_loss / len(train_dataloader)
    
  # predictions are in the form of (no. of batches, size of batch, no. of classes).
  # reshape the predictions in form of (number of samples, no. of classes)
  total_preds  = np.concatenate(total_preds, axis=0)
  #returns the loss and predictions
  return avg_loss, total_preds

Training epochs

In [None]:
for epoch in range(epochs):
     
    print('Epoch {:} / {:}'.format(epoch + 1, epochs))
    
    #train model
    train_loss, _ = train()
    
    # append training and validation loss
    train_losses.append(train_loss)
    # it can make your experiment reproducible, similar to set  random seed to all options where there needs a random seed.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
print(f'\nTraining Loss: {train_loss:.3f}')

Epoch 1 / 400
	Batch    50 out of    84.
	loss: 3.100
Epoch 2 / 400
	Batch    50 out of    84.
	loss: 3.070
Epoch 3 / 400
	Batch    50 out of    84.
	loss: 2.915
Epoch 4 / 400
	Batch    50 out of    84.
	loss: 2.655
Epoch 5 / 400
	Batch    50 out of    84.
	loss: 2.363
Epoch 6 / 400
	Batch    50 out of    84.
	loss: 2.178
Epoch 7 / 400
	Batch    50 out of    84.
	loss: 2.008
Epoch 8 / 400
	Batch    50 out of    84.
	loss: 1.920
Epoch 9 / 400
	Batch    50 out of    84.
	loss: 1.782
Epoch 10 / 400
	Batch    50 out of    84.
	loss: 1.675
Epoch 11 / 400
	Batch    50 out of    84.
	loss: 1.566
Epoch 12 / 400
	Batch    50 out of    84.
	loss: 1.422
Epoch 13 / 400
	Batch    50 out of    84.
	loss: 1.388
Epoch 14 / 400
	Batch    50 out of    84.
	loss: 1.300
Epoch 15 / 400
	Batch    50 out of    84.
	loss: 1.231
Epoch 16 / 400
	Batch    50 out of    84.
	loss: 1.180
Epoch 17 / 400
	Batch    50 out of    84.
	loss: 1.182
Epoch 18 / 400
	Batch    50 out of    84.
	loss: 1.087
Epoch 19 / 400
	Bat

Save model

In [None]:
model_path = 'model.pth'
torch.save(model, model_path)

In [None]:
def get_prediction(str):
 str = re.sub(r'[^a-zA-Z ]+', '', str)
 test_text = [str]
 model.eval()
 
 tokens_test_data = tokenizer(
 test_text,
 max_length = max_seq_len,
 pad_to_max_length=True,
 truncation=True,
 return_token_type_ids=False
 )
 test_seq = torch.tensor(tokens_test_data['input_ids'])
 test_mask = torch.tensor(tokens_test_data['attention_mask'])
 
 preds = None
 with torch.no_grad():
   preds = model(test_seq.to(device), test_mask.to(device))
 preds = preds.detach().cpu().numpy()
 preds = np.argmax(preds, axis = 1)
 print('Intent Identified: ', le.inverse_transform(preds)[0])
 return le.inverse_transform(preds)[0]

intents = open('intents.json')
intents = json.load(intents)

def get_response(message): 
  intent = get_prediction(message)
  for i in intents['intents']: 
    if i["tag"] == intent:
      result = i["response"]
      break
  print(f"Response : {result}")
  return intent, result

get_response('what can you tell us about RURA')

# Deploy model with fastapi

In [None]:
from fastapi import FastAPI
from pydantic import BaseModel
import nest_asyncio
from pyngrok import ngrok
import uvicorn

app = FastAPI()

class Text(BaseModel):
  text: str

@app.post('/')
async def predict(text:Text):
  link, title = get_response(str(text))
  return {
      'link':link,
      'title':title
  }

ngrok_tunnel = ngrok.connect(8000)
print('Public URL:', ngrok_tunnel.public_url)
nest_asyncio.apply()
uvicorn.run(app, port=8000)

# Deploying app with streamlit
## Tools used 
*   Streamlit
*   Streamlit chat for display like
*   sentence-bert for checking sentence confidence
*   detectlanguage for detecting language

## Actions performed
*   verify answer using sentence bert
*   translation, foreign to english and back
*   make request to our fastapi 
*   display chat in chat format using streamlit-chat

In [None]:
# %%writefile app.py
# setup
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

import streamlit as st
from streamlit_chat import message
import detectlanguage
import translators as ts
import translators.server as tss
from sentence_transformers import SentenceTransformer, util
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd
import json
from transformers import RobertaTokenizer, RobertaModel
import re
import requests

# tools setup
# setup detect language api keys
detectlanguage.configuration.api_key = '7a1c7069f905116a159438796c09db8e'

# setup sbert model
st.session_state['sbert_model'] = SentenceTransformer('all-MiniLM-L6-v2')

# streamlit setup
st.set_page_config(
    page_title='Chatbot',
    page_icon=':robot:'
)
st.header('Chat With Us')
if 'generated' not in st.session_state:
    st.session_state['generated'] = []

if 'past' not in st.session_state:
    st.session_state['past'] = []
    

# verify answer
def verify_answer(question, answer):
    qs = [question]
    ans = [answer]
    
    embeddings1 = st.session_state['sbert_model'].encode(qs, convert_to_tensor=True)
    embeddings2 = st.session_state['sbert_model'].encode(ans, convert_to_tensor=True)
    
    score = util.cos_sim(embeddings1, embeddings2)
    return score[0]

# translation function or class
class Translation:
    def __init__(self, text, data_lang='en'):
        self.text = text
        self.language = detectlanguage.detect(str(text))[0]['language']
        self.data_lang = data_lang

    def encode(self):
        if self.language == self.data_lang:
            return self.text
        else:
            translation = tss.google(self.text, self.language, self.data_lang)
            return translation
        
    def bot_response(self, title, confidence, link):
        confidence_text = ''
        if confidence>0.75:
            confidence_text = 'You can find information related to "{}" on this link: {}'.format(title, link)
        elif confidence<=0.75 and confidence>0.50:
            confidence_text = '50-50 chance you will find information related to "{}" on this link: {}'.format(title, link)
        elif confidence<=0.50 and confidence>0.30:
            confidence_text = 'Am not sure, but you might find information related to "{}" on this link: {}'.format(title, link) 
        else:
            confidence_text = "Sorry, I couldn't find information. Can you elaborate more on the question?"
        return confidence_text

    def decode(self, title, link):
        confidence = verify_answer(self.text, title)
        if self.language == self.data_lang:
            return self.bot_response(title, confidence, link)
        else:
            answer = self.bot_response(title, confidence, link)
            translation = tss.google(answer, self.data_lang, self.language)
            return translation
        

# display user and bot chat from dictionary
# keep scrolling up as input
# input and answer saved in a dictionary 
def get_text():
    user_text = st.text_input('You: ', placeholder='Message', key='input')
    return user_text

def get_response(text, link):
    obj = {'text':str(text)}
    try:
        ans = requests.post(link, json=obj)
        response = json.loads(ans.text)
        return response['link'], response['title']
    except Exception as err:
        st.write(err)
        return

user_text = get_text()

if user_text:
    trans = Translation(user_text)
    user_text_translated = trans.encode()
    st.write(user_text_translated)
    
    bot_answer = get_response(user_text_translated, 'http://40d5-35-229-75-39.ngrok.io/')
    st.write(bot_answer)
    bot_text_translated = trans.decode(bot_answer[0],bot_answer[1])
    
    st.session_state.past.append(user_text)
    st.session_state.generated.append(bot_text_translated)
    
# display chat messages
if st.session_state['generated']:
    for i in range(len(st.session_state['generated'])-1, -1, -1):
        message(st.session_state["generated"][i], key=str(i))
        message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')
