# Suggesting Algorithm and Tools to be used for the given problem (Topic)

### Installations
##### pip install nltk
##### pip install scikit-learn

### Preparing nltk

In [2]:
import nltk

# word_tokenize() needs the Punkt sentence-tokenizer data. Older NLTK releases
# load it from the 'punkt' package, newer ones from 'punkt_tab', so fetch both
# to keep the notebook runnable on either. download() reports an unknown
# package by printing an error rather than raising.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Aniket\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

### Importing other packages

In [24]:
import string
from sklearn.feature_extraction.text import TfidfVectorizer

### Cosine similarity function

In [25]:
# Shared Porter stemmer instance used by stem_tokens().
stemmer = nltk.stem.porter.PorterStemmer()

# Translation table for str.translate(): every ASCII punctuation code point
# maps to None, which deletes that character.
remove_punctuation_map = dict.fromkeys(map(ord, string.punctuation))

def stem_tokens(tokens):
    """Return a list with the stem of every token, in the original order."""
    return list(map(stemmer.stem, tokens))

def normalize(text):
    """Turn raw text into a list of stemmed tokens.

    The text is lowercased, stripped of ASCII punctuation, split into words
    with NLTK's word tokenizer and finally stemmed.
    """
    cleaned = text.lower().translate(remove_punctuation_map)
    words = nltk.word_tokenize(cleaned)
    return stem_tokens(words)

# TF-IDF vectorizer that tokenizes with normalize() and drops English stop
# words. token_pattern is only consulted when no tokenizer is given; setting it
# to None states that explicitly and avoids the "token_pattern will not be
# used" warning scikit-learn emits on fit.
vectorizer = TfidfVectorizer(tokenizer=normalize, token_pattern=None, stop_words='english')

def cosine_sim(text1, text2):
    """Return the TF-IDF cosine similarity between two strings.

    Both texts are vectorized together with the module-level ``vectorizer``;
    the similarity is the (0, 1) entry of the product of the TF-IDF matrix
    with its transpose.

    Args:
        text1: first text.
        text2: second text.

    Returns:
        The similarity score, or 0.0 when neither text contains any usable
        term (for example both are empty or consist only of stop words).
    """
    try:
        tfidf = vectorizer.fit_transform([text1, text2])
    except ValueError as exc:
        # scikit-learn raises "empty vocabulary; ..." when no term survives
        # tokenization and stop-word removal. Two term-less texts share
        # nothing, so report zero similarity; re-raise anything else.
        if "empty vocabulary" not in str(exc):
            raise
        return 0.0
    # toarray() replaces the sparse-matrix `.A` shortcut, which recent SciPy
    # releases no longer provide.
    return (tfidf * tfidf.T).toarray()[0, 1]


### Code to load csv file 

In [37]:
# getRows() takes the file name and returns the list of topics from the dataset
import csv


def getRows(filename):
    """Return the "Topic" value of every data row in a CSV file.

    ``csv.DictReader`` consumes the first line as the header, so position
    ``i`` of the returned list corresponds to the ``i``-th data row.

    Args:
        filename: path of the CSV file to read.

    Returns:
        List of "Topic" values in file order; empty if there are no data rows.

    Raises:
        KeyError: if a data row is read but the header has no "Topic" column.
    """
    # newline='' is what the csv module asks for, so line breaks inside quoted
    # fields are read back unchanged. 'utf-8-sig' skips an optional byte-order
    # mark, which would otherwise become part of the first header name.
    with open(filename, mode='r', encoding='utf-8-sig', errors='ignore', newline='') as csv_file:
        return [row["Topic"] for row in csv.DictReader(csv_file)]

# getresult takes filename and the index of the row to be searched and 
# returns the algorithm and tools corresponding to that index 
def getresult(filename, index):
    """Print the algorithm and tools stored in one data row of a CSV file.

    Args:
        filename: path of the CSV file to read.
        index: zero-based position of the data row to show (the header line
            is not counted).

    Returns:
        None. The row's "Algorithms" and "Tools used" values are printed;
        nothing is printed when ``index`` does not match any data row.

    Raises:
        KeyError: if the selected row lacks an "Algorithms" or "Tools used"
            column.
    """
    # newline='' is what the csv module asks for, so line breaks inside quoted
    # fields are read back unchanged. 'utf-8-sig' skips an optional byte-order
    # mark, which would otherwise become part of the first header name.
    with open(filename, mode='r', encoding='utf-8-sig', errors='ignore', newline='') as csv_file:
        for line_count, row in enumerate(csv.DictReader(csv_file)):
            if line_count == index:
                print("Algorithm: %s" % row["Algorithms"] + "\nTools: %s" % row["Tools used"])
                break


In [39]:
if __name__ == '__main__':
    print('Loading file...')
    # loading dataset.csv file
    filename = 'dataset.csv'  # name of the file to be loaded
    topic_list = getRows(filename)

    input_string = input('Enter the search string: ')

    # Score every topic against the query. Position i in topic_list is the
    # i-th data row of the file, which is the index getresult() expects.
    scores = [cosine_sim(topic, input_string) for topic in topic_list]

    if not scores:
        print('No topics found in %s' % filename)
    else:
        # max() returns the first of several equal scores, the same row a
        # stable descending sort would put first.
        best_index = max(range(len(scores)), key=scores.__getitem__)
        if scores[best_index] > 0:
            getresult(filename, best_index)
        else:
            # A top score of zero means no topic shares a term with the query;
            # do not present an arbitrary row as a suggestion.
            print('No matching topic found.')

Loading file...
Enter the search string: chatbot
Algorithm: Naive Bayes Classifier
Tools: Flask (Python Web Framework), Pycharm IDE
