# Importing libraries

In [1]:
import requests
import os
import nltk
import warnings
import urllib
import bs4 as bs
import pyttsx3
import speech_recognition as sr
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from wikipedia import page
from PIL import Image


# Fetch the NLTK resources requested by name: 'stopwords', 'punkt' and 'wordnet'.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
# Also fetch the 'popular' collection; quiet=True is passed for this call only.
nltk.download('popular', quiet=True)
# Install a global "ignore" filter for Python warnings.
# NOTE(review): this also hides deprecation and convergence warnings from every library below.
warnings.filterwarnings("ignore")

import random
import string
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pandas import DataFrame

%matplotlib inline

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


# Text Gathering

## Getting Weather details for city

In [2]:
# Default weather page read by temperature(); later cells rebind `page1` to the
# page of the selected city's country. The timeout keeps a stalled connection
# from blocking the kernel indefinitely.
page1 = requests.get('https://www.timeanddate.com/weather/japan', timeout=30)

In [3]:
def temperature(topic, page=None):
    """Return the temperature readings listed for a city on a weather page.

    Parameters
    ----------
    topic : str
        City name; it is title-cased and matched as a literal substring of
        the city names found in the page's table.
    page : object with a ``content`` attribute, optional
        Downloaded page to parse. When omitted, the module-level ``page1``
        is used, as before.

    Returns
    -------
    pandas.Series
        The 'Temperature' values of every matching row (may be empty).

    Raises
    ------
    ValueError
        If the expected weather table is not present in the page.
    """
    if page is None:
        page = page1
    soup = BeautifulSoup(page.content, 'html.parser')

    data = soup.find(class_='zebra fw tb-wt zebra va-m')
    if data is None:
        # Without this guard the failure surfaces as "'NoneType' object is not callable".
        raise ValueError("Weather table not found in the downloaded page")

    # Anchor tags carry the city names, 'rbi' cells carry the readings.
    city = [tag.contents[0] for tag in data('a')]
    temp = [tag.contents[0] for tag in data.find_all(class_='rbi')]

    weather = pd.DataFrame({'City': city, 'Temperature': temp})

    # regex=False: city names such as "St. Louis" must be matched literally,
    # not interpreted as a regular expression. na=False keeps the mask boolean.
    matches = weather['City'].str.contains(topic.title(), regex=False, na=False)

    return weather.loc[matches, 'Temperature']

## Scrape city detail from Wiki

In [4]:
def wiki_data(topic):
    """Download the English Wikipedia article for `topic` and return its cleaned text.

    The topic is title-cased, every space is turned into an underscore and the
    result is percent-encoded before being appended to the wiki base URL.
    The text of all ``<p>`` elements is concatenated, bracketed numeric
    references such as ``[12]`` and all digits are removed, the text is
    lower-cased and runs of whitespace are collapsed to one space.

    Parameters
    ----------
    topic : str
        Article title, e.g. a city name.

    Returns
    -------
    str
        The cleaned, lower-cased article text.
    """
    import re
    # Imported explicitly: `import urllib` alone does not guarantee that the
    # `request` and `parse` submodules are loaded.
    import urllib.parse
    import urllib.request

    # Replace *every* space: converting only the first one left names such as
    # "Ho Chi Minh City" with raw spaces in the URL, which urlopen rejects.
    # NOTE(review): title() also re-cases names like "Rio de Janeiro" -> "Rio De Janeiro";
    # confirm the wiki resolves those titles.
    title = topic.title().replace(' ', '_')
    url = "https://en.wikipedia.org/wiki/" + urllib.parse.quote(title)

    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(url) as reply:
        source = reply.read()

    # Parsing the data / creating BeautifulSoup object
    soup = bs.BeautifulSoup(source, 'lxml')

    # Fetching the data
    text = "".join(paragraph.text for paragraph in soup.find_all('p'))

    # Preprocessing the data
    text = re.sub(r'\[[0-9]*\]', ' ', text)   # citation markers like [3]
    text = re.sub(r'\s+', ' ', text)
    text = text.lower()
    text = re.sub(r'\d', ' ', text)           # digits
    text = re.sub(r'\s+', ' ', text)

    return text

# Text Cleaning

## Remove special characters

In [5]:
def rem_special(text):
    """Return `text` with every character of ``string.punctuation`` deleted."""
    # Third argument of maketrans lists the characters to drop.
    drop_punctuation = str.maketrans('', '', string.punctuation)
    return text.translate(drop_punctuation)

## Stemming

In [6]:
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

def stemmer(text):
    """Return `text` with every token replaced by its Porter stem.

    The text is split with ``word_tokenize`` and each token is stemmed on its
    own; the stems are joined with single spaces. Stemming token by token
    (instead of calling ``str.replace`` on the whole text) prevents a short
    word's stem from overwriting the same letters inside longer words.

    Parameters
    ----------
    text : str
        Text to stem.

    Returns
    -------
    str
        Space-separated stems, or an empty string when there are no tokens.
    """
    porter = PorterStemmer()  # one instance for the whole call
    return ' '.join(porter.stem(token) for token in word_tokenize(text))

##  Lemmatization 

In [7]:
# Module-level WordNet lemmatizer instance used by LemTokens().
lemmer = WordNetLemmatizer()
def LemTokens(tokens):
    """Lemmatize each token with the module-level `lemmer` and return the results as a list."""
    return list(map(lemmer.lemmatize, tokens))

## Stop words

In [8]:
from nltk.tokenize.toktok import ToktokTokenizer
# Tokenizer instance used by remove_stopwords() to split text into tokens.
tokenizer = ToktokTokenizer()

# English stop-word list from the NLTK corpus; remove_stopwords() filters against it.
stopword_list = nltk.corpus.stopwords.words('english')

def remove_stopwords(text, is_lower_case=False):
    """Return `text` with the tokens found in `stopword_list` removed.

    Parameters
    ----------
    text : str
        Text to filter; it is split with the module-level `tokenizer`.
    is_lower_case : bool, default False
        When True, tokens are compared to the stop-word list as they are;
        otherwise each token is lower-cased for the comparison only.

    Returns
    -------
    str
        The surviving tokens (whitespace-stripped, original casing) joined
        by single spaces.
    """
    # Pick the comparison key once instead of branching on every token.
    comparison_key = (lambda token: token) if is_lower_case else str.lower

    kept = []
    for raw_token in tokenizer.tokenize(text):
        token = raw_token.strip()
        if comparison_key(token) not in stopword_list:
            kept.append(token)
    return ' '.join(kept)

## Finding part of Speech (POS)

In [9]:
import spacy
import en_core_web_sm

nlp = spacy.load('en_core_web_sm')

# Process whole documents
sample_text = ("The heavens are above. The moral code of conduct is above the civil code of conduct")
doc = nlp(sample_text)

# Token and Tag: the token objects themselves, then the part-of-speech tag of each
spacy_df1 = list(doc)
spacy_df = [token.pos_ for token in spacy_df1]

# Side-by-side table of tokens and their spaCy POS tags
df_spacy_nltk = pd.DataFrame()
df_spacy_nltk['origional'] = spacy_df1
df_spacy_nltk['spacy'] = spacy_df
#df_spacy_nltk

## Named Entity Recognition

In [10]:
import spacy
# Loads the 'en_core_web_sm' pipeline and rebinds the module-level `nlp`
# (the POS cell above already loads the same pipeline).
nlp = spacy.load('en_core_web_sm')

def ner(sentence):
    """Print the text and label of every entity the `nlp` pipeline finds in `sentence`.

    Returns None; the results are only printed, one entity per line.
    NOTE(review): a later cell defines another `ner` that replaces this one.
    """
    for entity in nlp(sentence).ents:
        print(entity.text, entity.label_)

## Sentiment analysis using TextBlob

In [11]:
from textblob import TextBlob

def senti(text):
    """Return the `polarity` attribute of a TextBlob built from `text`."""
    return TextBlob(text).polarity

## Spelling check

In [12]:
from spellchecker import SpellChecker
# Module-level spell checker instance used by spelling().
spell = SpellChecker()

def spelling(text):
    """Return `text` with each whitespace-separated word spell-corrected.

    Every word of `text` is passed to ``spell.correction``; when no
    suggestion comes back (None or empty) the word is kept unchanged.
    The corrected words are joined with single spaces.

    Parameters
    ----------
    text : str
        Text to correct.

    Returns
    -------
    str
        The corrected text; an empty string for empty/blank input.
    """
    corrected_words = []
    # Iterate over the words of the *argument*; the previous version walked the
    # unrelated module-level `sample_text`, so user input was never corrected.
    for word in text.split():
        suggestion = spell.correction(word)
        # Falling back to the original word guards against a None suggestion.
        corrected_words.append(suggestion if suggestion else word)
    # Rebuilding word by word avoids str.replace rewriting the same letters
    # inside other words.
    return ' '.join(corrected_words)

# Conversation

## Voice enabled
### Chatbot speak

In [13]:
def speak(message):
    """Queue `message` (formatted as text) on a pyttsx3 engine and run it.

    Obtains an engine from ``pyttsx3.init()``, passes the formatted message to
    ``say`` and then calls ``runAndWait``. Returns None.
    """
    tts = pyttsx3.init()
    utterance = '{}'.format(message)
    tts.say(utterance)
    tts.runAndWait()

In [14]:
# Module-level text-to-speech engine; a later cell uses it to read the
# recognised speech back to the user.
engine = pyttsx3.init()
#engine.say("Hello hi")
# No say() call precedes this in the cell (the one above is commented out).
engine.runAndWait()

### User input

In [18]:
r = sr.Recognizer()
mic = sr.Microphone()
with mic as source:
    # Sample the background noise first, then record one utterance.
    r.adjust_for_ambient_noise(source)
    audio = r.listen(source)
# Transcribe once and reuse the result: calling recognize_google() a second
# time just to print it sent the same audio to the service again.
text_audio = r.recognize_google(audio)
print(text_audio)

LOL hello test


In [19]:
# Read the transcribed speech (`text_audio`) back through the module-level `engine`.
engine.say(text_audio)
engine.runAndWait()

## Creating dictionary for cities

In [15]:
# Load the city table and lower-case its 'city' column in a single expression.
df = (
    pd.read_csv("Cities Database.csv")
    .assign(city=lambda frame: frame['city'].str.lower())
)

In [16]:
df

Unnamed: 0,city_ascii,city,country
0,Tokyo,tokyo,Japan
1,Jakarta,jakarta,Indonesia
2,Delhi,delhi,India
3,Guangzhou,guangzhou,China
4,Mumbai,mumbai,India
...,...,...,...
44686,Numto,numto,Russia
44687,Nord,nord,Greenland
44688,Timmiarmiut,timmiarmiut,Greenland
44689,San Rafael,san rafael,Bolivia


In [17]:
# Map each 'city_ascii' name to a one-element list holding its lower-cased 'city' name.
city = {ascii_name: [lower_name] for ascii_name, lower_name in zip(df.city_ascii, df.city)}

In [18]:
city

{'Tokyo': ['tokyo'],
 'Jakarta': ['jakarta'],
 'Delhi': ['delhi'],
 'Guangzhou': ['guangzhou'],
 'Mumbai': ['mumbai'],
 'Manila': ['manila'],
 'Shanghai': ['shanghai'],
 'Sao Paulo': ['sao paulo'],
 'Seoul': ['seoul'],
 'Mexico City': ['mexico city'],
 'Cairo': ['cairo'],
 'New York': ['new york'],
 'Dhaka': ['dhaka'],
 'Beijing': ['beijing'],
 'Kolkata': ['kolkata'],
 'Bangkok': ['bangkok'],
 'Shenzhen': ['shenzhen'],
 'Moscow': ['moscow'],
 'Buenos Aires': ['buenos aires'],
 'Lagos': ['lagos'],
 'Istanbul': ['istanbul'],
 'Karachi': ['karachi'],
 'Bangalore': ['bangalore'],
 'Ho Chi Minh City': ['ho chi minh city'],
 'Osaka': ['osaka'],
 'Chengdu': ['chengdu'],
 'Tehran': ['tehran'],
 'Kinshasa': ['kinshasa'],
 'Rio de Janeiro': ['rio de janeiro'],
 'Chennai': ['chennai'],
 "Xi'an": ["xi'an"],
 'Lahore': ['lahore'],
 'Chongqing': ['chongqing'],
 'Los Angeles': ['los angeles'],
 'Baoding': ['baoding'],
 'London': ['london'],
 'Paris': ['paris'],
 'Linyi': ['linyi'],
 'Dongguan': ['don

In [19]:
def city_name(sentence):
    """Return the key of the `city` dictionary whose name occurs in `sentence`.

    The sentence is lower-cased and split on whitespace; every run of
    consecutive words is compared with the names stored in `city`, longest
    runs first, so multi-word names ("new york") are found and win over a
    shorter name contained in them. Each run is tried as typed and with
    leading/trailing punctuation removed ("delhi?" -> "delhi").

    Parameters
    ----------
    sentence : str
        Free text that may mention a city.

    Returns
    -------
    str or None
        The matching key of `city`, or None when nothing matches.
    """
    # Reverse index: stored name -> key. setdefault keeps the first key, which
    # mirrors the order in which the dictionary used to be scanned.
    lookup = {}
    for key, values in city.items():
        for value in values:
            if isinstance(value, str):
                lookup.setdefault(value, key)

    words = sentence.lower().split()
    if not words or not lookup:
        return None

    # No stored name is longer than this many words, so longer runs cannot match.
    longest_name = max(len(name.split()) for name in lookup)

    for size in range(min(longest_name, len(words)), 0, -1):
        for start in range(len(words) - size + 1):
            candidate = ' '.join(words[start:start + size])
            for form in (candidate, candidate.strip(string.punctuation)):
                if form in lookup:
                    return lookup[form]
    return None

## Pre-processing all

In [20]:
def LemNormalize(text):
    """Normalise `text` into a list of lemmatized tokens.

    Steps, in order: strip punctuation (rem_special), lower-case, drop stop
    words (remove_stopwords), tokenize with ``nltk.word_tokenize`` and
    lemmatize the tokens (LemTokens).
    """
    without_punctuation = rem_special(text).lower()
    without_stopwords = remove_stopwords(without_punctuation)
    tokens = nltk.word_tokenize(without_stopwords)
    return LemTokens(tokens)

## Generating answer using Cosine Similarity

In [21]:
# Generating answer
def response(user_input):
    """Return the corpus sentence most similar to `user_input`.

    `user_input` is appended to the module-level `sent_tokens` list (and left
    there; the caller is expected to remove it). All sentences are vectorised
    with TF-IDF using `LemNormalize` as tokenizer, and the cosine similarity
    between the appended query and every sentence is computed. The sentence
    with the second-highest score is returned; the top score is normally the
    query compared with itself.

    Parameters
    ----------
    user_input : str
        The user's query.

    Returns
    -------
    str
        The selected sentence, or a fixed apology when its score is 0.
    """
    sent_tokens.append(user_input)

    vectorizer = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(sent_tokens)

    # Similarity of the query (last row) against every row, as a flat array.
    scores = cosine_similarity(tfidf_matrix[-1], tfidf_matrix).flatten()

    # Ascending order of scores: position -2 is the runner-up.
    runner_up = scores.argsort()[-2]

    if scores[runner_up] == 0:
        return "I am sorry! I don't understand you."
    return sent_tokens[runner_up]


## Input city 

In [22]:
# Ask until the input mentions a city that city_name() recognises; an
# unrecognised name used to reach `.values[0]` below and fail with IndexError.
topic = None
while topic is None:
    topic = city_name(str(input("Please enter the city name you want to ask queries for: ")))
    if topic is None:
        print("Sorry, I do not know that city. Please try again.")
country_name = df[df['city_ascii'] == topic]['country'].values[0]
text = wiki_data(topic)

# The weather page of the city's country is what temperature() parses.
page1 = requests.get('https://www.timeanddate.com/weather/' + country_name, timeout=30)
sent_tokens = nltk.sent_tokenize(text)  # converts to list of sentences
word_tokens = nltk.word_tokenize(text)  # converts to list of words

# The page may lack the expected table, or may not list this city at all;
# fall back to an explanatory message instead of crashing on `.iloc[0]`.
try:
    city_temperatures = temperature(topic)
except (AttributeError, TypeError, ValueError):
    city_temperatures = pd.Series(dtype=object)

if len(city_temperatures) > 0:
    weather_reading = city_temperatures.iloc[0]
else:
    weather_reading = "Sorry, I could not find the current temperature for " + topic

## Greetings

In [23]:
# Greetings keyword matching
GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up","hey")
GREETING_RESPONSES = ["hi", "hey", "hi there", "hello", "I am glad! You are talking to me"]

def greeting(sentence):
    """Return a random greeting when `sentence` contains a greeting keyword.

    The sentence is split on whitespace, each word is lower-cased and has its
    leading/trailing punctuation removed ("Hello!" -> "hello"), and the words
    are re-joined. A keyword matches when it occurs as a whole word or whole
    word sequence, so the two-word entry "what's up" can match as well.

    Parameters
    ----------
    sentence : str
        The user's message.

    Returns
    -------
    str or None
        A random element of GREETING_RESPONSES, or None when no keyword occurs.
    """
    words = [word.strip(string.punctuation).lower() for word in sentence.split()]
    # Padding with spaces turns "whole word(s)" matching into a substring test
    # without letting "hi" match inside "this".
    padded = ' ' + ' '.join(words) + ' '
    for keyword in GREETING_INPUTS:
        if ' ' + keyword + ' ' in padded:
            return random.choice(GREETING_RESPONSES)
    return None

## Places

In [24]:
# Keywords (plural forms, then singular forms) that mark a message as a question
# about places. The fourth entry used to repeat "places", leaving "place" out.
PLACES_INPUTS = ("places", "monuments", "buildings", "place", "monument", "building")

import spacy
# Loads the 'en_core_web_sm' pipeline and rebinds the module-level `nlp`
# (earlier cells already load the same pipeline).
nlp = spacy.load('en_core_web_sm')

def ner(sentence):
    """Return the entities labelled "FAC" found in `sentence` as one string.

    The text is run through the module-level `nlp` pipeline. Each entity whose
    label is "FAC" contributes its text followed by a comma and a space, so a
    non-empty result ends with ", ". Returns an empty string when there is no
    such entity.
    """
    return "".join(
        entity.text + "," + " "
        for entity in nlp(sentence).ents
        if entity.label_ == "FAC"
    )
    
# Comma-separated place names extracted from the article text.
places_imp = ner(text)

# De-duplicate whole place names, keeping first-seen order. The previous
# word-by-word filter dropped any word that had already appeared, so
# "delhi airport, delhi gate," came out as "delhi airport, gate,".
place_names = [chunk.strip() for chunk in places_imp.split(',')]
PLACES_RESPONSES = ', '.join(dict.fromkeys(name for name in place_names if name))

def places(sentence):
    """Return PLACES_RESPONSES when `sentence` contains a places keyword.

    Each whitespace-separated word is lower-cased and stripped of
    leading/trailing punctuation before being looked up in PLACES_INPUTS, so
    "places?" is recognised like "places".

    Parameters
    ----------
    sentence : str
        The user's message.

    Returns
    -------
    The current value of PLACES_RESPONSES, or None when no keyword occurs.
    """
    for word in sentence.split():
        if word.strip(string.punctuation).lower() in PLACES_INPUTS:
            return PLACES_RESPONSES
    return None

In [25]:
def reset_places():
    """Rebuild the places reply from the current module-level `text`.

    Calls ``ner(text)``, which yields place names separated by commas, removes
    repeated names while keeping first-seen order, and joins the result with
    ", ". De-duplication is done per place name: the previous word-by-word
    filter turned "delhi airport, delhi gate," into "delhi airport, gate,".

    Returns
    -------
    str
        The unique place names joined by ", " (empty when none were found).
    """
    candidates = (chunk.strip() for chunk in ner(text).split(','))
    unique_places = dict.fromkeys(name for name in candidates if name)
    return ', '.join(unique_places)

## Weather 

In [26]:
# Keywords that mark a user message as a weather question.
WEATHER_INPUTS = ("weather", "temp", "temperature")

# Reply returned by weather(): the value `weather_reading` holds when this cell runs.
WEATHER_RESPONSES = weather_reading

def weather(sentence):
    """Return WEATHER_RESPONSES when `sentence` contains a weather keyword.

    Each whitespace-separated word is lower-cased and stripped of
    leading/trailing punctuation before being looked up in WEATHER_INPUTS, so
    "weather?" is recognised like "weather".

    Parameters
    ----------
    sentence : str
        The user's message.

    Returns
    -------
    The current value of WEATHER_RESPONSES, or None when no keyword occurs.
    """
    for word in sentence.split():
        if word.strip(string.punctuation).lower() in WEATHER_INPUTS:
            return WEATHER_RESPONSES
    return None

## Chat

In [28]:
continue_dialogue = True
print("Bot: Hello")
speak("Hello")

# --- Session set-up -------------------------------------------------------
# Ask until the input mentions a city that city_name() recognises; an
# unrecognised name used to reach `.values[0]` below and fail with IndexError.
topic = None
while topic is None:
    topic = city_name(str(input("Please enter the city name you want to ask queries for: ")))
    if topic is None:
        print("Bot: Sorry, I do not know that city. Please try again.")
country_name = df[df['city_ascii'] == topic]['country'].values[0]
text = wiki_data(topic)

# The weather page of the city's country is what temperature() parses.
page1 = requests.get('https://www.timeanddate.com/weather/' + country_name, timeout=30)
sent_tokens = nltk.sent_tokenize(text)  # converts to list of sentences
word_tokens = nltk.word_tokenize(text)  # converts to list of words

# Parse the weather page once and reuse the result for both the printout and
# the reply. The page may lack the expected table or may not list this city;
# fall back to an explanatory message instead of crashing on `.iloc[0]`.
try:
    city_temperatures = temperature(topic)
except (AttributeError, TypeError, ValueError):
    city_temperatures = pd.Series(dtype=object)
print(city_temperatures)

if len(city_temperatures) > 0:
    weather_reading = city_temperatures.iloc[0]
else:
    weather_reading = "Sorry, I could not find the current temperature for " + topic
PLACES_RESPONSES = reset_places()
WEATHER_RESPONSES = weather_reading

# --- Dialogue loop --------------------------------------------------------
while continue_dialogue:
    user_input = input("User:")
    user_input = user_input.lower()
    user_input = spelling(user_input)  # spelling check
    print("Sentiment score=", senti(user_input))  # sentiment score

    if user_input == 'bye':
        continue_dialogue = False
        print("Bot: Goodbye.")
        speak("goodbye")
        continue

    if user_input == 'thanks' or user_input == 'thank you':
        print("Bot: You are welcome..")
        speak(" You are welcome")
        continue

    # Each matcher is evaluated once and its result reused; they are tried in
    # order (greeting, weather, places) and the first non-None reply wins.
    reply = greeting(user_input)
    if reply is not None:
        print("Bot: " + reply)
        speak(reply)
        continue

    reply = weather(user_input)
    if reply is not None:
        print("Bot: " + str(reply))
        speak(reply)
        continue

    reply = places(user_input)
    if reply is not None:
        print("Bot: Important places are " + reply)
        speak("Important places are")
        speak(reply)
        continue

    # Fallback: retrieve the most similar sentence of the article.
    print("Bot: ", end="")
    temp_response = response(user_input)
    print(temp_response)
    speak(temp_response)
    # response() appended the query to sent_tokens; take it out again.
    sent_tokens.remove(user_input)

Bot: Hello
8      27 °C
231    27 °C
Name: Temperature, dtype: object
Sentiment score= 0.4
Bot: Important places are delhi airport, the metro, jawaharlal nehru stadium,
Sentiment score= 0.0
Bot: the jama masjid was built by the mughal emperor shah jahan between and .
Sentiment score= 0.0
Bot: in – , approximately , , students were enrolled in primary schools, , in middle schools and , in secondary schools across delhi.
Sentiment score= 0.0
Bot: air pollution in delhi to vehicular emissions, .
Sentiment score= 0.0
Bot: Goodbye.
