# Import Libraries

In [50]:
################ WEB SCRAPING MODULES ############
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import bs4
from fake_useragent import UserAgent
import requests
################ TIME MODLULES ###################
import time
from datetime import date 
import datetime
############## DATA MANIPULATION MODULES #########
import pandas as pd
import numpy as np
import re
from nltk.corpus import stopwords

# Read 100 questions list

In [51]:
df = pd.read_excel('questions.xlsx',engine='openpyxl')
link = 'https://springbrookbehavioral.com/frequently-asked-questions-about-autism/'
df[link]=np.nan
df.drop(df.iloc[:, 1:25], inplace = True, axis = 1)
df.head

<bound method NDFrame.head of                                              Question  \
0                                     What is Autism?   
1       What are the Autism Spectrum Disorders (ASD)?   
2                        What is Asperger’s Syndrome?   
3   How can you tell Autism from Asperger’s Syndrome?   
4   What is Pervasive Developmental Disorder – Not...   
..                                                ...   
95  What are some ways that parents can reduce the...   
96  Do some families deal with stress better than ...   
97  Do siblings suffer increased stress as a resul...   
98          What can I do about my children’s stress?   
99  What are some resources for learning more abou...   

    https://springbrookbehavioral.com/frequently-asked-questions-about-autism/  
0                                                 NaN                           
1                                                 NaN                           
2                                         

# Scrape QA pairs from website

In [52]:
# Open webpage in a new window
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get('https://springbrookbehavioral.com/frequently-asked-questions-about-autism/')



Current google-chrome version is 92.0.4515
Get LATEST driver version for 92.0.4515
Driver [/home/gopan/.wdm/drivers/chromedriver/linux64/92.0.4515.107/chromedriver] found in cache


In [53]:
# Parse text in webpage
source = driver.page_source
soup = bs4.BeautifulSoup(source, 'html.parser')

In [54]:
# Search the questions mentioned in webpage
l = []
for i in soup.find_all('a',{'class':'fl-accordion-button-label'}):
    if(i.text[-1]=='?'):
        print(i.text)
        l.append(i.text)

Did I Cause My Child’s Autism?
How Can I Deal With This Diagnosis?
How Do I Find Help For My Child?
Will My Child Ever Be Normal?
What Will My Child’s Future Be Like?
What Can I Do To Prepare My Child For The Future?
Can My Child’s Autism Be Cured?
Will My Child Ever Be Able To Talk?
Is There A Medication That Will Get My Child To Stop __________________?


In [82]:
# Tried to search for answers of questions
k=[]
c = 0
for i in soup.find_all('div',{'class':'fl-accordion-content fl-clearfix'}):
    txt = i.text.strip()
    if(txt in l):
        print('*'*100)
    if(txt in l or len(txt.split())>50):
        print(c,'-->', txt)
    c = c+1
    
    k.append(txt)

0 --> No. After getting an ASD diagnosis, this is the first question that many parents have—especially mothers. However, while there may be a genetic component to autism, there is no scientific evidence that anything that you did or did not do while your child was in utero or in his or her formative years caused the disorder. Please understand that feelings of anger, frustration, and guilt are perfectly normal after the initial diagnosis. In many ways, you are experiencing a profound loss, and you should expect to grieve. Our military and TRICARE consultant DJ Reyes has written a blog about his personal journey through the stages of grief after receiving his son’s diagnosis that may be helpful for you.
1 --> Start by being patient and kind with yourself, your spouse, and your child. You have just received life-altering news. Give yourself time to process all of the feelings that you are going to encounter, and don’t suppress your emotions. Many parents of children with autism find it h

# Check which questions are similar

In [56]:
stop_words = set(stopwords.words('english'))

def clean_text(sent):
    sent = sent.lower() # lowercase
    sent = re.sub(r'[^\w\s]', '', sent) # remove punctuations
    sent = re.sub('Autism Spectrum Disorder','ASD',sent) # Compress term
    sent = [w for w in sent.split() if not w.lower() in stop_words] # Remove stopwords
    sent = " ".join(sent)
    return sent

In [57]:
def jaccard_similarity(list1, list2):
    s1 = set(list1)
    s2 = set(list2)
    return float(len(s1.intersection(s2)) / len(s1.union(s2)))

In [58]:
for i in l:
    i2 = clean_text(i)
    list1 = i2.split()
    
    temp1 = 0.0
    temp2 = ''
    temp3 = ''
    
    for j in df['Question']:
        j2 = clean_text(j)
        list2 = j2.split()
            
        sim = jaccard_similarity(list1, list2)
        
        if(sim>temp1):
            temp1 = sim
            temp2 = j
            temp3 = i
            
    if(temp1>=0.3):
        print('Website --> ',temp3)
        print('100 questions list --> ',temp2)
        print(temp1)
        print('*'*100)

Website -->  Did I Cause My Child’s Autism?
100 questions list -->  Does Thimerosal cause autism?
0.5
****************************************************************************************************
Website -->  How Do I Find Help For My Child?
100 questions list -->  Can chelation therapy help my autistic child?
0.3333333333333333
****************************************************************************************************
Website -->  Can My Child’s Autism Be Cured?
100 questions list -->  What is Autism?
0.3333333333333333
****************************************************************************************************


In [59]:
# Directly add answer to 'What are the Autism Spectrum Disorders (ASD)?' in existing dataframe
#df.iloc[2,0]
#df.iloc[1,1]=k[3]

In [60]:
l

['Did I Cause My Child’s Autism?',
 'How Can I Deal With This Diagnosis?',
 'How Do I Find Help For My Child?',
 'Will My Child Ever Be Normal?',
 'What Will My Child’s Future Be Like?',
 'What Can I Do To Prepare My Child For The Future?',
 'Can My Child’s Autism Be Cured?',
 'Will My Child Ever Be Able To Talk?',
 'Is There A Medication That Will Get My Child To Stop __________________?']

In [61]:
#l=l[1:]
#l

In [62]:
# manually select answers to questions in list 'l'

#l2 = [k[8]+'\n'+k[12]+'\n'+k[13]+'\n'+k[14],
#      k[16]+'\n'+k[17],
#     k[19]+'\n'+k[21]]
l2 = k

In [63]:
# Create new dataframe with QA pairs
df2 = pd.DataFrame(zip(l,l2),columns=['Question',link])
df2

Unnamed: 0,Question,https://springbrookbehavioral.com/frequently-asked-questions-about-autism/
0,Did I Cause My Child’s Autism?,"No. After getting an ASD diagnosis, this is th..."
1,How Can I Deal With This Diagnosis?,"Start by being patient and kind with yourself,..."
2,How Do I Find Help For My Child?,Start by making sure that you and your child h...
3,Will My Child Ever Be Normal?,There’s a saying in the autism community that ...
4,What Will My Child’s Future Be Like?,"The truth is, we don’t really know how life wi..."
5,What Can I Do To Prepare My Child For The Future?,Early intervention can do a lot toward fosteri...
6,Can My Child’s Autism Be Cured?,No. Autism is not a disease that can be cured—...
7,Will My Child Ever Be Able To Talk?,There’s no way to know. Some children with aut...
8,Is There A Medication That Will Get My Child T...,The common behaviors associated with ASD are o...


In [64]:
# Concatenate existing and new dataframes
df3 = pd.concat([df,df2],axis=0)
df3 = df3.sort_values(by=list(df3.columns[1:])).reset_index(drop=True)
df3

Unnamed: 0,Question,https://springbrookbehavioral.com/frequently-asked-questions-about-autism/
0,What Can I Do To Prepare My Child For The Future?,Early intervention can do a lot toward fosteri...
1,Did I Cause My Child’s Autism?,"No. After getting an ASD diagnosis, this is th..."
2,Can My Child’s Autism Be Cured?,No. Autism is not a disease that can be cured—...
3,How Can I Deal With This Diagnosis?,"Start by being patient and kind with yourself,..."
4,How Do I Find Help For My Child?,Start by making sure that you and your child h...
...,...,...
104,What are some ways that parents can reduce the...,
105,Do some families deal with stress better than ...,
106,Do siblings suffer increased stress as a resul...,
107,What can I do about my children’s stress?,


# Save Output

In [65]:
df3.to_excel('AnswersFromSource11_20.xlsx',index=False)