# 0. Installation (one-time job)

In [1]:
#!pip install scikit-learn==0.23.1

In [2]:
# xlrd no longer support xlsx - https://stackoverflow.com/questions/65254535/xlrd-biffh-xlrderror-excel-xlsx-file-not-supported
!pip install openpyxl



# 1. Import Libraries

In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from TextPreprocessing import text_preprocessing

# 2. Check Data

In [4]:
# Display every column when a DataFrame is rendered
pd.set_option('display.max_columns', None)

# Load the 'FAQ' sheet of the workbook, reading it with the openpyxl engine
data = pd.read_excel(
    './ASD FAQ KB v001.xlsx',
    sheet_name='FAQ',
    engine='openpyxl',
)

# Preview the first rows
data.head()

Unnamed: 0,sn,Question,Long_Answer,Short_Answer,Source,Remarks
0,1,What are Autism Spectrum Disorders (ASD)?,ASD refers to a wide spectrum of neurodevelopm...,,http://birchtreecenter.org/learn/autism,
1,2,How common is autism?,According to a 2020 report commissioned by the...,,http://birchtreecenter.org/learn/autism,
2,3,What causes autism? Can it be cured?,The causes of this complex disorder remain unc...,,http://birchtreecenter.org/learn/autism,
3,4,Why doesn’t intervention center refer to its s...,Our students are children or youth who are cha...,,http://birchtreecenter.org/learn/autism,
4,5,What are the types of Autism Spectrum Disorders?,Autistic Disorder; Asperger Syndrome; Pervasiv...,,http://dhss.alaska.gov/dph/wcfh/Pages/autism/s...,


In [5]:
# Report the (rows, columns) dimensions of the loaded FAQ table
print(data.shape)

(226, 6)


# 3. Data Preprocessing

In [6]:
# Select the Long_Answer column and preprocess every answer.
# Direct indexing raises a clear KeyError if the column is missing, whereas
# data.get() would return None and only fail later on .map().
# The items returned by text_preprocessing are re-joined into one
# space-separated string per answer, which is the input CountVectorizer expects.
long_answer = data['Long_Answer'].map(lambda x: ' '.join(text_preprocessing(x)))

# Vectorize the answers as bag-of-words term counts
# (not one-hot: a term that appears several times in an answer is counted each time)
sparse_vectorizer = CountVectorizer(strip_accents = 'unicode')
sparse_vectors = sparse_vectorizer.fit_transform(long_answer)

# Shape of the document-term matrix: (number of answers, vocabulary size)
print(sparse_vectors.shape)

(226, 2753)


# 4. Build Topic Model using LDA

In [7]:
# Number of topics (clusters) to extract; chosen by the analyst
n_topics = 4

# Configure the LDA topic model (online learning, fixed random_state)
lda = LatentDirichletAllocation(
    n_components=n_topics,
    max_iter=1000,
    learning_method='online',
    random_state=0,
)

# Fit the model on the document-term matrix built in the preprocessing step
lda.fit(sparse_vectors)

LatentDirichletAllocation(learning_method='online', max_iter=1000,
                          n_components=4, random_state=0)

# 5. Display the resulting topics/clusters of ASD FAQ's Long_Answer field

In [8]:
# Print the top-n key words
def print_top_words(model, feature_names, n_top_words):
    """Print the highest-weighted words of every topic in a fitted topic model.

    Parameters
    ----------
    model : object exposing ``components_``
        Iterating ``components_`` yields one weight vector per topic; each
        vector must support ``argsort()``.
    feature_names : sequence of str
        Vocabulary, indexable by the positions returned from ``argsort()``.
    n_top_words : int
        Number of words to print per topic, highest weight first.

    Prints a ``Topic #<idx>:`` header (0-based index), the space-joined top
    words, and a blank separator line for each topic. Returns ``None``.
    """
    for topic_idx, topic in enumerate(model.components_):
        print("Topic #%d:" % topic_idx)
        # argsort is ascending, so the reversed tail slice yields the
        # n_top_words largest weights in descending order
        top_word_ids = topic.argsort()[:-n_top_words - 1:-1]
        print(" ".join([feature_names[i] for i in top_word_ids]))
        # Blank line after every topic (previously printed only once,
        # after the last topic, because it sat outside the loop)
        print()

In [9]:
# Show the first n_top_words key words
n_top_words = 10

# get_feature_names() was removed in scikit-learn 1.2 in favour of
# get_feature_names_out(); fall back to the old name on older versions
if hasattr(sparse_vectorizer, 'get_feature_names_out'):
    feature_names = sparse_vectorizer.get_feature_names_out()
else:
    feature_names = sparse_vectorizer.get_feature_names()

for topic_idx, topic in enumerate(lda.components_):
    # Topics are numbered from 1 in the printed output
    print('Topic {num}'.format(num=topic_idx+1))
    # argsort is ascending, so the reversed tail slice gives the
    # n_top_words highest-weighted vocabulary indices, best first
    top_word_ids = topic.argsort()[:-n_top_words - 1:-1]
    print(" ".join([feature_names[word_id] for word_id in top_word_ids]))
    print()

Topic 1
autism disorder asd child may spectrum people behavior social cause

Topic 2
child autism may treatment intervention diagnosis early help parent therapy

Topic 3
institute national autism tel information health disorder fax behavior md

Topic 4
ability assessment behaviour concern specific characteristic academic ot memory diet



In [13]:
# Maximum number of example question/answer pairs to print per topic
n_examples = 5

# Most probable topic (0-based) for every answer, computed with a single
# batched transform instead of one transform per row per topic
dominant_topic = lda.transform(sparse_vectors).argmax(axis=1)

# Iterate over however many topics the fitted model has (numbered from 1)
for topic_no in range(1, lda.n_components + 1):
    print("Belongs to Topic: ", topic_no)
    print('-'*15)
    # Row positions whose dominant topic is the current one, in file order
    member_rows = [row for row, topic in enumerate(dominant_topic)
                   if topic + 1 == topic_no]
    for row in member_rows[:n_examples]:
        # Columns are addressed by name rather than by position
        print("Question: ", data['Question'].iloc[row])
        print("Answer: ", data['Long_Answer'].iloc[row])
        print()
        print('-'*50)
    print('*'*100)

Belongs to Topic:  1
---------------
Question:  What are Autism Spectrum Disorders (ASD)?

--------------------------------------------------
Question:  How common is autism?
Answer:  According to a 2020 report commissioned by the U.S. Centers for Disease Control and Prevention, approximately one in 54 American children has been diagnosed with autism or a closely related neurodevelopmental disorder—a dramatic increase over the past decade.

--------------------------------------------------
Question:  What causes autism? Can it be cured?
Answer:  The causes of this complex disorder remain uncertain. Instructional methods such as Applied Behavior Analysis have proven effective in helping individuals with autism learn to overcome many of the challenges that autism presents. These interventions have proven particularly effective when started during the first several years of a child’s life.

--------------------------------------------------
Question:  Why doesn’t intervention center refe

Question:  What are the clinical issues that should be assessed in adult ASD?
Answer:  When a person is referred for an ASD assessment, the assessor looks not only at the specific characteristics of the ASD, but also considers features of other alternative or concurrent disorders. Psychologists will review history and current behaviours and concerns, as well as administer a variety of measures to determine a diagnosis. Specific concerns to be addressed in adult assessment may include: [] Intellectual/Cognitive ability: . assesses intellectual abilities; specifically, verbal and perceptual processing abilities; [] Academic ability: provides information about individual academic achievement overall, and identifies areas of academic strengths and challenges; [] Memory and attention: examines ability to remember or recognize information in short- and long- term memory, as well as working memory, and determines an individual’s ability to attend; [] Functional or Life-skills: assessment of f

# 6. Interpret the identified topics (using top 10 words)

### Topic 1 is about: Introduction/Overview of ASD. What happens in ASD.


### Topic 2 is about: Treatment of ASD


### Topic 3 is about: Other related or indirect information about ASD


### Topic 4 is about: Assessment/Check for ASD