## Import libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

## Import dataset

In [None]:
# Read the raw dataset from disk and preview its first rows.
dataset_path = "Data/dataset3.csv"
df = pd.read_csv(dataset_path)
df.head()

## Observing data

In [None]:
# Keep the column index in a variable so the columns can be iterated over below.
columns = df.columns

In [None]:
# Print the distinct values found in each column, one column per output line.
for column_name in columns:
    distinct_values = df[column_name].unique()
    print(column_name + " Unique values : ", distinct_values)


In [None]:
# Count the missing values in every column.
df.isna().sum()

In [None]:
# Show the dtype pandas assigned to each column.
df.dtypes

## Visualizations

In [None]:
# Data visualization: distribution of the target column.
# One bar per value of 'Disorder', bar height = number of rows with that value.
fig, ax = plt.subplots(figsize=(15, 5))
sns.countplot(x='Disorder', data=df, ax=ax)
ax.set_xlabel('Disorder', size=15)
ax.set_ylabel('Identified number of patients', size=15)
ax.set_title('Diagnosed disorders of patients with counts', color='green', size=25)
plt.show()

In [None]:
# Columns to plot: one count plot is drawn per entry.
variables = ["feeling nervous", "panic", "breathing rapidly", "sweating", "having trouble in concentration", "having trouble in sleeping", "having trouble with work",
             "feel hopeless", "feel angry", "over react", "have changes in eating", "have suicidal thought", "feeling tired", "have a close friend",
             "have a social media addiction", "gained weight", "have material possessions", "an introvert", "a popping up stressful memory", "having nightmares",
             "avoid people or activities", "feel negative", "trouble concentrating", "blamming yourself"]

for v in variables:
    # `sns.countplot` has no `figsize` argument (unknown keywords are forwarded to
    # matplotlib's bar artist, which rejects it), so the figure size is set on the
    # figure itself and the axes are handed to seaborn.
    fig, ax = plt.subplots(figsize=(30, 30))
    sns.countplot(x=v, data=df, ax=ax)
    plt.show()
    # Release the figure so plotting many columns does not accumulate open figures.
    plt.close(fig)

## Converting string data to boolean data type

In [None]:
# Select the answer columns (label slice, both ends inclusive).
answers = df.loc[:, 'feeling nervous':'blamming yourself']

# Fail early, with a clear message, if a cell holds anything other than "yes"/"no";
# otherwise such a value would silently be treated as "no" by the comparison below.
assert answers.isin(["yes", "no"]).all().all(), \
    "expected only 'yes'/'no' in the columns 'feeling nervous'..'blamming yourself'"

# Element-wise comparison gives a bool-dtype frame directly: True where the answer
# is "yes", False where it is "no". This is the True/False one-hot layout that is
# passed to fpgrowth below.
x = answers.eq("yes")
x.head()

# FP growth algorithm implementation

In [None]:
from mlxtend.frequent_patterns import fpgrowth

In [None]:
# Mine the frequent itemsets of the boolean frame `x`; an itemset is kept when its
# support is at least `min_support`. Column names are used as the item labels.
min_support = 0.2
final = fpgrowth(x, min_support=min_support, use_colnames=True)
final.head(25)

In [None]:
# Display only the 'itemsets' column of the FP-growth result.
final['itemsets']

## Using the frequent itemset for building the system

In [None]:
# Convert every frequent itemset into a plain Python list, keeping the result order.
fin = final['itemsets']
y = [list(itemset) for itemset in fin]

In [None]:
# Preview the first 22 itemset lists (the same leading slice size that getMax reads).
y[0:22]

In [None]:
# Function to get the lists that contain a given word (used when the answer is yes)
def getInList(word , test1):
    """Return the itemsets of ``test1`` that contain ``word``, with ``word`` removed.

    Parameters
    ----------
    word : the item to look for.
    test1 : iterable of itemsets (lists of items).

    Returns
    -------
    list of list
        One new list per itemset that contained ``word``; each is a copy of the
        original itemset without the first occurrence of ``word``. An itemset that
        consisted only of ``word`` yields an empty list.

    The input itemsets are never modified. The previous implementation called
    ``remove`` on the caller's own lists, so the shared itemset lists shrank every
    time the function was used and re-running a cell changed its result.
    """
    matches = []
    for itemset in test1:
        if word in itemset:
            remaining = list(itemset)  # copy, so the caller's list stays intact
            remaining.remove(word)
            matches.append(remaining)
    return matches

# Function to get the lists that do not contain a given word (used when the answer is no)
def getNotInList(word , temp):
    """Return the entries of ``temp`` in which ``word`` does not occur.

    The order of ``temp`` is kept and the surviving entries are returned as the
    same objects (they are not copied).
    """
    kept = []
    for entry in temp:
        if word in entry:
            continue
        kept.append(entry)
    return kept



In [None]:
# Get the maximum occurring word in a given list
def getMax(freqList, itemsets=None, n_candidates=22):
    """Return the candidate word that occurs in the most entries of ``freqList``.

    Candidate words are the first element of each of the first ``n_candidates``
    itemsets. For every candidate, the number of entries of ``freqList`` that
    contain it is counted, and the candidate with the highest count is returned.
    On a tie the candidate that appears earliest among the itemsets wins.

    Parameters
    ----------
    freqList : iterable of itemsets (lists of words) to count occurrences in.
    itemsets : optional iterable of itemsets to draw the candidates from.
        Defaults to the notebook-level ``final['itemsets']``.
    n_candidates : int, default 22
        How many leading itemsets supply a candidate word. The default keeps the
        value that was previously hard-coded.

    Returns
    -------
    The winning candidate word. Note that its count may be 0 (for example when
    ``freqList`` is empty); the first candidate is returned in that case.

    Raises
    ------
    ValueError
        If there are no candidates at all (``max`` of an empty mapping).
    """
    if itemsets is None:
        # Default to the FP-growth result computed earlier in the notebook.
        itemsets = final['itemsets']

    # Only the leading itemsets are needed, so only those are converted to lists.
    candidates = [list(itemset)[0] for itemset in list(itemsets)[:n_candidates]]

    counts = dict()
    for candidate in candidates:
        if candidate not in counts:  # a repeated candidate would get the same count
            counts[candidate] = sum(1 for entry in freqList if candidate in entry)

    return max(counts, key=counts.get)


    

## Testing the built model 

In [None]:
# Number of yes/no questions to ask.
n_questions = 3

# Work on copies of the itemset lists so this cell never alters `y`; re-running it
# always starts again from the complete set of frequent itemsets.
tempwordlist = [list(itemset) for itemset in y]

# Each round asks about the currently most frequent word and then narrows the
# itemsets with the answer. Asking and filtering happen in the same iteration, so
# every answer the user gives is actually used (previously a final question was
# asked whose answer was discarded).
for _ in range(n_questions):
    if not any(tempwordlist):
        # No itemset with remaining words is left: nothing more to ask about.
        break
    maxword = getMax(tempwordlist)
    word = input("do you have " + maxword)
    if word.strip().lower() == 'yes':
        # Keep only the itemsets containing the word (returned without the word).
        tempwordlist = getInList(maxword, tempwordlist)
    else:
        # Any other answer is treated as "no": drop the itemsets containing the word.
        tempwordlist = getNotInList(maxword, tempwordlist)



In [None]:
# Flatten the remaining itemsets into a single list of distinct words, keeping the
# order in which each word is first seen.
r = list(dict.fromkeys(item for itemset in tempwordlist for item in itemset))
r