# Conflict Scenarios - Search
-----

In [1]:
# Read Conflict Scenarios Data From Archive
import pandas as pd # import pandas module

# Load the survey responses from the CSV archive into a DataFrame
data = pd.read_csv(filepath_or_buffer="Conflict Scenarios Research.csv")

# Display one randomly drawn row as a quick sanity check
data.sample(n=1)

Unnamed: 0,"Describe a past experience you've had that involved conflict with a family member, friend, or significant other. Be as detailed as you like.","Contrary to the previous question, describe a past experience you've had that did not involve conflict with a family member, friend or significant other. Be as detailed as you like."
91,My mother and I don’t really get along. We’re ...,My sister and I tend to go out with me and her...


In [2]:
import spacy
import nltk
from nltk.stem import WordNetLemmatizer
from itertools import chain

class info:
    """Keyword search over a collection of text documents.

    Every document is tokenized with the spaCy English model, optionally
    lowercased and lemmatized, and recorded in an inverted index that maps
    each term to the positions of the documents containing it.

    Attributes:
        lowercase: whether tokens are lowercased before indexing/querying.
        lemmatization: whether tokens are passed through the WordNet
            lemmatizer before indexing/querying.
        documents: private copy of the indexed texts, in input order.
        nlp: the loaded spaCy pipeline used for tokenization.
        lemmatizer: WordNetLemmatizer instance (only set when
            ``lemmatization`` is true).
        tokenized_docs: one list of normalized tokens per document.
        inverted_index: list of ``[term, doc_id, doc_id, ...]`` entries.
    """

    def __init__(self, documents, lowercasing=False, lemmatization=False):
        """Tokenize ``documents`` and build the inverted index.

        Args:
            documents: iterable of document texts to index.
            lowercasing: lowercase every token when true.
            lemmatization: lemmatize every token when true.
        """
        self.lowercase = lowercasing
        self.lemmatization = lemmatization

        # Keep our own copy so query() never depends on a notebook-level
        # variable that may be missing or rebound after the index was built.
        self.documents = list(documents)

        # Load the English language model
        self.nlp = spacy.load("en_core_web_sm")

        # Initialize the WordNet lemmatizer
        if lemmatization:
            self.lemmatizer = WordNetLemmatizer()

        self.tokenized_docs = [self._tokenize(text) for text in self.documents]
        self.inverted_index = self._build_index(self.tokenized_docs)

    def _tokenize(self, text):
        """Return the normalized token list for ``text``.

        Documents and queries both go through this method so that query
        terms are normalized exactly like the indexed terms.
        """
        tokens = [token.text for token in self.nlp(text)]
        if self.lowercase:
            tokens = [word.lower() for word in tokens]
        if self.lemmatization:
            tokens = [self.lemmatizer.lemmatize(word) for word in tokens]
        return tokens

    @staticmethod
    def _build_index(tokenized_docs):
        """Build ``[term, doc_id, ...]`` entries from tokenized documents.

        Document ids come from ``enumerate`` rather than ``list.index`` so
        that two documents with identical tokens each keep their own id.
        """
        postings = {}
        for doc_id, tokens in enumerate(tokenized_docs):
            # set() ensures a document is posted at most once per term
            for term in set(tokens):
                postings.setdefault(term, []).append(doc_id)
        # Sorted terms give a reproducible index layout across runs
        return [[term] + postings[term] for term in sorted(postings)]

    def query(self, phrase):
        """Return the documents matching ``phrase``.

        Documents containing every distinct query term are returned when
        at least one such document exists; otherwise every document that
        contains any of the query terms is returned.

        Args:
            phrase: free-text query string.

        Returns:
            List of matching document texts ordered by their position in
            the indexed collection (empty when nothing matches).
        """
        terms = set(self._tokenize(phrase))
        if not terms:
            return []

        # retrieve postings for each query term present in the index
        lookup = {entry[0]: set(entry[1:]) for entry in self.inverted_index}
        found = [lookup[term] for term in terms if term in lookup]
        if not found:
            return []

        # An exact match requires every query term to be indexed and to
        # share at least one document; otherwise fall back to the union.
        matches = set()
        if len(found) == len(terms):
            matches = set.intersection(*found)
        if not matches:
            matches = set.union(*found)

        # retrieve relevant documents
        return [self.documents[doc_id] for doc_id in sorted(matches)]

In [3]:
# Assemble the corpus (conflict answers first, then non-conflict answers)
# and build the search index over it
conflict, non_conflict = (
    data[column].tolist()
    for column in (
        "Describe a past experience you've had that involved conflict with a family member, friend, or significant other. Be as detailed as you like.",
        "Contrary to the previous question, describe a past experience you've had that did not involve conflict with a family member, friend or significant other. Be as detailed as you like.",
    )
)
documents = conflict + non_conflict
information = info(documents, lowercasing=True, lemmatization=True)

def search(phrase):
    """Run ``phrase`` against the index and print every matching document.

    Prints a progress line, the number of hits, and then each matching
    document followed by a blank separator.

    Args:
        phrase: free-text query passed to ``information.query``.

    Returns:
        The list of matching documents, so callers that assign the result
        receive the hits instead of ``None``.
    """
    print("searching...")
    # Named `results` so the local no longer shadows this function's name
    results = information.query(phrase)
    print("Results = " + str(len(results)))
    print("\n")

    for item in results:
        print(item)
        print("\n")

    return results

-----

In [6]:
# Example query: print the responses retrieved for the term "happy"
Search = search("happy")

searching...
Results = 30


When was a teen my father and mother lived in to different places. They loved me but one year I when to live with dad. He push me aside and lock me in my room. All I was allowed to do was school, phone and TV. So drawing become away to destress and feel happy and good in myself


I was in a past abusive relationship, today I reflect back on it and wonder what on earth I thought, but indeed I'm amused,at how we as humans think we can change people into whom we want them to be or how we would like them to behave.. I went out clubbing on my own and came home a bit later than usual. I was not aware that one of his friends had seen  me dancing with another man in the club and had called him and rold him., I was not really flirting but did like the company . Nothing happened with this guy at rhe end of the night i left and went home. When I opened the front door to our apartment, he was waiting for me, however, on the way home I had stopped at a seven eleven to bu