In [1]:
import datetime
import os

import cohere
import pandas as pd
import requests
from tqdm import tqdm
# Show full cell contents: post titles are long and would otherwise be truncated.
pd.options.display.max_colwidth = None

def get_post_titles(**kwargs):
    """Fetch submission titles from the Pushshift Reddit search API.

    Read more: https://github.com/pushshift/api

    Args:
        **kwargs: Query parameters forwarded unchanged to the submission
            search endpoint (the notebook passes ``size``, ``after``,
            ``before``, ``subreddit``, ``sort_type`` and ``sort``).

    Returns:
        list: The ``title`` field of every item in the ``data`` array of the
        JSON response, in the order the API returned them.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the API does not answer within 30 seconds.
        KeyError: If the JSON payload has no ``data`` key or an item has no
            ``title``.
    """
    base_url = "https://api.pushshift.io/reddit/search/submission/"
    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.get(base_url, params=kwargs, timeout=30)
    # Surface HTTP errors here instead of failing later while parsing an
    # error page as JSON.
    response.raise_for_status()
    return [post['title'] for post in response.json()['data']]

# Read the Cohere API key from the environment rather than committing it to the
# notebook. NOTE(review): the key that used to be hard-coded here has been
# exposed and should be revoked.
api_key = os.environ.get("COHERE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the COHERE_API_KEY environment variable before running this notebook."
    )

# Shared client used by cohereExtractor.extract().
co = cohere.Client(api_key)


# Few-shot examples for the extractor, stored as (expected extraction, post title).
# "none" marks a title that mentions no symptom.
symptoms_examples = [
    (
        "none",
        "Could you please tell me what it is!",
    ),
    (
        "chest pain",
        "Chest pain!",
    ),
    (
        "blurred vision, headaches",
        "Occasionally blurred vision with headaches",
    ),
    (
        "food poisoning",
        "I think I may have some minor food poisoning of some sorts",
    ),
    (
        "fingers shaking",
        "My fingers won't stop shaking!",
    ),
    (
        "nausea and dry mouth",
        "Sudden onset of nausea and dry mouth",
    ),
]

In [2]:
class cohereExtractor():
    """Few-shot text extractor built on the Cohere ``generate`` endpoint.

    The prompt is the task description followed by every example text and its
    label, each pair separated by a ``---`` line, and finally the new text
    with an empty label for the model to complete.
    """

    def __init__(self, examples, example_labels, labels, task_desciption, example_prompt):
        """Store the few-shot configuration.

        Args:
            examples: List of example input texts.
            example_labels: List of expected extractions, index-aligned with
                ``examples``.
            labels: Stored on the instance; not used by any method of this class.
            task_desciption: Text placed at the very start of every prompt.
            example_prompt: Text placed between each input and its label.
        """
        self.examples = examples
        self.example_labels = example_labels
        self.labels = labels
        self.task_desciption = task_desciption
        self.example_prompt = example_prompt

    def make_prompt(self, example):
        """Build the few-shot prompt that ends with ``example`` left unanswered.

        Args:
            example: The text to extract from.

        Returns:
            str: The complete prompt string.
        """
        examples = self.examples + [example]
        # The final, empty label is the slot the model is asked to fill in.
        labels = self.example_labels + [""]
        return (self.task_desciption +
                "\n---\n".join([examples[i] + "\n" +
                                self.example_prompt +
                                labels[i] for i in range(len(examples))]))

    def extract(self, example):
        """Ask the model to complete the prompt for ``example``.

        Uses the module-level ``co`` client.

        Args:
            example: The text to extract from.

        Returns:
            str: Text of the first generation, without a trailing newline.
        """
        extraction = co.generate(
            model='large',
            prompt=self.make_prompt(example),
            max_tokens=10,
            temperature=0.1,
            stop_sequences=["\n"])
        text = extraction.generations[0].text
        # Drop the "\n" stop sequence only when it is really there: a
        # generation that stopped at max_tokens has no trailing newline, and
        # an unconditional [:-1] would cut off a real character.
        return text[:-1] if text.endswith("\n") else text


# Build the symptom extractor from the few-shot examples. Each entry of
# symptoms_examples is (label, text), so index 1 feeds the inputs and index 0
# feeds the expected answers.
cohereMovieExtractor = cohereExtractor(
    examples=[pair[1] for pair in symptoms_examples],
    example_labels=[pair[0] for pair in symptoms_examples],
    labels=[],
    task_desciption="",
    example_prompt="extract the symptoms from the post:",
)

# Preview the prompt that will be sent for a new post.
print(cohereMovieExtractor.make_prompt('<input text here>'))

Could you please tell me what it is!
extract the symptoms from the post:none
---
Chest pain!
extract the symptoms from the post:chest pain
---
Occasionally blurred vision with headaches
extract the symptoms from the post:blurred vision, headaches
---
I think I may have some minor food poisoning of some sorts
extract the symptoms from the post:food poisoning
---
My fingers won't stop shaking!
extract the symptoms from the post:fingers shaking
---
Sudden onset of nausea and dry mouth
extract the symptoms from the post:nausea and dry mouth
---
<input text here>
extract the symptoms from the post:


In [44]:
num_posts = 100

# Pin the search window to UTC. A naive datetime's .timestamp() is interpreted
# in the kernel's local timezone, so the window would otherwise shift by the
# UTC offset of whichever machine runs the notebook.
window_start = datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)
window_end = datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)

# Top-scoring r/medical_advice submission titles inside the window.
symptoms_list = get_post_titles(size=num_posts,
      after=str(int(window_start.timestamp())),
      before=str(int(window_end.timestamp())),
      subreddit="medical_advice",
      sort_type="score",
      sort="desc")

# Show the list
symptoms_list


['This felt like it belonged',
 'FOR THE LOVE OF ALL THINGS HOLY PLEASE USE THE NSFW TAG FOR GROSS THINGS',
 'Which would it be?',
 'My mom send me this pic- she’s 100 miles away from a Dr. 63, F, smoker, alcoholic',
 "This mark appeared on my 2 week old a few days ago, any ideas as to what it is ? Her pediatrician canceled our schedule til Wednesday and I'm getting impatient.",
 'My cut stopped bleeding them dramatically picked up??',
 'Contact dermatitis or something more serious?',
 "Cut myself on rocks, won't heal after over a month. Poor college student, can't afford health insurance. Need more advice.",
 'Not Your Average Bald spot... What the FUCK is this?',
 'Jabbing feeling on right side of head. Lasts a couple of seconds and sometimes I feel paralysed during the pain. Should I be worried?',
 'Scrapped my knee two days ago. Cleaned it then and applied a bandage. Removed it today to clean it and bandage it. Is this normal? or is it infected?',
 'F 18 97lbs. Duration of complain

In [45]:
# Run the extractor over every title. `results` must stay index-aligned with
# `symptoms_list` because the next cell zips them into one DataFrame, so a
# failed call records None instead of being skipped.
results = []
for text in tqdm(symptoms_list):
    try:
        extracted_text = cohereMovieExtractor.extract(text)
    except Exception as e:
        print('ERROR: ', e)
        extracted_text = None
    results.append(extracted_text)


100%|█████████████████████████████████████████| 100/100 [01:10<00:00,  1.41it/s]


In [46]:
# Side-by-side view of each post title and the symptoms extracted from it.
pd.DataFrame({
    'text': symptoms_list,
    'extracted_text': results,
})


Unnamed: 0,text,extracted_text
0,This felt like it belonged,this felt like it belonged
1,FOR THE LOVE OF ALL THINGS HOLY PLEASE USE THE NSFW TAG FOR GROSS THINGS,none
2,Which would it be?,none
3,"My mom send me this pic- she’s 100 miles away from a Dr. 63, F, smoker, alcoholic",none
4,"This mark appeared on my 2 week old a few days ago, any ideas as to what it is ? Her pediatrician canceled our schedule til Wednesday and I'm getting impatient.",mark
...,...,...
95,I have this gaping cut on my finger and I don’t know what to do next. I cleaned it and bandaged it up but I don’t know if I should do anything else. I held it open so you could see how deep it is. I can’t go to a hospital I’m a selfharmer and I slipped and hit my finger. I’m scared they’ll admit me.,cut on finger
96,"Sometimes I’ll be going about my business and then I’ll suddenly feel like I’m not me. It is hard to explain. I feel like my head is floating and like I’m observing myself doing things. It’s not out of body, it’s more like I’m looking out from myself. Needless to say it’s disturbing.",out of body
97,I have a lump under my testicles,lump under testicles
98,Do I have Skittles Pox? Urgent Care said hives but it’s been 24 hours and 110 MB’s of Prednisone and 100mg of Benadryl and it’s still getting worse.,skittles pox
