In [None]:
Pytrends

We are using Pytrends to develop a recommendation system for herbs. 
Pytrends is a useful tool for developing a recommendation system and can be used to suggest the most popular herbs to users based on their preferences.

In [None]:
import os
os.chdir('pytrends/')

import sys
sys.path.insert(0,os.getcwd())

print(os.getcwd())
print(sys.path)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from typing import List, Dict

from pytrends.request import TrendReq

from tqdm import tqdm
from itertools import islice
from IPython.display import display
import time

from warnings import warn

/home/jupyter/wt23-naturdoc/ds/feature_trends_popularity/pytrends
['/home/jupyter/wt23-naturdoc/ds/feature_trends_popularity/pytrends', '/home/jupyter/wt23-naturdoc/ds/feature_trends_popularity', '/opt/conda/lib/python37.zip', '/opt/conda/lib/python3.7', '/opt/conda/lib/python3.7/lib-dynload', '', '/home/jupyter/.local/lib/python3.7/site-packages', '/opt/conda/lib/python3.7/site-packages', '/opt/conda/lib/python3.7/site-packages/IPython/extensions', '/home/jupyter/.ipython']


Aim

The aim of this function is to provide a simple and efficient way to rank a list of items by their popularity in Google search using the pytrends library. The function is useful for tasks such as market research, product development, and trend analysis.

Documentation for rank_by_popularity function

This function ranks a list of items by their popularity in Google search. It takes in a list of lists of strings called "group", where each sublist represents a group of items that need to be ranked. The function returns a list of dictionaries, where each dictionary represents the ranked items in a group.


 

In [2]:
def pytrend_score_group(terms : list, print_table : bool = False) -> Dict[str,int]:
    pt = TrendReq(retries=5)
    pt.build_payload(terms)
    tmp_df = pt.interest_over_time()
    tmp_df = tmp_df[terms].mean()
    
    if print_table:
        display(tmp_df)
        
    ranks = tmp_df.rank().to_dict()
    return ranks 

def break_into_groups(my_list : list, times : int):
    """times is the size of the group"""
    sublists = []
    for i in range(0, len(my_list), times):
        sublists.append(my_list[i:i+5])
    return sublists

def rank_by_popularity(group: List[List[str]], debug = True) -> List[Dict[str, int]]:
    ranked_groups = []
    for herbs in group:
        ranked_herbs = {}
        
        if debug:
            for herb in herbs:
                score = random.randint(1, 5)
                ranked_herbs[herb] = score
        
        else:
            ranked_herbs = pytrend_score_group(herbs)
            
        #print(ranked_herbs)
        ranked_groups.append(ranked_herbs)
    return ranked_groups

 A list of herbs is used for testing purposes.

In [109]:
test_herbs = ["St. John's Wort",
'Cranberry',
'Agrimony',
'Cherry stalks',
'Cannabis flowering tops',
'Herbal tea combinations for use in cough and cold',
"St. Benedict's thistle (also blessed thistle, holy thistle or spotted thistle)",
'Lime flower',
'Guarana',
'Rosemary Oil',
'Rosemary leaf',
'Sweet Fennel']

print("Phase 1 - Break into groups:")
print(break_into_groups(test_herbs,times=5))

print("Phase 2 - Rank by popularity:")
rank_by_popularity(break_into_groups(test_herbs,times=5), debug=False)

Phase 1 - Break into groups:
[["St. John's Wort", 'Cranberry', 'Agrimony', 'Cherry stalks', 'Cannabis flowering tops'], ['Herbal tea combinations for use in cough and cold', "St. Benedict's thistle (also blessed thistle, holy thistle or spotted thistle)", 'Lime flower', 'Guarana', 'Rosemary Oil'], ['Rosemary leaf', 'Sweet Fennel']]
Phase 2 - Rank by popularity:
{"St. John's Wort": 2.5, 'Cranberry': 5.0, 'Agrimony': 2.5, 'Cherry stalks': 2.5, 'Cannabis flowering tops': 2.5}
{'Herbal tea combinations for use in cough and cold': 1.5, "St. Benedict's thistle (also blessed thistle, holy thistle or spotted thistle)": 1.5, 'Lime flower': 3.0, 'Guarana': 5.0, 'Rosemary Oil': 4.0}
{'Rosemary leaf': 2.0, 'Sweet Fennel': 1.0}


[{"St. John's Wort": 2.5,
  'Cranberry': 5.0,
  'Agrimony': 2.5,
  'Cherry stalks': 2.5,
  'Cannabis flowering tops': 2.5},
 {'Herbal tea combinations for use in cough and cold': 1.5,
  "St. Benedict's thistle (also blessed thistle, holy thistle or spotted thistle)": 1.5,
  'Lime flower': 3.0,
  'Guarana': 5.0,
  'Rosemary Oil': 4.0},
 {'Rosemary leaf': 2.0, 'Sweet Fennel': 1.0}]

Then i will apply the Pytrends to the  dataset Ema

**"Herbal Medicines" EMA Dataset**

In [4]:
# Split the list of herbs into sublists of 5 herbs each
herbs=[
"St. John's Wort",
'Cranberry',
'Agrimony',
'Cherry stalks',
'Cannabis flowering tops',
'Herbal tea combinations for use in cough and cold',
"St. Benedict's thistle (also blessed thistle, holy thistle or spotted thistle)",
'Lime flower',
'Guarana',
'Rosemary Oil',
'Rosemary leaf',
'Sweet Fennel',
'Bitter Fennel',
'California poppy',
'Herbal tea combinations for use in gastrointestinal complaints',
'Willow herb',
'Walnut leaf',
'Javanese turmeric',
'Tribulus herb ',
'Couch grass rhizome',
'Green tea',
'Centella',
'Bitter Fennel Fruit Oil',
'Juniper cone berry',
'Fenugreek',
'Dittany of Crete herb',
'White horehound',
"St. John's Wort",
'Liquorice Root',
'Iceland moss',
"Brewer's yeast ",
"Salvia miltiorrhiza root and rhizome ('Danshen')",
'Dandelion root',
'Rose flower',
'Castor oil',
'Senna leaf',
'Cascara',
'Hedge mustard',
'Comfrey Root',
'Ironwort',
'Senna pods',
'Rhubarb',
'Dried juice of Aloes leaves',
'Frangula bark',
'Nettle root',
'Arctic root',
'Nettle Leaf',
'Chicory root',
'Bladderwrack',
'Pumpkin seed',
'Ash Leaf',
'Eucalyptus leaf',
'Gumweed herb',
'Restharrow root',
'Tea-tree oil',
'Dried Bilberry fruit',
'Pygeum africanum bark',
'Majoram',
'Mouse-ear hawkweed',
'Fresh Bilberry fruit',
'Knotgrass herb',
'Matricaria Flower',
'Sandy everlasting',
'Ginkgo leaf',
'Java Tea',
'Mastic (Mastix, Pistaciae lentisci resina)',
'Maté Leaf',
'Bergamot oil',
'Herbal tea combinations for use as sedatives',
'Herbal tea combinations for use in loss of appetite',
'Wild Pansy',
'Saw Palmetto Fruit',
"Butcher's Broom",
'Clove',
'Clove oil',
'Three-lobed sage leaf',
'Milkthistle Fruit',
'Evening primrose oil',
'Mallow leaf',
'Kava-kava',
'Lovage root',
'Peony root, red',
'Ginseng',
'Peony root, white',
'Lemon verbena leaf',
'Pink rock-rose',
'Bogbean leaf',
'Cola',
'Garlic',
'Cinnamon Bark Oil',
'Raspberry leaf',
'Cinnamon',
'peppermint leaf',
'Yarrow',
'Eucalyptus Oil',
'Horse-chestnut bark',
'Lavender',
'Arnica flower',
'Woody nightshade stem',
'Roman Chamomile Flower',
'Caraway oil',
'European Goldenrod',
'Juniper Oil',
'Feverfew',
'Thyme oil',
'Rupturewort',
'Meadowsweet',
'Meadowsweet Flower',
'Horse-Chestnut Seed',
'Peppermint oil',
'Tormentil',
'Oak Bark',
'Wormwood',
'Dandelion Root with Herb',
'Burdock Root',
'Caraway fruit',
'Valerian Root and Hop Strobile',
'Dandelion Leaf',
'Pelargonium root',
'Ginger',
'Caraway fruit',
'Hamamelis Leaf',
'Hamamelis Bark',
'Hamamelis Distillate',
'Malabar-nut leaf',
'Oat Herb',
'Oat Fruit',
'Shepherds Purse',
'Polypody Rhizome',
'Yarrow Flower',
'Wild Strawberry Leaf',
'Motherwort',
'Gentian Root',
'Green bean pod',
'Mallow flower',
'Narrow-leaved coneflower root',
'Artichoke Leaf',
'Turmeric',
'Agnus Castus Fruit',
'Pale Coneflower Root',
'Ivy leaf',
'Soya',
'Black Cohosh',
'Elder Flower',
'Mullein Flower',
'Calendula Flower',
'Angelica sinensis root',
'Kalmegh',
'Winter-cherry root',
'Melilot',
'Bearberry Leaf',
'Blackcurrant Leaf',
'Soya-bean lecithin',
'Grapevine Leaf',
'Olive leaf',
'Purple Coneflower Root',
'Willow Bark',
'Diuretic herbal tea combinations',
'Sage Leaf',
'Soya-bean oil',
'Boldo Leaf',
'Devil’s Claw Root',
'Marshmallow Root',
'Thyme',
'Hop strobile',
'Anise Oil',
'Aniseed',
'Primula flower',
'Passion Flower',
'Peru balsam',
'Thyme and Primula root',
'Valerian essential oil',
'Hawthorn Leaf and Flower',
'Horsetail herb',
'Valerian Root',
'Matricaria Oil',
'Purple Coneflower Herb',
'Eleutherococcus',
'Centaury',
"Cat's Claw",
'Birch Leaf',
'Capsicum',
'Katula',
'Melissa leaf',
'Lavender Oil',
'Ispaghula husk',
'Psyllium seed',
'Ispaghula Seed',
'Elderberry',
'Marigold',
'Mistletoe',
'Primula root',
'Silver lime flower',
'Onion',
'Ribwort Plantain',
'Greater Celandine',
'Fumitory',
'Eyebright',
'Nettle Herb',
'Sage Oil',
'Linseed']

In [9]:
herbs = list(set(herbs))

This code block is part of a larger program that appears to perform a ranking of herbs based on their popularity, using the Google Trends API to collect data on the search volume of each herb.After all rounds of ranking are complete, the program creates a DataFrame from the herb_score dictionary, sorts it by score and herb, and stores the sorted list of herbs in sorted_herbs.


In [17]:
GROUP_SIZE = 5
groups = break_into_groups(herbs, GROUP_SIZE)
herb_score = {herbs: 0 for herbs in herbs} #dict with 203 keys and all zeroes
herb_i = {herbs: 0 for herbs in herbs} #dict with 203 keys and all zeroes
ROUNDS = 10

for rnd in tqdm(range(ROUNDS)):
    
    # we need to iterate the groups
    for group in groups:

        #rank the herbs inside the group
        try:
            ranked_group = rank_by_popularity([group], debug=False)
        except:
            
            try:
                warn("Max retries archived, let's wait 30 seconds to keep the process")
                time.sleep(60.1)
                ranked_group = rank_by_popularity([group], debug=False)
            
            except:
                break
                
            time.sleep(0.5)
        #assign the points of the herb on herb_score
        ranked_group = ranked_group[0] #fix to transform ranked_group as a dictionary
        
        for herb, score in ranked_group.items():
            herb_score[herb] += ranked_group[herb]
            herb_i[herb] += 1
            
    # create a dataframe from herb_score
    df = pd.DataFrame.from_dict(herb_score, orient='index', columns=['score'])
    df.index.name = 'herb'
    df.reset_index(inplace=True)

    # sort the dataframe by score and herb
    df = df.sort_values(by=['score','herb'], ascending=False)

    # get the sorted herbs
    sorted_herbs = df['herb'].tolist()

    # break the sorted herbs into groups
    groups = break_into_groups(sorted_herbs,GROUP_SIZE)
    
       
#sort the dataframe by score and herb
df = df.sort_values(by=['score','herb'], ascending=False)

100%|██████████| 10/10 [11:04<00:00, 66.41s/it]


In [20]:
df.sort_values(by='score', ascending=False).tail(20)

Unnamed: 0,herb,score
2,"Peony root, white",1.5
164,Horsetail herb,1.0
142,Cinnamon Bark Oil,1.0
67,Couch grass rhizome,1.0
152,Dittany of Crete herb,1.0
117,Fresh Bilberry fruit,1.0
173,Greater Celandine,1.0
30,Hamamelis Distillate,1.0
11,Herbal tea combinations for use in cough and cold,1.0
155,Herbal tea combinations for use in loss of app...,1.0


In [21]:
df.to_csv('popular_herbs.csv', index=False)