# Collect Trends
- This notebook `downloads` the `Trends` for the `current time` and `saves` them in the `DB`.

- *Note_1: If you already downloaded today, the program will not let you do it again and will notify you.*
- *Note_2: Don't worry, you can run it several times to verify it.*

In [None]:
from typing import Generator
from pathlib import Path

from transformers import pipeline

from twitter_trends import ArgsTwitterTrends, ParamsTwitterTrends, WOEIDCountry, requests_and_process
from scraping_kit import BotScraper, load_db_and_bots

# Folder holding the project's local data; handed to load_db_and_bots below.
path_data = Path("data")
# Load the Twitter DB handle (`db_tw`) and the pool of scraper bots (`bots`).
# NOTE(review): "scrape_tw" is presumably the database name — confirm against load_db_and_bots.
db_tw, bots = load_db_and_bots(path_data, "scrape_tw")

#req_args = ArgsTwitterTrends(data=ParamsTwitterTrends(woeid=WOEIDCountry.united_states))
#idx_bot_choiced, bot_choiced = bots.random_bot()
#insert_one_result_trend = requests_and_process(db_tw, req_args, bot_choiced)
#insert_one_result_trend

In [None]:
#for doc in db_tw.coll.trends.find():
#    for trend in doc["trends"]:
#        req_args = ArgsSearch(params=ParamsSearch(query=trend["query"]))
#        trend["topic"] = None #get_topic(req_args)
#    db_tw.coll.trends.update_one({"_id": doc["_id"]}, {"$set": {"trends": doc["trends"]}})

In [None]:
from typing import List, Tuple
from datetime import datetime
import requests
from requests import Response
from concurrent.futures import ThreadPoolExecutor, as_completed

from twitter45.params import ArgsSearch, ParamsSearch
from scraping_kit.db.models import Trends, Topic, Search, User
from scraping_kit import DBTwitter

In [None]:
# Pipelines used further down to condense tweet text and to score it against candidate topic labels.
# NOTE(review): no model is pinned for summarization, so the library picks its default checkpoint —
# consider pinning one explicitly for reproducible results.
summarizer = pipeline("summarization")
# Zero-shot classifier backed by the facebook/bart-large-mnli checkpoint.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

In [None]:
#classes = [
#    "arts & culture",
#    "business & finance",
#    "careers",             # FIXME
#    "entertainment",
#    "fashion & beauty",
#    "food",
#    "gaming",
#    "hobbies & interests",
#    "movies and tv",
#    "music",
#    "news",
#    "outdoors",
#    "science",
#    "sports",
#    "technology",
#    "travel"
#]
#import json
#with open("CLASSES_TWITTER.json", "r") as f:
#    TOPICS1 = list(json.load(f).keys())
    

In [None]:
def summary_text(summarizer, text: str, max_length=30) -> str:
    """Return `text` unchanged when it is short, otherwise a summary of it.

    Args:
        summarizer: Callable invoked as `summarizer(text, max_length=...)` that
            returns a sequence whose first item has a "summary_text" key.
        text: Text to condense.
        max_length: Word-count threshold below which `text` is returned as is;
            it is also the lower bound of the summary length requested from
            `summarizer`.

    Returns:
        The original text, or the summarizer's "summary_text" output.
    """
    word_count = len(text.split())
    if word_count < max_length:
        return text
    # Ask for roughly half of the input, but never less than the threshold.
    # This floor used to be a hard-coded 30, which silently ignored a
    # caller-supplied `max_length`.
    summary_max_length = max(max_length, word_count // 2)
    return summarizer(text, max_length=summary_max_length)[0]["summary_text"]


def response_search_to_classify(response: Response, summarizer, classifier, classes, multi_label=False) -> dict:
    """Classify the most-viewed tweets of a search response.

    The "timeline" of the response JSON is ordered by "views" (descending, a
    `None` count is treated as 0). The texts of the first ten entries are
    condensed with `summary_text`, joined with newlines and handed to
    `classifier` with `classes` as candidate labels.

    Returns:
        The classifier output with its "sequence" key renamed to "text".
    """
    def view_count(tweet) -> int:
        views = tweet["views"]
        return int(views) if views is not None else 0

    tweets: list = response.json()["timeline"]
    tweets.sort(key=view_count, reverse=True)

    top_texts = [summary_text(summarizer, tweet["text"]) for tweet in tweets[:10]]
    result = classifier(
        "\n".join(top_texts),
        candidate_labels=classes,
        multi_label=multi_label,
    )
    result["text"] = result.pop("sequence")
    return result


def iter_args_search(trends_doc: dict) -> Generator[ArgsSearch, None, None]:
    """Yield one `ArgsSearch` per entry of `trends_doc["trends"]`, querying by the entry's "name"."""
    yield from (
        ArgsSearch(params=ParamsSearch(query=entry["name"]))
        for entry in trends_doc["trends"]
    )


def get_tweets_search(
        db_tw: DBTwitter,
        bot: BotScraper,
        req_args: ArgsSearch
    ) -> Tuple[str, Response, datetime] | None:
    """Fetch the search response for a trend unless a topic is already stored for it.

    The trend name is read from `req_args.params.query`.

    Returns:
        `(trend_name, response, creation_date)` when the topics collection has
        no document with that `trend_name`, otherwise `None`.
    """
    trend_name = req_args.params.query

    # Skip trends that already have a topic stored.
    # NOTE(review): this checks the `topics` collection, while the notebook
    # currently only writes to `search` — confirm this is the intended check.
    already_stored = db_tw.coll.topics.find_one({"trend_name": trend_name}) is not None
    if already_stored:
        return None

    response, creation_date = bot.get_response(req_args)
    return trend_name, response, creation_date


In [None]:
import os

# For every stored trends document, search each trend concurrently and save
# the successful responses in the `search` collection.
# A single randomly chosen bot is used for all requests of this run.
idx_bot, bot = bots.random_bot()
for i, trends_doc in enumerate(db_tw.coll.trends.find()):
    print(f"----- Documento número: {i} -----")
    with ThreadPoolExecutor(max_workers=10) as pool:
        iter_futures = (pool.submit(get_tweets_search, db_tw, bot, req_args)
                        for req_args in iter_args_search(trends_doc))

        for future in as_completed(iter_futures):
            search = future.result()
            # `None` means get_tweets_search skipped the trend.
            if search is not None:
                trend_name, response, creation_date = search
                if response.status_code == 200:
                    search_json = response.json()
                    search_json["creation_date"] = creation_date
                    db_tw.coll.search.insert_one(search_json)
                else:
                    # Append straight to the log file instead of going through
                    # `echo` in a shell: trend names and response bodies are
                    # external text, so a quote in them broke the command and
                    # could inject arbitrary shell code.
                    with open("log_search.txt", "a", encoding="utf-8") as log_file:
                        log_file.write(f"error: {trend_name} | {response} | {response.text}\n")
                #    topic = Topic(
                #        trend_name = trend_name,
                #        topic_classes = response_search_to_classify(response, summarizer, classifier, TOPICS1, multi_label=True),
                #        creation_date = creation_date
                #    )
                #    db_tw.coll.save_topic(topic)
                #    print(f"Generated topic: {trend_name}")

In [None]:
# Count the documents stored in the search collection; the total ends up in `i`.
i = 0
for i, doc in enumerate(db_tw.coll.search.find(), start=1):
    pass


In [None]:
import json
# Path of a sample search response saved as JSON; it is read back below to build a Search model.
_p = "simple_response.json"
# Uncomment to overwrite the sample with the JSON of the current `response`.
#with open(_p, "w") as f:
#    json.dump(response.json(), f)

In [None]:
from scraping_kit.db.models.search import Search
from datetime import datetime

# Read the saved sample response and validate it through the Search model,
# stamping it with the current time as its creation date.
with open(_p) as f:
    raw_search = json.load(f)

search = Search(creation_date=datetime.now(), **raw_search)
search.model_dump()


In [None]:
# Topic taxonomy: top-level category -> labels belonging to it.
# Each label carries a "(Category)" or "(Followable)" suffix that is stripped
# by the normalization loop at the end of this cell.
CLASSES_TWITTER = {
    "arts & culture": [
        "Animation (Category)",
        "Art (Category)",
        "Astrology (Followable)",
        "Books (Category)",
        "Comics (Followable)",
        "Dance (Category)",
        "Horoscope (Category)",
        "Sci-fi and fantasy (Followable)",
        "Writing (Category)"
    ],
    "business & finance": [
        "Business & finance (Followable)",
        "Business & finance news (Followable)",
        "Business personalities (Category)",
        "Business professions (Category)",
        "Cryptocurrencies (Category)",
        "FinTech (Followable)",
        "Investing (Followable)",
        "Nonprofits (Followable)",
        "Small business (Followable)",
        "Startups (Followable)",
        "Venture capital (Followable)"
    ],
    "careers": [
        #"Accounting (Followable)",
        #"Advertising (Followable)",
        "Education (Category)",
        "Fields of study (Category)",
        "Marketing (Followable)"
    ],
    "entertainment": [
        "Entertainment (Followable)",
        "Celebrities (Category)",
        "Comedy (Category)",
        "Digital creators (Category)",
        "Entertainment brands (Category)",
        "Popular franchises (Category)",
        "Theater (Category)"
    ],
    "fashion & beauty": [
        "Beauty (Category)",
        "Fashion (Category)"
    ],
    "food": [
        "Food (Followable)",
        "Chefs (Followable)",
        "Cooking (Category)"
    ],
    "gaming": [
        "Gaming (Followable)",
        "Esports (Category)",
        "Game developers & publishers (Category)",
        "Gaming news (Followable)",
        "Gaming personalities & esports players (Category)",
        "Tabletop gaming (Category)",
        "Video game platforms & hardware (Category)",
        "Video games (Category)"
    ],
    "hobbies & interests": [
        "Animals (Category)",
        "Anime (Followable)",
        "At home (Category)",
        "Collectibles (Category)",
        "Family (Category)",
        "Fitness (Category)",
        "Podcasts (Category)",
        "Transportation (Category)",
        "Unexplained phenomena (Category)"
    ],
    "movies and tv": [
        "Movies (Category)",
        "Television (Category)"
    ],
    "music": [
        "Music (Followable)",
        "Alternative (Category)",
        "Blues (Followable)",
        "Bollywood music (Category)",
        "C-pop (Category)",
        "Classic rock (Followable)",
        "Classical music (Category)",
        "Country music (Category)",
        "Dance music (Category)",
        "Electronic music (Category)",
        "Experimental music (Followable)",
        "Folk Music (Followable)",
        "Hip-hop & rap (Category)",
        "Indie spotlight (Followable)",
        "J-pop (Category)",
        "Jazz (Followable)",
        "K-hip hop (Category)",
        "K-pop (Category)",
        "Metal (Category)",
        "Music brands (Followable)",
        "Music festivals (Followable)",
        "Music news (Followable)",
        "Musical instruments (Category)",
        "Opera (Followable)",
        "Pop (Category)",
        "Punjabi music (Followable)",
        "Punk (Followable)",
        "R&B and soul (Category)",
        "Radio stations (Category)",
        "Reggaeton (Category)",
        "Rock (Category)",
        "Soft rock (Followable)",
        "World music (Category)"
    ],
    "news": [
        "Arts and culture news (Followable)",
        "Business & finance news (Followable)",
        "COVID-19 (Category)",
        "Gaming news (Followable)",
        "Health news (Followable)",
        "Local news (Category)",
        "Movie news (Followable)",
        "Music news (Followable)",
        "Science news (Followable)",
        "Social movements (Category)",
        "Sports news (Followable)",
        "Tech news (Followable)",
        "US national news (Followable)",
        "World news (Followable)"
    ],
    "outdoors": [
        "Birdwatching (Followable)",
        "Fishing (Followable)",
        "Hunting (Followable)",
        "Nature (Followable)",
        "Rock climbing (Followable)"
    ],
    "science": [
        "Science (Followable)",
        "Archaeology (Followable)",
        "Biology (Category)",
        "Chemistry (Followable)",
        "Conservation & environmentalism (Followable)",
        "Geography (Followable)",
        "Geology (Followable)",
        "Physics (Followable)",
        "Science news (Followable)",
        "Space & astronomy (Followable)",
        "Weather (Followable)"
    ],
    # The commas matter here: without them Python concatenates adjacent string
    # literals and the whole list collapses into one single string.
    "sports": [
        "Sports (Followable)",
        "Australian rules football (Category)",
        "Australian rules football (Followable)",
        "Australian Football League (Followable)",
        "Auto racing (Followable)",
        "Formula 1 (Category)",
        "IndyCar (Followable)",
        "NASCAR (Category)",
        "Baseball (Followable)",
        "MLB (Category)",
        "Basketball (Followable)",
        "NBA (Category)",
        "NCAA Basketball (Category)",
        "WNBA (Category)",
        "Billiards (Category)",
        "Cheerleading (Followable)",
        "Combat Sports (Category)",
        "Cricket (Category)",
        "Cycling (Followable)",
        "Darts (Followable)",
        "Drone racing (Followable)",
        "Fantasy sports (Category)",
        "Football (Category)",
        "Golf (Category)",
        "Gymnastics (Category)",
        "Handball (Followable)",
        "Hockey (Category)",
        "Horse racing & equestrian (Followable)",
        "Lacrosse (Category)",
        "Martial arts (Category)",
        "Motorcycle racing (Category)",
        "Netball (Followable)",
        "Rodeo (Followable)",
        "Rowing (Followable)",
        "Rugby (Category)",
        "Sailing (Followable)",
        "Skateboarding (Category)",
        "Soccer (Category)",
        "Softball (Followable)",
        "Sports icons (Category)",
        "Sports journalists & coaches (Category)",
        "Sports news (Followable)",
        "Surfing (Followable)",
        "Swimming (Followable)",
        "Table tennis (Followable)",
        "Tennis (Category)",
        "Track & field (Followable)",
        "Triathlon (Followable)",
        "Volleyball (Followable)"
    ],
    "technology": [
        "Technology (Followable)",
        "Augmented reality (Followable)",
        "Cloud computing (Followable)",
        "Cloud platforms (Followable)",
        "Computer programming (Category)",
        "Cryptocurrencies (Category)",
        "Data science (Category)",
        "Databases (Followable)",
        "Drone technology (Followable)",
        "FinTech (Followable)",
        "Information security (Category)",
        "Internet of things (Followable)",
        "Tech brands (Category)",
        "Tech news (Followable)",
        "Tech personalities (Category)",
        "Virtual reality (Followable)"
    ],
    "travel": [
        "Travel (Followable)",
        "Adventure travel (Category)",
        "Air travel (Followable)",
        "Business travel (Followable)",
        "Cruises (Followable)",
        "Destinations (Category)",
        "Luxury travel (Followable)",
        "Museums and institutions (Followable)",
        "National parks (Followable)",
        "Theme parks (Followable)",
        "Travel guides (Followable)"
    ]
}

# Strip the "(Category)" / "(Followable)" suffix from every label. Stripping can
# leave two identical entries in one category (e.g. the two "Australian rules
# football" rows), so duplicates are dropped while keeping first-seen order.
for k, v in CLASSES_TWITTER.items():
    CLASSES_TWITTER[k] = list(dict.fromkeys(c.split("(")[0].strip() for c in v))


In [None]:
# Topic taxonomy: top-level category -> labels belonging to it.
# Each label carries a "(Category)" or "(Followable)" suffix that is stripped
# by the normalization loop at the end of this cell.
CLASSES_TWITTER = {
    "arts & culture": [
        "Animation (Category)",
        "Art (Category)",
        "Astrology (Followable)",
        "Books (Category)",
        "Comics (Followable)",
        "Dance (Category)",
        "Horoscope (Category)",
        "Sci-fi and fantasy (Followable)",
        "Writing (Category)"
    ],
    "business & finance": [
        "Business & finance (Followable)",
        "Business & finance news (Followable)",
        "Business personalities (Category)",
        "Business professions (Category)",
        "Cryptocurrencies (Category)",
        "FinTech (Followable)",
        "Investing (Followable)",
        "Nonprofits (Followable)",
        "Small business (Followable)",
        "Startups (Followable)",
        "Venture capital (Followable)"
    ],
    "careers": [
        "Accounting (Followable)",
        "Advertising (Followable)",
        "Education (Category)",
        "Fields of study (Category)",
        "Marketing (Followable)"
    ],
    "entertainment": [
        "Entertainment (Followable)",
        "Celebrities (Category)",
        "Comedy (Category)",
        "Digital creators (Category)",
        "Entertainment brands (Category)",
        "Popular franchises (Category)",
        "Theater (Category)"
    ],
    "fashion & beauty": [
        "Beauty (Category)",
        "Fashion (Category)"
    ],
    "food": [
        "Food (Followable)",
        "Chefs (Followable)",
        "Cooking (Category)"
    ],
    "gaming": [
        "Gaming (Followable)",
        "Esports (Category)",
        "Game developers & publishers (Category)",
        "Gaming news (Followable)",
        "Gaming personalities & esports players (Category)",
        "Tabletop gaming (Category)",
        "Video game platforms & hardware (Category)",
        "Video games (Category)"
    ],
    "hobbies & interests": [
        "Animals (Category)",
        "Anime (Followable)",
        "At home (Category)",
        "Collectibles (Category)",
        "Family (Category)",
        "Fitness (Category)",
        "Podcasts (Category)",
        "Transportation (Category)",
        "Unexplained phenomena (Category)"
    ],
    "movies and tv": [
        "Movies (Category)",
        "Television (Category)"
    ],
    "music": [
        "Music (Followable)",
        "Alternative (Category)",
        "Blues (Followable)",
        "Bollywood music (Category)",
        "C-pop (Category)",
        "Classic rock (Followable)",
        "Classical music (Category)",
        "Country music (Category)",
        "Dance music (Category)",
        "Electronic music (Category)",
        "Experimental music (Followable)",
        "Folk Music (Followable)",
        "Hip-hop & rap (Category)",
        "Indie spotlight (Followable)",
        "J-pop (Category)",
        "Jazz (Followable)",
        "K-hip hop (Category)",
        "K-pop (Category)",
        "Metal (Category)",
        "Music brands (Followable)",
        "Music festivals (Followable)",
        "Music news (Followable)",
        "Musical instruments (Category)",
        "Opera (Followable)",
        "Pop (Category)",
        "Punjabi music (Followable)",
        "Punk (Followable)",
        "R&B and soul (Category)",
        "Radio stations (Category)",
        "Reggaeton (Category)",
        "Rock (Category)",
        "Soft rock (Followable)",
        "World music (Category)"
    ],
    "news": [
        "Arts and culture news (Followable)",
        "Business & finance news (Followable)",
        "COVID-19 (Category)",
        "Gaming news (Followable)",
        "Health news (Followable)",
        "Local news (Category)",
        "Movie news (Followable)",
        "Music news (Followable)",
        "Science news (Followable)",
        "Social movements (Category)",
        "Sports news (Followable)",
        "Tech news (Followable)",
        "US national news (Followable)",
        "World news (Followable)"
    ],
    "outdoors": [
        "Birdwatching (Followable)",
        "Fishing (Followable)",
        "Hunting (Followable)",
        "Nature (Followable)",
        "Rock climbing (Followable)"
    ],
    "science": [
        "Science (Followable)",
        "Archaeology (Followable)",
        "Biology (Category)",
        "Chemistry (Followable)",
        "Conservation & environmentalism (Followable)",
        "Geography (Followable)",
        "Geology (Followable)",
        "Physics (Followable)",
        "Science news (Followable)",
        "Space & astronomy (Followable)",
        "Weather (Followable)"
    ],
    # The commas matter here: without them Python concatenates adjacent string
    # literals and the whole list collapses into one single string.
    "sports": [
        "Sports (Followable)",
        "Australian rules football (Category)",
        "Australian rules football (Followable)",
        "Australian Football League (Followable)",
        "Auto racing (Followable)",
        "Formula 1 (Category)",
        "IndyCar (Followable)",
        "NASCAR (Category)",
        "Baseball (Followable)",
        "MLB (Category)",
        "Basketball (Followable)",
        "NBA (Category)",
        "NCAA Men’s Basketball (Category)",
        "NCAA Women’s Basketball (Category)",
        "WNBA (Category)",
        "Billiards (Category)",
        "Cheerleading (Followable)",
        "Combat Sports (Category)",
        "Cricket (Category)",
        "Cycling (Followable)",
        "Darts (Followable)",
        "Drone racing (Followable)",
        "Fantasy sports (Category)",
        "Football (Category)",
        "Golf (Category)",
        "Gymnastics (Category)",
        "Handball (Followable)",
        "Hockey (Category)",
        "Horse racing & equestrian (Followable)",
        "Lacrosse (Category)",
        "Martial arts (Category)",
        "Motorcycle racing (Category)",
        "Netball (Followable)",
        "Rodeo (Followable)",
        "Rowing (Followable)",
        "Rugby (Category)",
        "Sailing (Followable)",
        "Skateboarding (Category)",
        "Soccer (Category)",
        "Softball (Followable)",
        "Sports icons (Category)",
        "Sports journalists & coaches (Category)",
        "Sports news (Followable)",
        "Surfing (Followable)",
        "Swimming (Followable)",
        "Table tennis (Followable)",
        "Tennis (Category)",
        "Track & field (Followable)",
        "Triathlon (Followable)",
        "Volleyball (Followable)"
    ],
    "technology": [
        "Technology (Followable)",
        "Augmented reality (Followable)",
        "Cloud computing (Followable)",
        "Cloud platforms (Followable)",
        "Computer programming (Category)",
        "Cryptocurrencies (Category)",
        "Data science (Category)",
        "Databases (Followable)",
        "Drone technology (Followable)",
        "FinTech (Followable)",
        "Information security (Category)",
        "Internet of things (Followable)",
        "Tech brands (Category)",
        "Tech news (Followable)",
        "Tech personalities (Category)",
        "Virtual reality (Followable)"
    ],
    "travel": [
        "Travel (Followable)",
        "Adventure travel (Category)",
        "Air travel (Followable)",
        "Business travel (Followable)",
        "Cruises (Followable)",
        "Destinations (Category)",
        "Luxury travel (Followable)",
        "Museums and institutions (Followable)",
        "National parks (Followable)",
        "Theme parks (Followable)",
        "Travel guides (Followable)"
    ]
}

# Strip the "(Category)" / "(Followable)" suffix from every label. Stripping can
# leave two identical entries in one category (e.g. the two "Australian rules
# football" rows), so duplicates are dropped while keeping first-seen order.
for k, v in CLASSES_TWITTER.items():
    CLASSES_TWITTER[k] = list(dict.fromkeys(c.split("(")[0].strip() for c in v))


In [None]:
import json

# Persist the category -> labels mapping as JSON.
with open("CLASSES_TWITTER.json", mode="w") as f:
    f.write(json.dumps(CLASSES_TWITTER))

In [None]:
# Flatten every category's labels into one list, in category order.
# Labels that appear under several categories are kept once per occurrence.
LIST_CLASSES_TWITTER = [
    label
    for labels in CLASSES_TWITTER.values()
    for label in labels
]

In [None]:
import json
# Persist the flattened label list as JSON.
with open("LIST_CLASSES_TWITTER.json", mode="w") as f:
    f.write(json.dumps(LIST_CLASSES_TWITTER))