# Install Packages

In [1]:
!pip install gdeltdoc


Collecting gdeltdoc
  Downloading gdeltdoc-1.5.0-py3-none-any.whl (13 kB)
Installing collected packages: gdeltdoc
Successfully installed gdeltdoc-1.5.0


# Import Libraries

In [5]:
from gdeltdoc import GdeltDoc, Filters, near, repeat
from enum import Enum

# Class GdeltData definition

In [7]:
class GdeltData(GdeltDoc):
    """
    Class for interacting with the GDELT API for Sentiment Analysis.

    Binds a single ``Filters`` object to a ``GdeltDoc`` client and keeps the
    result of the most recent article / timeline query on the instance.

      There are 5 available modes when making a timeline search:
      timelinevol - a timeline of the volume of news coverage matching the filters, represented as a percentage of the total news articles monitored by GDELT.
      timelinevolraw - similar to timelinevol, but has the actual number of articles and a total rather than a percentage
      timelinelang - similar to timelinevol but breaks the total articles down by published language. Each language is returned as a separate column in the DataFrame.
      timelinesourcecountry - similar to timelinevol but breaks the total articles down by the country they were published in. Each country is returned as a separate column in the DataFrame.
      timelinetone - a timeline of the average tone of the news coverage matching the filters. See GDELT's documentation for more information about the tone metric.

      Filter set example. All of them are optional.
          start_date = "2020-05-01",
          end_date = "2020-05-02",
          num_records = 250,
          keyword = "climate change",
          domain = ["bbc.co.uk", "nytimes.com"],
          country = ["UK", "US"],
          theme = "GENERAL_HEALTH",
          near = near(10, "airline", "carbon"),
          repeat = repeat(5, "planet")

    Attributes:
        max_depth_json_parsing: value of the ``num_records`` constructor argument.
        filter_string: the ``Filters`` object used for every query.
        articles: result of the last ``retrive_articles`` call, ``None`` before the first call.
        timeline: result of the last ``retrive_timeline`` call, ``None`` before the first call.
    """
    def __init__(self, num_records:int, filter_string:Filters) -> None:
        """
        Initialize the GdeltData class.

        Args:
            num_records: stored as ``max_depth_json_parsing``.
            filter_string: ``Filters`` object reused by every query method.
        """
        # Let the parent set up whatever state it needs before overriding any of it.
        super().__init__()
        # NOTE(review): the attribute name suggests a JSON-parsing limit consumed by
        # GdeltDoc rather than a record count (the record count is requested through
        # Filters(num_records=...)) - confirm the intended meaning of this argument.
        self.max_depth_json_parsing:int = num_records
        self.filter_string:Filters = filter_string
        # Defined up front so reading them before a query yields None instead of
        # raising AttributeError.
        self.articles = None
        self.timeline = None

    def retrive_articles(self):
        """
        Retrieve articles from the GDELT API.

        Returns:
            The result of ``article_search`` for ``self.filter_string``; the same
            object is also stored on ``self.articles``.
        """
        self.articles = self.article_search(self.filter_string)
        return self.articles

    def retrive_timeline(self,timeline_mode:str):
        """
        Retrieve timeline data from the GDELT API.

        Args:
            timeline_mode: one of the mode names listed in the class docstring
                (e.g. ``"timelinevol"``); passed unchanged to ``timeline_search``.

        Returns:
            The result of ``timeline_search`` for ``self.filter_string``; the same
            object is also stored on ``self.timeline``.
        """
        self.timeline = self.timeline_search(timeline_mode, self.filter_string)
        return self.timeline

    # Correctly spelled aliases. The original (misspelled) names remain the
    # primary definitions so existing callers keep working.
    retrieve_articles = retrive_articles
    retrieve_timeline = retrive_timeline

# Test Box

In [8]:
# Test box: exercise GdeltData end to end against the live GDELT API.
kw:str = "apple"
f = Filters(
    keyword=kw,
    start_date="2022-01-01",
    end_date="2024-07-04",
    num_records=250,
)

AaplGdeltData = GdeltData(250, f)

# retrive_articles() returns the very object it stores on .articles,
# so the CSV export can be chained onto the call.
AaplGdeltData.retrive_articles().to_csv(f"{kw}_articles.csv")

# Volume timeline for the same filter set; the result is kept on .timeline.
AaplGdeltData.retrive_timeline("timelinevol")

print(AaplGdeltData.articles)
print(AaplGdeltData.timeline)



                                                   url  \
0                 https://natalie.mu/music/news/575156   
1    https://www.163.com/dy/article/J3D38I0A0512B07...   
2    https://finance.sina.com.cn/jjxw/2024-05-29/do...   
3                 https://tech.ifeng.com/c/8ZzNaSAkDNd   
4    https://forums.appleinsider.com/discussion/236561   
..                                                 ...   
245  https://www.forbes.com/sites/tiriasresearch/20...   
246         https://www.huxiu.com/article/3035020.html   
247  https://www.imore.com/ipad/apple-pencil-2-retu...   
248  https://finance.sina.com.cn/tech/mobile/n/n/20...   
249            https://finance.ifeng.com/c/8YnumtoMe54   

                                            url_mobile  \
0                                                        
1    https://m.163.com/dy/article/J3D38I0A0512B07B....   
2                                                        
3                                                        
4            