In [1]:
!pip install pycountry

Collecting pycountry
  Downloading pycountry-24.6.1-py3-none-any.whl.metadata (12 kB)
Downloading pycountry-24.6.1-py3-none-any.whl (6.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m44.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pycountry
Successfully installed pycountry-24.6.1


In [2]:
!pip install python-louvain



Imports

In [3]:
import pandas as pd
import zipfile
import io
import pickle
from scipy.stats import spearmanr
import pycountry
import requests
from matplotlib import cm
import community as community_louvain
from datetime import datetime, timedelta
from tqdm import tqdm
import plotly.express as px
import os
import networkx as nx
import matplotlib.pyplot as plt
from collections import defaultdict
import seaborn as sns
import glob
import plotly.graph_objects as go
from google.colab import drive
import dask.dataframe as dd
import community as community_louvain
import community.community_louvain as community_louvain
drive.mount('/content/drive')
pd.set_option('display.max_columns', None)  # Show all columns
# pd.reset_option('display.max_columns')

Mounted at /content/drive


###Global:

In [4]:
# url = "https://www.gdeltproject.org/data/lookups/CSV.header.dailyupdates.txt"
# headers = requests.get(url).text.strip().split('\t')
# headers

In [5]:
#fixing columns mismatch - there is one attribute missing 3 times (Actor1Geo_ADM2Code., Actor2Geo_ADM2Code.,ActionGeo_ADM2Code.)
headers = ['GLOBALEVENTID',
 'SQLDATE',
 'MonthYear',
 'Year',
 'FractionDate',
 'Actor1Code',
 'Actor1Name',
 'Actor1CountryCode',
 'Actor1KnownGroupCode',
 'Actor1EthnicCode',
 'Actor1Religion1Code',
 'Actor1Religion2Code',
 'Actor1Type1Code',
 'Actor1Type2Code',
 'Actor1Type3Code',
 'Actor2Code',
 'Actor2Name',
 'Actor2CountryCode',
 'Actor2KnownGroupCode',
 'Actor2EthnicCode',
 'Actor2Religion1Code',
 'Actor2Religion2Code',
 'Actor2Type1Code',
 'Actor2Type2Code',
 'Actor2Type3Code',
 'IsRootEvent',
 'EventCode',
 'EventBaseCode',
 'EventRootCode',
 'QuadClass',
 'GoldsteinScale',
 'NumMentions',
 'NumSources',
 'NumArticles',
 'AvgTone',
 'Actor1Geo_Type',
 'Actor1Geo_FullName',
 'Actor1Geo_CountryCode',
 'Actor1Geo_ADM1Code',
 'Actor1Geo_ADM2Code',
 'Actor1Geo_Lat',
 'Actor1Geo_Long',
 'Actor1Geo_FeatureID',
 'Actor2Geo_Type',
 'Actor2Geo_FullName',
 'Actor2Geo_CountryCode',
 'Actor2Geo_ADM1Code',
 'Actor2Geo_ADM2Code',
 'Actor2Geo_Lat',
 'Actor2Geo_Long',
 'Actor2Geo_FeatureID',
 'ActionGeo_Type',
 'ActionGeo_FullName',
 'ActionGeo_CountryCode',
 'ActionGeo_ADM1Code',
 'ActionGeo_ADM2Code',
 'ActionGeo_Lat',
 'ActionGeo_Long',
 'ActionGeo_FeatureID',
 'DATEADDED',
 'SOURCEURL']

In [6]:
# Israeli name variants
ISRAEL_VARIANTS = set([
    'israel', 'the state of israel', 'state of israel',
    'israeli', 'israeli government', 'israeli military',
    'jerusalem', 'tel aviv', 'isr', 'idf'])

In [7]:
signif_columns = [
 'SQLDATE',
 'Actor1Code',
 'Actor1Name',
 'Actor1CountryCode',
 'Actor2Code',
 'Actor2Name',
 'Actor2CountryCode',
 'IsRootEvent',
 'EventCode',
 'EventBaseCode',
 'EventRootCode',
 'QuadClass',
 'GoldsteinScale',
 'NumMentions',
 'NumSources',
 'NumArticles',
 'AvgTone',
 'ActionGeo_FullName',
 'ActionGeo_CountryCode',
 'DATEADDED',
 'SOURCEURL']

## Data collection and preprocessing

In [None]:
#Extracting GDELT's raw files links
url = "http://data.gdeltproject.org/gdeltv2/masterfilelist.txt"
response = requests.get(url, timeout=10)
response.raise_for_status()

HTTPError: 403 Client Error: Forbidden for url: http://data.gdeltproject.org/gdeltv2/masterfilelist.txt

In [None]:
#Filtering according to dates:
#Set date range
start_date = datetime(2023, 10, 7)
end_date = datetime.today()

filtered_urls = []
bad_lines = []
lines = response.text.splitlines()
for line in lines:
    parts = line.strip().split()
    if len(parts) != 3:
        continue

    url = parts[2]
    try:
        # Extract timestamp from filename
        filename = url.split("/")[-1]
        timestamp_str = filename.split(".")[0]  # e.g. 20231007120000
        file_datetime = datetime.strptime(timestamp_str, "%Y%m%d%H%M%S")

        # Check if in range
        if start_date <= file_datetime <= end_date and url.endswith("export.CSV.zip"):
            filtered_urls.append(url)
    except Exception:
      print(f"Problem with parsing url: {url}")
len(filtered_urls)

58976

In [None]:
output_dir = "/content/drive/MyDrive/Miki/GDELT2_Data_post_7th"

Mounted at /content/drive


In [None]:
processed_log = "processed.txt"
processed_urls = set()

if os.path.exists(processed_log):
    with open(processed_log, "r") as f:
        processed_urls = set(line.strip() for line in f if line.strip())
for file_url in tqdm(filtered_urls, desc="Processing files"):
    if file_url in processed_urls:
        continue  # Skip already processed
    try:

        r = requests.get(file_url, timeout=30)
        r.raise_for_status()

        # Unzip the content in memory
        with zipfile.ZipFile(io.BytesIO(r.content)) as z:
            for filename in z.namelist():
                with z.open(filename) as f:
                    df = pd.read_csv(f,names = headers, sep='\t', encoding='ISO-8859-1', dtype=str,low_memory=False)

                    ts = file_url.split("/")[-1].split(".")[0]
                    df.to_parquet(f"{output_dir}/{ts}.parquet")
                    # Log success
                    with open(processed_log, "a") as logf:
                        logf.write(file_url + "\n")

    except Exception as e:
        print(f"Failed to process {file_url}: {e}")


Processing files:  85%|████████▍ | 50087/58976 [5:37:19<4:40:19,  1.89s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250311194500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250311194500.export.CSV.zip


Processing files:  85%|████████▍ | 50091/58976 [5:37:25<5:32:13,  2.24s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250311204500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250311204500.export.CSV.zip


Processing files:  85%|████████▍ | 50093/58976 [5:37:33<8:00:58,  3.25s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250311211500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250311211500.export.CSV.zip


Processing files:  85%|████████▍ | 50123/58976 [5:38:20<5:57:11,  2.42s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250312044500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250312044500.export.CSV.zip


Processing files:  85%|████████▍ | 50124/58976 [5:38:25<7:58:37,  3.24s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250312050000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250312050000.export.CSV.zip


Processing files:  85%|████████▍ | 50125/58976 [5:38:30<9:24:22,  3.83s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250312051500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250312051500.export.CSV.zip


Processing files:  85%|████████▍ | 50128/58976 [5:38:43<10:24:43,  4.24s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250312060000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250312060000.export.CSV.zip


Processing files:  85%|████████▌ | 50134/58976 [5:38:53<6:01:28,  2.45s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250312073000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250312073000.export.CSV.zip


Processing files:  85%|████████▌ | 50137/58976 [5:38:59<6:19:24,  2.58s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250312081500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250312081500.export.CSV.zip


Processing files:  85%|████████▌ | 50140/58976 [5:39:07<7:26:52,  3.03s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250312090000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250312090000.export.CSV.zip


Processing files:  85%|████████▌ | 50148/58976 [5:39:17<5:20:41,  2.18s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250312110000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250312110000.export.CSV.zip


Processing files:  85%|████████▌ | 50156/58976 [5:39:40<8:54:45,  3.64s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250312130000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250312130000.export.CSV.zip


Processing files:  85%|████████▌ | 50163/58976 [5:39:58<7:11:07,  2.94s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250312144500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250312144500.export.CSV.zip


Processing files:  85%|████████▌ | 50170/58976 [5:40:15<6:48:01,  2.78s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250312163000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250312163000.export.CSV.zip


Processing files:  85%|████████▌ | 50176/58976 [5:40:30<7:45:15,  3.17s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250312180000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250312180000.export.CSV.zip


Processing files:  85%|████████▌ | 50190/58976 [5:40:54<5:57:22,  2.44s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250312213000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250312213000.export.CSV.zip


Processing files:  85%|████████▌ | 50195/58976 [5:41:01<5:23:53,  2.21s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250312224500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250312224500.export.CSV.zip


Processing files:  85%|████████▌ | 50199/58976 [5:41:08<5:41:06,  2.33s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250312234500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250312234500.export.CSV.zip


Processing files:  85%|████████▌ | 50214/58976 [5:41:42<6:21:47,  2.61s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250313033000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250313033000.export.CSV.zip


Processing files:  85%|████████▌ | 50215/58976 [5:41:47<8:17:09,  3.40s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250313034500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250313034500.export.CSV.zip


Processing files:  85%|████████▌ | 50226/58976 [5:42:07<5:47:51,  2.39s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250313063000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250313063000.export.CSV.zip


Processing files:  85%|████████▌ | 50235/58976 [5:42:26<5:53:01,  2.42s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250313084500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250313084500.export.CSV.zip


Processing files:  85%|████████▌ | 50245/58976 [5:42:47<6:58:42,  2.88s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250313111500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250313111500.export.CSV.zip


Processing files:  85%|████████▌ | 50246/58976 [5:42:52<8:39:52,  3.57s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250313113000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250313113000.export.CSV.zip


Processing files:  85%|████████▌ | 50248/58976 [5:42:59<8:45:33,  3.61s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250313120000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250313120000.export.CSV.zip


Processing files:  85%|████████▌ | 50251/58976 [5:43:08<8:49:08,  3.64s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250313124500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250313124500.export.CSV.zip


Processing files:  85%|████████▌ | 50257/58976 [5:43:20<7:09:41,  2.96s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250313141500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250313141500.export.CSV.zip


Processing files:  85%|████████▌ | 50272/58976 [5:43:50<6:17:06,  2.60s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250313180000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250313180000.export.CSV.zip


Processing files:  85%|████████▌ | 50274/58976 [5:43:56<7:18:44,  3.03s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250313183000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250313183000.export.CSV.zip


Processing files:  85%|████████▌ | 50288/58976 [5:44:29<6:27:00,  2.67s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250313220000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250313220000.export.CSV.zip


Processing files:  85%|████████▌ | 50299/58976 [5:44:44<6:05:24,  2.53s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250314004500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250314004500.export.CSV.zip


Processing files:  85%|████████▌ | 50312/58976 [5:45:17<8:18:21,  3.45s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250314040000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250314040000.export.CSV.zip


Processing files:  85%|████████▌ | 50321/58976 [5:45:40<8:31:06,  3.54s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250314061500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250314061500.export.CSV.zip


Processing files:  85%|████████▌ | 50322/58976 [5:45:45<9:42:53,  4.04s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250314063000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250314063000.export.CSV.zip


Processing files:  85%|████████▌ | 50341/58976 [5:46:13<6:02:42,  2.52s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250314111500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250314111500.export.CSV.zip


Processing files:  85%|████████▌ | 50343/58976 [5:46:19<6:58:12,  2.91s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250314114500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250314114500.export.CSV.zip


Processing files:  85%|████████▌ | 50349/58976 [5:46:29<5:56:42,  2.48s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250314131500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250314131500.export.CSV.zip


Processing files:  85%|████████▌ | 50361/58976 [5:47:00<7:57:30,  3.33s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250314161500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250314161500.export.CSV.zip


Processing files:  85%|████████▌ | 50371/58976 [5:47:21<7:42:04,  3.22s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250314184500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250314184500.export.CSV.zip


Processing files:  85%|████████▌ | 50374/58976 [5:47:30<7:47:58,  3.26s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250314193000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250314193000.export.CSV.zip


Processing files:  85%|████████▌ | 50383/58976 [5:47:49<6:34:27,  2.75s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250314214500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250314214500.export.CSV.zip


Processing files:  85%|████████▌ | 50392/58976 [5:48:07<6:06:00,  2.56s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250315000000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250315000000.export.CSV.zip


Processing files:  85%|████████▌ | 50394/58976 [5:48:16<8:48:36,  3.70s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250315003000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250315003000.export.CSV.zip


Processing files:  85%|████████▌ | 50403/58976 [5:48:38<8:48:39,  3.70s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250315024500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250315024500.export.CSV.zip


Processing files:  85%|████████▌ | 50414/58976 [5:48:52<5:04:20,  2.13s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250315053000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250315053000.export.CSV.zip


Processing files:  86%|████████▌ | 50434/58976 [5:49:23<5:56:51,  2.51s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250315103000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250315103000.export.CSV.zip


Processing files:  86%|████████▌ | 50438/58976 [5:49:32<6:08:20,  2.59s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250315113000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250315113000.export.CSV.zip


Processing files:  86%|████████▌ | 50444/58976 [5:49:39<5:25:21,  2.29s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250315130000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250315130000.export.CSV.zip


Processing files:  86%|████████▌ | 50461/58976 [5:50:13<5:10:22,  2.19s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250315171500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250315171500.export.CSV.zip


Processing files:  86%|████████▌ | 50471/58976 [5:50:29<6:27:00,  2.73s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250315194500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250315194500.export.CSV.zip


Processing files:  86%|████████▌ | 50472/58976 [5:50:35<8:34:50,  3.63s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250315200000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250315200000.export.CSV.zip


Processing files:  86%|████████▌ | 50475/58976 [5:50:48<9:55:14,  4.20s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250315204500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250315204500.export.CSV.zip


Processing files:  86%|████████▌ | 50503/58976 [5:51:30<7:43:49,  3.28s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250316034500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250316034500.export.CSV.zip


Processing files:  86%|████████▌ | 50512/58976 [5:51:54<7:30:12,  3.19s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250316060000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250316060000.export.CSV.zip


Processing files:  86%|████████▌ | 50517/58976 [5:52:04<6:03:53,  2.58s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250316071500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250316071500.export.CSV.zip


Processing files:  86%|████████▌ | 50523/58976 [5:52:22<8:01:39,  3.42s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250316084500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250316084500.export.CSV.zip


Processing files:  86%|████████▌ | 50531/58976 [5:52:38<5:53:28,  2.51s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250316104500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250316104500.export.CSV.zip


Processing files:  86%|████████▌ | 50573/58976 [5:53:59<7:00:07,  3.00s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250316211500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250316211500.export.CSV.zip


Processing files:  86%|████████▌ | 50580/58976 [5:54:19<7:49:26,  3.35s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250316230000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250316230000.export.CSV.zip


Processing files:  86%|████████▌ | 50597/58976 [5:54:59<6:20:17,  2.72s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250317031500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250317031500.export.CSV.zip


Processing files:  86%|████████▌ | 50604/58976 [5:55:20<8:36:31,  3.70s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250317050000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250317050000.export.CSV.zip


Processing files:  86%|████████▌ | 50607/58976 [5:55:29<8:04:24,  3.47s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250317054500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250317054500.export.CSV.zip


Processing files:  86%|████████▌ | 50608/58976 [5:55:34<9:18:01,  4.00s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250317060000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250317060000.export.CSV.zip


Processing files:  86%|████████▌ | 50610/58976 [5:55:39<8:24:17,  3.62s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250317063000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250317063000.export.CSV.zip


Processing files:  86%|████████▌ | 50612/58976 [5:55:48<9:34:24,  4.12s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250317070000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250317070000.export.CSV.zip


Processing files:  86%|████████▌ | 50617/58976 [5:56:08<10:14:31,  4.41s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250317081500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250317081500.export.CSV.zip


Processing files:  86%|████████▌ | 50618/58976 [5:56:13<10:48:34,  4.66s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250317083000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250317083000.export.CSV.zip


Processing files:  86%|████████▌ | 50623/58976 [5:56:24<7:03:40,  3.04s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250317094500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250317094500.export.CSV.zip


Processing files:  86%|████████▌ | 50628/58976 [5:56:38<8:10:27,  3.53s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250317110000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250317110000.export.CSV.zip


Processing files:  86%|████████▌ | 50630/58976 [5:56:45<8:29:53,  3.67s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250317113000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250317113000.export.CSV.zip


Processing files:  86%|████████▌ | 50645/58976 [5:56:58<4:34:34,  1.98s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250317151500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250317151500.export.CSV.zip


Processing files:  86%|████████▌ | 50648/58976 [5:57:10<7:42:26,  3.33s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250317160000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250317160000.export.CSV.zip


Processing files:  86%|████████▌ | 50655/58976 [5:57:29<7:44:41,  3.35s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250317174500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250317174500.export.CSV.zip


Processing files:  86%|████████▌ | 50678/58976 [5:58:27<5:38:19,  2.45s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250317233000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250317233000.export.CSV.zip


Processing files:  86%|████████▌ | 50682/58976 [5:58:38<6:50:56,  2.97s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250318003000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250318003000.export.CSV.zip


Processing files:  86%|████████▌ | 50695/58976 [5:59:08<7:04:23,  3.07s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250318034500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250318034500.export.CSV.zip


Processing files:  86%|████████▌ | 50696/58976 [5:59:14<8:33:29,  3.72s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250318040000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250318040000.export.CSV.zip


Processing files:  86%|████████▌ | 50706/58976 [5:59:50<10:00:22,  4.36s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250318063000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250318063000.export.CSV.zip


Processing files:  86%|████████▌ | 50714/58976 [6:00:10<7:11:13,  3.13s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250318083000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250318083000.export.CSV.zip


Processing files:  86%|████████▌ | 50720/58976 [6:00:26<6:58:05,  3.04s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250318100000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250318100000.export.CSV.zip


Processing files:  86%|████████▌ | 50731/58976 [6:00:52<6:15:21,  2.73s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250318124500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250318124500.export.CSV.zip


Processing files:  86%|████████▌ | 50740/58976 [6:01:17<7:13:00,  3.15s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250318150000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250318150000.export.CSV.zip


Processing files:  86%|████████▌ | 50764/58976 [6:02:20<8:02:10,  3.52s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250318210000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250318210000.export.CSV.zip


Processing files:  86%|████████▌ | 50765/58976 [6:02:26<9:12:20,  4.04s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250318211500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250318211500.export.CSV.zip


Processing files:  86%|████████▌ | 50767/58976 [6:02:36<10:23:52,  4.56s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250318214500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250318214500.export.CSV.zip


Processing files:  86%|████████▌ | 50771/58976 [6:02:46<7:43:56,  3.39s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250318224500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250318224500.export.CSV.zip


Processing files:  86%|████████▌ | 50780/58976 [6:03:07<6:04:08,  2.67s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250319010000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250319010000.export.CSV.zip


Processing files:  86%|████████▌ | 50797/58976 [6:03:50<7:33:16,  3.33s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250319051500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250319051500.export.CSV.zip


Processing files:  86%|████████▌ | 50799/58976 [6:03:58<8:43:48,  3.84s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250319054500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250319054500.export.CSV.zip


Processing files:  86%|████████▌ | 50807/58976 [6:04:23<8:12:09,  3.61s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250319074500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250319074500.export.CSV.zip


Processing files:  86%|████████▌ | 50819/58976 [6:04:51<6:54:17,  3.05s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250319104500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250319104500.export.CSV.zip


Processing files:  86%|████████▌ | 50828/58976 [6:05:16<7:53:20,  3.49s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250319130000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250319130000.export.CSV.zip


Processing files:  86%|████████▌ | 50833/58976 [6:05:31<7:01:51,  3.11s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250319141500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250319141500.export.CSV.zip


Processing files:  86%|████████▌ | 50839/58976 [6:05:50<7:51:47,  3.48s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250319154500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250319154500.export.CSV.zip


Processing files:  86%|████████▌ | 50840/58976 [6:05:55<9:00:46,  3.99s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250319160000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250319160000.export.CSV.zip


Processing files:  86%|████████▌ | 50846/58976 [6:06:15<9:04:27,  4.02s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250319173000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250319173000.export.CSV.zip


Processing files:  86%|████████▌ | 50851/58976 [6:06:33<8:43:44,  3.87s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250319184500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250319184500.export.CSV.zip


Processing files:  86%|████████▌ | 50852/58976 [6:06:38<9:58:56,  4.42s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250319190000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250319190000.export.CSV.zip


Processing files:  86%|████████▌ | 50857/58976 [6:06:52<7:31:46,  3.34s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250319201500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250319201500.export.CSV.zip


Processing files:  86%|████████▋ | 50882/58976 [6:07:42<7:10:37,  3.19s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250320023000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250320023000.export.CSV.zip


Processing files:  86%|████████▋ | 50883/58976 [6:07:48<8:32:49,  3.80s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250320024500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250320024500.export.CSV.zip


Processing files:  86%|████████▋ | 50884/58976 [6:07:53<9:30:42,  4.23s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250320030000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250320030000.export.CSV.zip


Processing files:  86%|████████▋ | 50897/58976 [6:08:18<6:17:42,  2.81s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250320061500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250320061500.export.CSV.zip


Processing files:  86%|████████▋ | 50934/58976 [6:09:44<7:41:58,  3.45s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250320153000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250320153000.export.CSV.zip


Processing files:  86%|████████▋ | 50937/58976 [6:09:54<8:02:35,  3.60s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250320161500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250320161500.export.CSV.zip


Processing files:  86%|████████▋ | 50942/58976 [6:10:04<5:53:55,  2.64s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250320173000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250320173000.export.CSV.zip


Processing files:  86%|████████▋ | 50946/58976 [6:10:15<7:30:57,  3.37s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250320183000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250320183000.export.CSV.zip


Processing files:  86%|████████▋ | 50980/58976 [6:11:42<7:00:50,  3.16s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250321030000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250321030000.export.CSV.zip


Processing files:  86%|████████▋ | 50988/58976 [6:11:59<6:08:20,  2.77s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250321050000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250321050000.export.CSV.zip


Processing files:  86%|████████▋ | 51000/58976 [6:12:31<8:35:48,  3.88s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250321080000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250321080000.export.CSV.zip


Processing files:  86%|████████▋ | 51007/58976 [6:12:47<6:44:30,  3.05s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250321094500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250321094500.export.CSV.zip


Processing files:  86%|████████▋ | 51013/58976 [6:13:02<6:43:41,  3.04s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250321111500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250321111500.export.CSV.zip


Processing files:  87%|████████▋ | 51018/58976 [6:13:13<6:40:45,  3.02s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250321123000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250321123000.export.CSV.zip


Processing files:  87%|████████▋ | 51024/58976 [6:13:37<10:00:07,  4.53s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250321140000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250321140000.export.CSV.zip


Processing files:  87%|████████▋ | 51028/58976 [6:13:50<8:43:34,  3.95s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250321150000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250321150000.export.CSV.zip


Processing files:  87%|████████▋ | 51051/58976 [6:14:47<7:29:24,  3.40s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250321204500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250321204500.export.CSV.zip


Processing files:  87%|████████▋ | 51080/58976 [6:15:32<7:24:55,  3.38s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250322040000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250322040000.export.CSV.zip


Processing files:  87%|████████▋ | 51089/58976 [6:15:47<5:07:12,  2.34s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250322061500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250322061500.export.CSV.zip


Processing files:  87%|████████▋ | 51098/58976 [6:16:08<5:59:05,  2.73s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250322083000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250322083000.export.CSV.zip


Processing files:  87%|████████▋ | 51101/58976 [6:16:17<7:18:32,  3.34s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250322091500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250322091500.export.CSV.zip


Processing files:  87%|████████▋ | 51106/58976 [6:16:29<6:04:05,  2.78s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250322103000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250322103000.export.CSV.zip


Processing files:  87%|████████▋ | 51115/58976 [6:16:49<5:51:11,  2.68s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250322124500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250322124500.export.CSV.zip


Processing files:  87%|████████▋ | 51124/58976 [6:17:18<8:16:42,  3.80s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250322150000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250322150000.export.CSV.zip


Processing files:  87%|████████▋ | 51149/58976 [6:18:15<6:59:55,  3.22s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250322211500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250322211500.export.CSV.zip


Processing files:  87%|████████▋ | 51176/58976 [6:19:15<5:04:29,  2.34s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250323040000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250323040000.export.CSV.zip


Processing files:  87%|████████▋ | 51179/58976 [6:19:27<7:44:17,  3.57s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250323044500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250323044500.export.CSV.zip


Processing files:  87%|████████▋ | 51192/58976 [6:19:56<6:46:51,  3.14s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250323080000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250323080000.export.CSV.zip


Processing files:  87%|████████▋ | 51201/58976 [6:20:11<5:29:22,  2.54s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250323101500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250323101500.export.CSV.zip


Processing files:  87%|████████▋ | 51207/58976 [6:20:27<7:00:46,  3.25s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250323114500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250323114500.export.CSV.zip


Processing files:  87%|████████▋ | 51211/58976 [6:20:40<7:34:43,  3.51s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250323124500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250323124500.export.CSV.zip


Processing files:  87%|████████▋ | 51216/58976 [6:20:52<5:59:52,  2.78s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250323140000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250323140000.export.CSV.zip


Processing files:  87%|████████▋ | 51226/58976 [6:21:15<7:37:32,  3.54s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250323163000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250323163000.export.CSV.zip


Processing files:  87%|████████▋ | 51228/58976 [6:21:22<7:58:48,  3.71s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250323170000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250323170000.export.CSV.zip


Processing files:  87%|████████▋ | 51237/58976 [6:21:49<7:51:08,  3.65s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250323191500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250323191500.export.CSV.zip


Processing files:  87%|████████▋ | 51240/58976 [6:22:00<8:17:59,  3.86s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250323200000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250323200000.export.CSV.zip


Processing files:  87%|████████▋ | 51247/58976 [6:22:24<8:38:22,  4.02s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250323214500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250323214500.export.CSV.zip


Processing files:  87%|████████▋ | 51250/58976 [6:22:32<6:59:54,  3.26s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250323223000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250323223000.export.CSV.zip


Processing files:  87%|████████▋ | 51259/58976 [6:22:50<6:09:24,  2.87s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250324004500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250324004500.export.CSV.zip


Processing files:  87%|████████▋ | 51283/58976 [6:23:28<5:48:46,  2.72s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250324064500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250324064500.export.CSV.zip


Processing files:  87%|████████▋ | 51300/58976 [6:23:59<4:33:43,  2.14s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250324110000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250324110000.export.CSV.zip


Processing files:  87%|████████▋ | 51327/58976 [6:24:49<4:29:33,  2.11s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250324174500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250324174500.export.CSV.zip


Processing files:  87%|████████▋ | 51333/58976 [6:25:00<5:01:16,  2.37s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250324191500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250324191500.export.CSV.zip


Processing files:  87%|████████▋ | 51378/58976 [6:25:43<4:06:48,  1.95s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250325063000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250325063000.export.CSV.zip


Processing files:  87%|████████▋ | 51385/58976 [6:25:54<4:19:42,  2.05s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250325081500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250325081500.export.CSV.zip


Processing files:  87%|████████▋ | 51414/58976 [6:26:26<4:32:02,  2.16s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250325153000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250325153000.export.CSV.zip


Processing files:  87%|████████▋ | 51431/58976 [6:26:49<5:04:08,  2.42s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250325194500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250325194500.export.CSV.zip


Processing files:  87%|████████▋ | 51464/58976 [6:27:23<3:52:17,  1.86s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250326040000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250326040000.export.CSV.zip


Processing files:  87%|████████▋ | 51510/58976 [6:28:11<3:56:27,  1.90s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250326153000.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250326153000.export.CSV.zip


Processing files:  87%|████████▋ | 51517/58976 [6:28:22<5:24:12,  2.61s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250326171500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250326171500.export.CSV.zip


Processing files:  88%|████████▊ | 51606/58976 [6:29:34<4:14:06,  2.07s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250327154500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250327154500.export.CSV.zip


Processing files:  88%|████████▊ | 51884/58976 [6:32:13<3:28:25,  1.76s/it]

Failed to process http://data.gdeltproject.org/gdeltv2/20250330131500.export.CSV.zip: 500 Server Error: Internal Server Error for url: http://data.gdeltproject.org/gdeltv2/20250330131500.export.CSV.zip


Processing files: 100%|██████████| 58976/58976 [7:18:46<00:00,  2.24it/s]


### Preprocessing:

In [None]:
input_folder = '/content/drive/MyDrive/Miki/GDELT2_Data_post_7th'
output_folder = '/content/drive/MyDrive/Miki/GDELT2_Data_post_7th/Preprocessed/Israeli_Interactions'
input_list_file = os.path.join(output_folder, 'input_files.txt')
processed_log = os.path.join(output_folder, 'processed_files.txt')

os.makedirs(output_folder, exist_ok=True)

BATCH_SIZE = 100

#Create input file list if missing
if not os.path.exists(input_list_file):
    print("Creating static input list...")
    parquet_files = sorted([
        os.path.join(input_folder, f)
        for f in os.listdir(input_folder)
        if f.endswith('.parquet')
    ])
    with open(input_list_file, 'w') as f:
        for path in parquet_files:
            f.write(path + '\n')

# Read input + checkpoint files
with open(input_list_file, 'r') as f:
    parquet_files = [line.strip() for line in f]

if os.path.exists(processed_log):
    with open(processed_log, 'r') as f:
        processed_files = set(f.read().splitlines())
else:
    processed_files = set()

# Process files with filtering
file_buffer = []
file_counter = 0
output_counter = 0

for file_path in tqdm(parquet_files, desc="Preprocessing GDELT Files"):
    filename = os.path.basename(file_path)

    if filename in processed_files:
        continue  # Already processed

    try:
        df = pd.read_parquet(file_path, columns=signif_columns)
        filtered_df = df[
            df['Actor1Name'].str.lower().isin(ISRAEL_VARIANTS) |
            df['Actor2Name'].str.lower().isin(ISRAEL_VARIANTS) | df['Actor1CountryCode'].str.lower().isin(ISRAEL_VARIANTS) | df['Actor2CountryCode'].str.lower().isin(ISRAEL_VARIANTS) |df['ActionGeo_CountryCode'].str.lower().isin(ISRAEL_VARIANTS) | df['ActionGeo_FullName'].str.lower().isin(ISRAEL_VARIANTS)]

        if not filtered_df.empty:
            file_buffer.append(filtered_df)
            file_counter += 1

        # Mark file as processed
        with open(processed_log, 'a') as f:
            f.write(filename + '\n')
        processed_files.add(filename)

        # Save in batches
        if file_counter >= BATCH_SIZE:
            out_path = os.path.join(output_folder, f'israel_filtered_batch_{output_counter}.parquet')
            pd.concat(file_buffer, ignore_index=True).to_parquet(out_path)
            file_buffer = []
            file_counter = 0
            output_counter += 1

    except Exception as e:
        print(f"[ERROR] Failed processing {filename}: {e}")

# === Save remaining records ===
if file_buffer:
    out_path = os.path.join(output_folder, f'israel_filtered_batch_{output_counter}.parquet')
    pd.concat(file_buffer, ignore_index=True).to_parquet(out_path)


Creating static input list...


Preprocessing GDELT Files: 100%|██████████| 58825/58825 [8:31:10<00:00,  1.92it/s]


Merging all parquets to a single one:

In [None]:
output_dir = '/content/drive/MyDrive/Miki/GDELT2_Data_post_7th/Preprocessed/Israeli_Interactions'
parquet_files = glob.glob(os.path.join(output_dir, '*.parquet'))
list_dfs = [pd.read_parquet(f) for f in parquet_files]
merged_df = pd.concat(list_dfs, ignore_index=True)

# Define the path for the output merged parquet file
output_parquet_path = os.path.join(output_dir, 'merged_israeli_interactions.parquet')

# Save the merged DataFrame to a single parquet file
merged_df.to_parquet(output_parquet_path)

print(f"Merged {len(parquet_files)} parquet files into {output_parquet_path}")
print(f"Shape of the merged DataFrame: {merged_df.shape}")
total_memory = merged_df.memory_usage(deep=True).sum()
print(f"Total memory usage: {total_memory / (1024 ** 2):.2f} MB")

Merged 588 parquet files into /content/drive/MyDrive/Miki/GDELT2_Data_post_7th/Preprocessed/Israeli_Interactions/merged_israeli_interactions.parquet
Shape of the merged DataFrame: (4815024, 21)
Total memory usage: 6359.54 MB


In [None]:
merged_df

Unnamed: 0,SQLDATE,Actor1Code,Actor1Name,Actor1CountryCode,Actor2Code,Actor2Name,Actor2CountryCode,IsRootEvent,EventCode,EventBaseCode,EventRootCode,QuadClass,GoldsteinScale,NumMentions,NumSources,NumArticles,AvgTone,ActionGeo_FullName,ActionGeo_CountryCode,DATEADDED,SOURCEURL
0,20231007,,,,ISRSET,ISRAEL,ISR,1,190,190,19,4,-10.0,12,6,12,-11.2273361227336,Israel,IS,20231007000000,https://www.iranherald.com/news/273991510/pale...
1,20231007,ISR,ISRAEL,ISR,,,,0,129,129,12,3,-5.0,5,1,5,-0.81234768480909,"Kyiv, Kyyiv, Misto, Ukraine",UP,20231007000000,https://www.politico.com/newsletters/national-...
2,20231007,ISR,ISRAEL,ISR,BUS,PRODUCER,,0,129,129,12,3,-5.0,5,1,5,-0.81234768480909,"Kyiv, Kyyiv, Misto, Ukraine",UP,20231007000000,https://www.politico.com/newsletters/national-...
3,20231007,ISR,ISRAEL,ISR,ISRSET,ISRAEL,ISR,1,193,193,19,4,-10.0,4,1,4,-10.8786610878661,Israel,IS,20231007000000,https://www.bignewsnetwork.com/news/273991510/...
4,20231007,ISRSET,ISRAEL,ISR,ISR,ISRAEL,ISR,1,182,182,18,4,-9.5,12,6,12,-11.2273361227336,Israel,IS,20231007000000,https://www.iranherald.com/news/273991510/pale...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4815019,20250612,UAF,MILITANT,,ISR,ISRAELI,ISR,0,015,015,01,1,0.0,10,1,10,-6.57193605683837,"Gaza, Israel (general), Israel",IS,20250612111500,https://www.yahoo.com/news/humanitarian-worker...
4815020,20250612,USA,UNITED STATES,USA,ISR,ISRAEL,ISR,0,114,114,11,3,-2.0,4,2,4,-9.63518192603414,United States,US,20250612111500,https://www.yahoo.com/news/contributor-scars-u...
4815021,20250612,USAGOV,THE US,USA,ISR,TEL AVIV,ISR,1,172,172,17,4,-5.0,1,1,1,-6.60660660660661,"Tehran, Tehran, Iran",IR,20250612111500,https://www.aa.com.tr/en/middle-east/us-embass...
4815022,20250612,USAGOV,THE US,USA,ISR,TEL AVIV,ISR,1,172,172,17,4,-5.0,1,1,1,-6.60660660660661,"Washington, District of Columbia, United States",US,20250612111500,https://www.aa.com.tr/en/middle-east/us-embass...


In [None]:
cameo_code_path = "/content/drive/MyDrive/Miki/cameocodes.txt"
with open(cameo_code_path, 'r') as f:
  cameo_codes_content = f.readlines()
cameo_codes_content = [line.strip() for line in cameo_codes_content]

In [None]:
data = []
for line in cameo_codes_content[1:]:
    parts = line.split('\t' if '\t' in line else '\\')
    if len(parts) == 2:
        code = parts[0].strip()
        description = parts[1].strip()
        data.append((code, description))
    elif len(parts) == 1:
        data.append((parts[0].strip(), ''))
df_cameo_codes = pd.DataFrame(data, columns=['CAMEO_Code', 'Description'])

In [None]:
df_cameo_codes['CAMEO_Code'] = df_cameo_codes['CAMEO_Code'].astype(str)
df_cameo_codes.to_csv("/content/drive/MyDrive/Miki/cameocodes.csv", index=False)

In [None]:
cameo_codes_list = list(df_cameo_codes['CAMEO_Code'].unique())

## Joining with CAMEO event descriptions find online

In [3]:
path = '/content/drive/MyDrive/Miki/GDELT2_Data_post_7th/Preprocessed/Israeli_Interactions/merged_israeli_interactions.parquet'
df = dd.read_parquet(path)

NameError: name 'dd' is not defined

In [None]:
cameo_df = pd.read_csv('/content/drive/MyDrive/Miki/cameocodes.csv', dtype={'CAMEO_Code': str})
cameo_df.rename(columns={'CAMEO_Code': 'EventBaseCode','Description':'Event Description'}, inplace=True)
cameo_df

Unnamed: 0,EventBaseCode,Event Description
0,01,MAKE PUBLIC STATEMENT
1,010,"Make statement, not specified below"
2,011,Decline comment
3,012,Make pessimistic comment
4,013,Make optimistic comment
...,...,...
305,202,Engage in mass killings
306,203,Engage in ethnic cleansing
307,204,"Use weapons of mass destruction, not specified..."
308,2041,"Use chemical, biological, or radiologicalweapons"


In [None]:
df.head()

Unnamed: 0,SQLDATE,Actor1Code,Actor1Name,Actor1CountryCode,Actor2Code,Actor2Name,Actor2CountryCode,IsRootEvent,EventCode,EventBaseCode,EventRootCode,QuadClass,GoldsteinScale,NumMentions,NumSources,NumArticles,AvgTone,ActionGeo_FullName,ActionGeo_CountryCode,DATEADDED,SOURCEURL
0,20231007,,,,ISRSET,ISRAEL,ISR,1,190,190,19,4,-10.0,12,6,12,-11.2273361227336,Israel,IS,20231007000000,https://www.iranherald.com/news/273991510/pale...
1,20231007,ISR,ISRAEL,ISR,,,,0,129,129,12,3,-5.0,5,1,5,-0.81234768480909,"Kyiv, Kyyiv, Misto, Ukraine",UP,20231007000000,https://www.politico.com/newsletters/national-...
2,20231007,ISR,ISRAEL,ISR,BUS,PRODUCER,,0,129,129,12,3,-5.0,5,1,5,-0.81234768480909,"Kyiv, Kyyiv, Misto, Ukraine",UP,20231007000000,https://www.politico.com/newsletters/national-...
3,20231007,ISR,ISRAEL,ISR,ISRSET,ISRAEL,ISR,1,193,193,19,4,-10.0,4,1,4,-10.8786610878661,Israel,IS,20231007000000,https://www.bignewsnetwork.com/news/273991510/...
4,20231007,ISRSET,ISRAEL,ISR,ISR,ISRAEL,ISR,1,182,182,18,4,-9.5,12,6,12,-11.2273361227336,Israel,IS,20231007000000,https://www.iranherald.com/news/273991510/pale...


In [None]:
# Function to join each partition
def join_partition(partition, small_df):
    result = partition.merge(cameo_df, how='left', on='EventBaseCode')
    return result
# Infer meta (required by Dask to know output schema)
meta = join_partition(df._meta, cameo_df)
# Perform broadcast join
joined_ddf = df.map_partitions(join_partition, cameo_df, meta=meta)

joined_ddf = joined_ddf.repartition(npartitions=1)
joined_ddf.to_parquet('/content/drive/MyDrive/Miki/israeli_interactions_post_7th_w_cameo.parquet', overwrite=True)