In [8]:
import os
import io
import zipfile
import requests
import pandas as pd
from glob import glob
from calendar import monthrange

# Lista de event codes de interés
event_codes = [
    '1413', '1414', '1431', '1432', '1433', '1434',
    '1451', '1452', '1453', '1454',
    '161', '163', '174', '175', '176',
    '190', '191', '192', '193', '194', '196',
    '200', '201', '202', '203', '2041', '2042'
]

# Carpeta destino
output_folder = "/content/gdelt_event_data"
os.makedirs(output_folder, exist_ok=True)

In [9]:
def process_annual_file(year):
    url = f"http://data.gdeltproject.org/events/{year}.zip"
    print(f"⬇️ Downloading annual file: {url}")

    try:
        r = requests.get(url, timeout=30)
        if r.status_code != 200:
            print(f"❌ Failed: {url}")
            return

        with zipfile.ZipFile(io.BytesIO(r.content)) as z:
            with z.open(z.namelist()[0]) as f:
                df = pd.read_csv(f, sep='\t', header=None, usecols=[2, 26], dtype=str)

        df = df[df[26].isin(event_codes)].copy()
        df['Year'] = df[2].str[:4].astype(int)
        df['Month'] = df[2].str[4:6].astype(int)
        df = df.rename(columns={26: 'EventCode'})

        outpath = os.path.join(output_folder, f"gdelt_{year}.csv")
        df.groupby(['Year', 'Month', 'EventCode']).size().reset_index(name='count').to_csv(outpath, index=False)
        print(f"✅ Saved: {outpath}")
    except Exception as e:
        print(f"❌ Error: {e}")

In [10]:
def process_monthly_file(year, month):
    url = f"http://data.gdeltproject.org/events/{year}{month:02d}.zip"
    print(f"⬇️ Downloading monthly file: {url}")

    try:
        r = requests.get(url, timeout=30)
        if r.status_code != 200:
            print(f"❌ Failed: {url}")
            return

        with zipfile.ZipFile(io.BytesIO(r.content)) as z:
            with z.open(z.namelist()[0]) as f:
                df = pd.read_csv(f, sep='\t', header=None, usecols=[2, 26], dtype=str)

        df = df[df[26].isin(event_codes)].copy()
        df['Year'] = df[2].str[:4].astype(int)
        df['Month'] = df[2].str[4:6].astype(int)
        df = df.rename(columns={26: 'EventCode'})

        outpath = os.path.join(output_folder, f"gdelt_{year}{month:02d}.csv")
        df.groupby(['Year', 'Month', 'EventCode']).size().reset_index(name='count').to_csv(outpath, index=False)
        print(f"✅ Saved: {outpath}")
    except Exception as e:
        print(f"❌ Error: {e}")

In [11]:
def process_daily_file(year, month, day):
    date_str = f"{year}{month:02d}{day:02d}"
    url = f"http://data.gdeltproject.org/events/{date_str}.export.CSV.zip"
    print(f"⬇️ Downloading daily file: {url}")

    try:
        r = requests.get(url, timeout=30)
        if r.status_code != 200:
            return

        with zipfile.ZipFile(io.BytesIO(r.content)) as z:
            with z.open(z.namelist()[0]) as f:
                df = pd.read_csv(f, sep='\t', header=None, usecols=[2, 26], dtype=str)

        df = df[df[26].isin(event_codes)].copy()
        df['Year'] = df[2].str[:4].astype(int)
        df['Month'] = df[2].str[4:6].astype(int)
        df = df.rename(columns={26: 'EventCode'})

        outpath = os.path.join(output_folder, f"gdelt_{date_str}.csv")
        df.groupby(['Year', 'Month', 'EventCode']).size().reset_index(name='count').to_csv(outpath, index=False)
        print(f"✅ Saved: {outpath}")
    except Exception as e:
        print(f"❌ Error {date_str}: {e}")

In [13]:
# 1. Archivos anuales: 1990–2005
# for y in range(1990, 2006):
#     process_annual_file(y)

# # 2. Archivos mensuales: 2006–2012
# for y in range(2006, 2013):
#     for m in range(1, 13):
#         process_monthly_file(y, m)

# 3. Archivos diarios: 2013–2024
for y in range(2013, 2014):
    for m in range(1, 2):
        for d in range(1, monthrange(y, m)[1] + 1):
            process_daily_file(y, m, d)

⬇️ Downloading daily file: http://data.gdeltproject.org/events/20130101.export.CSV.zip
⬇️ Downloading daily file: http://data.gdeltproject.org/events/20130102.export.CSV.zip
⬇️ Downloading daily file: http://data.gdeltproject.org/events/20130103.export.CSV.zip
⬇️ Downloading daily file: http://data.gdeltproject.org/events/20130104.export.CSV.zip
⬇️ Downloading daily file: http://data.gdeltproject.org/events/20130105.export.CSV.zip
⬇️ Downloading daily file: http://data.gdeltproject.org/events/20130106.export.CSV.zip
⬇️ Downloading daily file: http://data.gdeltproject.org/events/20130107.export.CSV.zip
⬇️ Downloading daily file: http://data.gdeltproject.org/events/20130108.export.CSV.zip
⬇️ Downloading daily file: http://data.gdeltproject.org/events/20130109.export.CSV.zip
⬇️ Downloading daily file: http://data.gdeltproject.org/events/20130110.export.CSV.zip
⬇️ Downloading daily file: http://data.gdeltproject.org/events/20130111.export.CSV.zip
⬇️ Downloading daily file: http://data.gdel

In [14]:
# 📊 CONSOLIDACIÓN FINAL

files = glob(f"{output_folder}/*.csv")
all_dfs = [pd.read_csv(f, dtype={'EventCode': str}) for f in files if os.path.getsize(f) > 0]

if all_dfs:
    final_df = pd.concat(all_dfs, ignore_index=True)
    final_df = final_df.groupby(['Year', 'Month', 'Day', 'EventCode'])['count'].sum().reset_index()
    final_df.to_csv("/content/gdelt_daily_events_2013_2024.csv", index=False)
    print("✅ Consolidado guardado como gdelt_daily_events_2013_2024.csv")
else:
    print("⚠️ No se encontraron datos descargados para consolidar.")

⚠️ No se encontraron datos descargados para consolidar.


In [16]:
import os
import requests

# Crear carpeta donde guardar los .zip
output_folder = "/content/gdelt_zips_funcionando"
os.makedirs(output_folder, exist_ok=True)

# Fechas de archivos que sí están disponibles
dates_available = [
    '20130101', '20130102', '20130103', '20130104', '20130105', '20130106',
    '20130107', '20130108', '20130109', '20130110', '20130111', '20130112',
    '20130113', '20130114', '20130115', '20130116', '20130117', '20130118',
    '20130119', '20130120', '20130121', '20130122', '20130123', '20130124'
]

# Descargar cada archivo como .zip
for date_str in dates_available:
    url = f"http://data.gdeltproject.org/events/{date_str}.export.CSV.zip"
    dest_path = os.path.join(output_folder, f"{date_str}.zip")
    print(f"⬇️ Descargando: {url}")
    try:
        r = requests.get(url, timeout=30)
        with open(dest_path, 'wb') as f:
            f.write(r.content)
        print(f"✅ Guardado: {dest_path}")
    except Exception as e:
        print(f"❌ Error en {date_str}: {e}")

⬇️ Descargando: http://data.gdeltproject.org/events/20130101.export.CSV.zip
✅ Guardado: /content/gdelt_zips_funcionando/20130101.zip
⬇️ Descargando: http://data.gdeltproject.org/events/20130102.export.CSV.zip
✅ Guardado: /content/gdelt_zips_funcionando/20130102.zip
⬇️ Descargando: http://data.gdeltproject.org/events/20130103.export.CSV.zip
✅ Guardado: /content/gdelt_zips_funcionando/20130103.zip
⬇️ Descargando: http://data.gdeltproject.org/events/20130104.export.CSV.zip
✅ Guardado: /content/gdelt_zips_funcionando/20130104.zip
⬇️ Descargando: http://data.gdeltproject.org/events/20130105.export.CSV.zip
✅ Guardado: /content/gdelt_zips_funcionando/20130105.zip
⬇️ Descargando: http://data.gdeltproject.org/events/20130106.export.CSV.zip
✅ Guardado: /content/gdelt_zips_funcionando/20130106.zip
⬇️ Descargando: http://data.gdeltproject.org/events/20130107.export.CSV.zip
✅ Guardado: /content/gdelt_zips_funcionando/20130107.zip
⬇️ Descargando: http://data.gdeltproject.org/events/20130108.export.C

In [18]:
from google.colab import files
import glob

for file in glob.glob("/content/gdelt_zips_funcionando/*.zip"):
    files.download(file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>