In [3]:
import datetime
import os

import feedparser
import numpy
import pandas as pd
import psycopg2
import requests

In [4]:
# Notebook-wide pandas display settings: show every row and column, allow
# wide output lines, and left-align the column headers.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.width = 1000
pd.options.display.colheader_justify = "left"

In [5]:
# Endpoint of the English Wikipedia MediaWiki API.
API_URL = "https://en.wikipedia.org/w/api.php"

In [68]:

def fetch_recent_changes(start_date, limit="max", timeout=30):
    """
    Fetch one batch of recent changes from the English Wikipedia MediaWiki API.

    Sends a single ``action=query&list=recentchanges`` request with
    ``format=json`` and returns the decoded change records. No continuation
    requests are made, so at most one batch is returned.

    Args:
        start_date (str): Timestamp in ISO 8601 format
            (e.g., "2024-10-31T00:00:00Z"), sent as ``rcstart``.
            NOTE(review): no ``rcdir`` is sent; per the MediaWiki API docs the
            default direction is "older", i.e. the listing starts at
            ``start_date`` and moves back in time - confirm this is intended.
        limit (int | str): Value sent as ``rclimit``. Defaults to "max",
            which is what this function always sent before the parameter
            existed.
        timeout (float): Seconds to wait for the server before giving up, so
            a stalled connection cannot hang the notebook.

    Returns:
        list[dict]: One dict per change as returned by the API (empty list if
        the response has no ``query.recentchanges``). For pages outside the
        main namespace the namespace prefix (text up to the first ":") is
        removed from ``title``.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    api_url = "https://en.wikipedia.org/w/api.php"

    params = {
        "action": "query",
        "list": "recentchanges",
        "rcstart": start_date,
        "rclimit": limit,
        "rcprop": "title|timestamp|user|userid|comment",
        "format": "json",
    }

    response = requests.get(api_url, params=params, timeout=timeout)
    response.raise_for_status()
    # Print the fully encoded URL to make the request easy to reproduce.
    print("Request URL:", response.url)

    changes = response.json().get("query", {}).get("recentchanges", [])
    for change in changes:
        title = change.get("title")
        # ns 0 is the main (article) namespace: those titles carry no
        # namespace prefix, so a colon there is part of the title itself
        # (e.g. "Star Wars: Episode I") and must not be cut off. Records
        # without an "ns" key keep the previous behaviour (prefix stripped).
        if title and change.get("ns") != 0 and ":" in title:
            change["title"] = title.split(":", 1)[1]
    return changes

# Example usage: fetch one batch of changes and preview the first records.
# Echoing the whole list would print every record of the batch into the
# notebook output.
feed = fetch_recent_changes("2024-11-01T00:00:00Z")
feed[:5]


Request URL: https://en.wikipedia.org/w/api.php?action=query&list=recentchanges&rcstart=2024-10-31T00%3A00%3A00Z&rclimit=max&rcprop=title%7Ctimestamp%7Cuser%7Cuserid%7Ccomment&format=json


[{'type': 'categorize',
  'ns': 14,
  'title': 'Use mdy dates from October 2024',
  'user': 'Lepricavark',
  'userid': 28779459,
  'timestamp': '2024-10-31T00:00:00Z',
  'comment': '[[:Obe Blanc]] added to category, [[Special:WhatLinksHere/Obe Blanc|this page is included within other pages]]'},
 {'type': 'log',
  'ns': 6,
  'title': 'BhoomiThayiyaChochchalaMagafilmposter.jpg',
  'user': 'Explicit',
  'userid': 4842600,
  'timestamp': '2024-10-31T00:00:00Z',
  'comment': '[[WP:CSD#F5|F5]]: Unused non-free media file'},
 {'type': 'log',
  'ns': 6,
  'title': 'Better days album cover.jpg',
  'user': 'Explicit',
  'userid': 4842600,
  'timestamp': '2024-10-31T00:00:00Z',
  'comment': '[[WP:CSD#F5|F5]]: Unused non-free media file'},
 {'type': 'edit',
  'ns': 2,
  'title': 'Loibird90/sandbox',
  'user': 'Loibird90',
  'userid': 39674653,
  'timestamp': '2024-10-31T00:00:00Z',
  'comment': ''},
 {'type': 'log',
  'ns': 6,
  'title': '3776 LoveLetter.jpg',
  'user': 'Explicit',
  'userid': 484

In [69]:
# Build a DataFrame from the fetched change records.
df = pd.DataFrame(feed)

In [70]:
# Preview only the first rows: display.max_rows is unlimited in this
# notebook, so a bare `df` would render every row.
df.head(10)

Unnamed: 0,type,ns,title,user,userid,timestamp,comment,anon
0,categorize,14,Use mdy dates from October 2024,Lepricavark,28779459,2024-10-31T00:00:00Z,"[[:Obe Blanc]] added to category, [[Special:Wh...",
1,log,6,BhoomiThayiyaChochchalaMagafilmposter.jpg,Explicit,4842600,2024-10-31T00:00:00Z,[[WP:CSD#F5|F5]]: Unused non-free media file,
2,log,6,Better days album cover.jpg,Explicit,4842600,2024-10-31T00:00:00Z,[[WP:CSD#F5|F5]]: Unused non-free media file,
3,edit,2,Loibird90/sandbox,Loibird90,39674653,2024-10-31T00:00:00Z,,
4,log,6,3776 LoveLetter.jpg,Explicit,4842600,2024-10-31T00:00:00Z,[[WP:CSD#F5|F5]]: Unused non-free media file,
5,log,6,Big Monster Aventura.jpg,Explicit,4842600,2024-10-31T00:00:00Z,[[WP:CSD#F5|F5]]: Unused non-free media file,
6,log,6,Beyond Meat logo 2021.svg,Explicit,4842600,2024-10-31T00:00:00Z,[[WP:CSD#F5|F5]]: Unused non-free media file,
7,log,6,Borderline Hymns.jpg,Explicit,4842600,2024-10-31T00:00:00Z,[[WP:CSD#F5|F5]]: Unused non-free media file,
8,log,6,Augusta Preparatory Day School Logo.jpg,Explicit,4842600,2024-10-31T00:00:00Z,[[WP:CSD#F5|F5]]: Unused non-free media file,
9,log,6,Brunei Malay Teachers Association logo.png,Explicit,4842600,2024-10-31T00:00:00Z,[[WP:CSD#F5|F5]]: Unused non-free media file,


In [83]:
# Keyword arguments for psycopg2.connect(). Values come from the standard
# libpq environment variables so that credentials are not stored in the
# notebook; the non-secret settings fall back to local-development defaults.
db_params = {
    "dbname": os.environ.get("PGDATABASE", "postgres"),
    "user": os.environ.get("PGUSER", "postgres"),
    # Deliberately no hard-coded fallback for the secret. When PGPASSWORD is
    # unset this is None; psycopg2 omits None-valued parameters, leaving libpq
    # to use its own mechanisms (e.g. ~/.pgpass).
    "password": os.environ.get("PGPASSWORD"),
    "host": os.environ.get("PGHOST", "localhost"),
    "port": int(os.environ.get("PGPORT", "5432")),
}

In [94]:
# Make sure the target table exists.
conn = psycopg2.connect(**db_params)
try:
    # `with conn` commits when the block succeeds and rolls back when it
    # raises; it does not close the connection, hence the `finally` below.
    # `with conn.cursor()` closes the cursor on exit.
    with conn, conn.cursor() as cursor:
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS public.wikipedia_api (
                title text,
                timestamp timestamp,
                "user" varchar,
                userid varchar,
                comment text,
                type text
            );
        """)
finally:
    conn.close()

In [102]:
def load_to_postgres(df, table_name, db_params):
    """
    Replace a PostgreSQL table with the rows of a recent-changes DataFrame.

    The table ``public.<table_name>`` is dropped if it exists, recreated with
    the columns title, timestamp, "user", userid, comment and type, and then
    filled with one row per DataFrame row. All statements run in a single
    transaction that is committed only if every statement succeeds and is
    rolled back otherwise. The connection is always closed.

    Args:
        df (pandas.DataFrame): Rows to store. Must provide the columns
            ``title``, ``timestamp``, ``user``, ``userid`` and ``type``;
            ``comment`` is optional and stored as NULL when absent. Missing
            values (NaN/None) are stored as NULL.
        table_name (str): Name of the target table in the ``public`` schema.
            It is quoted as an SQL identifier, so it cannot inject SQL and is
            matched case-sensitively.
        db_params (dict): Keyword arguments forwarded to ``psycopg2.connect``.

    Returns:
        None. Success or failure is reported with ``print``; exceptions are
        caught and not re-raised.
    """
    # Imported here because a plain `import psycopg2` is not guaranteed to
    # load the `psycopg2.sql` submodule.
    from psycopg2 import sql

    conn = None
    try:
        # Convert to plain Python objects and map missing values to None so
        # they are stored as NULL rather than as the value 'NaN'.
        cleaned = df.astype(object).where(df.notna(), None)
        rows = [
            (
                record["title"],
                record["timestamp"],
                record["user"],
                record["userid"],
                record.get("comment"),
                record["type"],
            )
            for record in cleaned.to_dict("records")
        ]

        table = sql.SQL("{}.{}").format(sql.Identifier("public"), sql.Identifier(table_name))
        insert_statement = sql.SQL(
            'INSERT INTO {} (title, timestamp, "user", userid, comment, type) '
            "VALUES (%s, %s, %s, %s, %s, %s)"
        ).format(table)

        conn = psycopg2.connect(**db_params)
        # `with conn` commits on success and rolls back on error (it does not
        # close the connection); `with conn.cursor()` closes the cursor.
        with conn, conn.cursor() as cursor:
            # Full refresh: any existing table and its data are discarded.
            cursor.execute(sql.SQL("DROP TABLE IF EXISTS {};").format(table))
            cursor.execute(sql.SQL("""
                CREATE TABLE IF NOT EXISTS {} (
                    title TEXT,
                    timestamp TIMESTAMP,
                    "user" VARCHAR,
                    userid VARCHAR,
                    comment TEXT,
                    type TEXT
                );
            """).format(table))
            if rows:
                cursor.executemany(insert_statement, rows)

        print(f"Data loaded into table '{table_name}' successfully!")
    except Exception as e:
        # Best-effort contract kept from the original: report, do not raise.
        print(f"Error: {e}")
    finally:
        if conn is not None:
            conn.close()

In [103]:

# Load df into public.wikipedia_api (the table is dropped and recreated first).
load_to_postgres(df, "wikipedia_api", db_params)

Data loaded into table 'wikipedia_api' successfully!
