In [34]:

import json
from requests_sse import EventSource
import pandas as pd
import time

url = 'https://stream.wikimedia.org/v2/stream/mediawiki.recentchange'

# Adding headers can help in case the server requires specific request formatting
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
}

# Function to determine if the change is to a talk page
def is_talk_page(title):
    """Return True when *title* starts with one of the listed talk prefixes.

    The comparison is case-insensitive (the title is lower-cased first).
    Only the prefixes enumerated below are recognised; other talk
    namespaces such as "User talk:" are not in the list and therefore
    yield False.
    """
    talk_prefixes = (
        'talk:',
        'wikipedia talk:',
        'file talk:',
        'template talk:',
        'help talk:',
        'category talk:',
        'portal talk:',
        'book talk:',
        'draft talk:',
        'timedtext talk:',
        'module talk:',
    )
    # str.startswith accepts a tuple and is True if any entry matches.
    lowered = title.lower()
    return lowered.startswith(talk_prefixes)
# Helper that strips a recognised talk prefix from the front of a title
def clean_talk_prefix(title):
    """Return *title* without its leading talk prefix.

    The prefix match is case-insensitive. When a prefix from the list
    matches, that many characters are cut from the front of the original
    title and surrounding whitespace is stripped from the remainder.
    A title that matches no prefix is returned unchanged (not stripped).
    """
    known_prefixes = (
        'talk:', 'wikipedia talk:', 'file talk:', 'template talk:',
        'help talk:', 'category talk:', 'portal talk:', 'book talk:',
        'draft talk:', 'timedtext talk:', 'module talk:',
    )
    lowered = title.lower()
    # First prefix (in list order) that the lower-cased title starts with.
    matched = next((p for p in known_prefixes if lowered.startswith(p)), None)
    if matched is None:
        return title
    remainder = title[len(matched):]
    return remainder.strip()

# Collect non-bot edits to English Wikipedia talk pages from the live
# recent-changes stream for a fixed window, then save them to edit.csv.
data_list = [[], []]  # data_list[0]: cleaned page titles, data_list[1]: edit comments
t_end = time.time() + 600  # stop consuming events 600 seconds from now

# Setting up the EventSource connection; the context manager closes it on exit.
with EventSource(url, headers=headers) as stream:
    for event in stream:
        # The deadline is only evaluated when an event arrives.
        if time.time() > t_end:
            break

        if event.type != 'message':
            continue

        try:
            # Parse the event data as JSON
            change = json.loads(event.data)
        except ValueError:
            # Payload was not valid JSON; ignore it and keep streaming.
            continue

        try:
            wiki = change['wiki']
            raw_title = change['title']
            is_bot = change['bot']
            user = change['user']
            comment = change['comment']
        except (KeyError, TypeError):
            # Event is not a JSON object or lacks a field used below;
            # skip it rather than aborting the whole capture.
            continue

        # Keep only human edits to talk pages on English Wikipedia.
        # (wiki == 'enwiki' already implies the name ends with 'wiki'.)
        if wiki != 'enwiki' or is_bot or not is_talk_page(raw_title):
            continue

        title = clean_talk_prefix(raw_title)
        print('{user} edited {title} on {wiki}: {comment}'.format(
            user=user, title=title, wiki=wiki, comment=comment))
        data_list[0].append(title)
        data_list[1].append(comment)

# Build the table once, after streaming; this also guarantees `data` exists
# (as two empty columns) when no matching edit was seen during the window.
data = {
    "Title": data_list[0],
    "Comment": data_list[1]
}

df = pd.DataFrame(data)
df.to_csv('edit.csv', index=False)





WikiMacaroons edited Madhukar Dattatraya Deoras on enwiki: Start
Οἶδα edited African Art Reframed: Reflections and Dialogues on Museum Culture on enwiki: unnecessary subtitle disambiguation
Simeon edited Gaëtan Duval on enwiki: [[Wikipedia:WikiProject|WikiProject]] tagging
Cloudz679 edited Petr Meindlschmid on enwiki: assess
Hipal edited Jonathan Swan on enwiki: /* Quickly look at revised article */ could you continue with what I was doing?
Kowal2701 edited 2025 Tanzanian election protests on enwiki: wikiprojects
Tizio C. Sempronio edited Rapid Support Forces on enwiki: /* Semi-protected edit request on 1 November 2025 */ new section
Hurricane Clyde edited 2025 United States federal government shutdown on enwiki: /* Not currently the longest */ Reply
EarthDude edited Vinayak Damodar Savarkar on enwiki: /* "Vināyak Dāmodar Sāvarkar" listed at Redirects for discussion */ new section
Steven Walling edited WikiProject Plants/Events/Garden Parties/Hoya on enwiki: /* Hoya macrophylla vs Hoya