In [14]:
#Imports
import mwclient 
import time 
from transformers import pipeline
from statistics import mean
import torch
import pandas as pd
from datetime import datetime


In [8]:
site = mwclient.Site('en.wikipedia.org') #sets site as wikipedia
page = site.pages['Bitcoin'] #And fetches the bitcoin page
rev = list(page.revisions()) #Make a list of all the revisions of the bitcoin wikipedia page
revs = sorted(rev, key=lambda rev: rev ["timestamp"]) #Sort the list from oldest to newest. 

In [9]:
revs[0]


OrderedDict([('revid', 275832581),
             ('parentid', 0),
             ('user', 'Pratyeka'),
             ('timestamp',
              time.struct_time(tm_year=2009, tm_mon=3, tm_mday=8, tm_hour=16, tm_min=41, tm_sec=7, tm_wday=6, tm_yday=67, tm_isdst=-1)),
             ('comment', 'creation (stub)')])

In [10]:
sentiment_pipeline = pipeline("sentiment-analysis")

def find_sentiment(text):
    sent = sentiment_pipeline([text[:250]])[0]
    score = sent["score"]
    if sent["label"] =="NEGATIVE":
        score *=-1
    return score

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


In [11]:
edits = {}

for rev in revs:        
    date = time.strftime("%Y-%m-%d", rev["timestamp"])
    if date not in edits:
        edits[date] = dict(sentiments=list(), edit_count=0)
    
    edits[date]["edit_count"] += 1
    
    comment = rev.get("comment", "")
    edits[date]["sentiments"].append(find_sentiment(comment))

In [12]:

for key in edits:
    if len(edits[key]["sentiments"]) > 0:
        edits[key]["sentiment"] = mean(edits[key]["sentiments"])
        edits[key]["neg_sentiment"] = len([s for s in edits[key]["sentiments"] if s < 0]) / len(edits[key]["sentiments"])
    else:
        edits[key]["sentiment"] = 0
        edits[key]["neg_sentiment"] = 0
    
    del edits[key]["sentiments"]

In [15]:

edits_df = pd.DataFrame.from_dict(edits, orient="index")
dates = pd.date_range(start="2009-03-08",end=datetime.today())


In [16]:
edits_df
edits_df.index = pd.to_datetime(edits_df.index)
dates = pd.date_range(start="2009-03-08",end=datetime.today())
edits_df = edits_df.reindex(dates, fill_value=0)


In [17]:
rolling_edits = edits_df.rolling(30, min_periods=30).mean()
rolling_edits = rolling_edits.dropna()
rolling_edits


Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-04-06,0.133333,-0.018351,0.025000
2009-04-07,0.000000,0.000000,0.000000
2009-04-08,0.000000,0.000000,0.000000
2009-04-09,0.000000,0.000000,0.000000
2009-04-10,0.000000,0.000000,0.000000
...,...,...,...
2025-11-03,0.966667,-0.023050,0.190476
2025-11-04,0.966667,-0.023050,0.190476
2025-11-05,0.966667,-0.023050,0.190476
2025-11-06,1.000000,-0.054742,0.223810


In [18]:
rolling_edits.to_csv("wikipedia_edits.csv")
