In [1]:
import mwclient
import time
import transformers

site = mwclient.Site('en.wikipedia.org')
page = site.pages['Binance']

In [2]:
revs = list(page.revisions())

In [3]:
revs[0]

OrderedDict([('revid', 1227647510),
             ('parentid', 1226517608),
             ('user', 'Citation bot'),
             ('timestamp',
              time.struct_time(tm_year=2024, tm_mon=6, tm_mday=7, tm_hour=0, tm_min=50, tm_sec=24, tm_wday=4, tm_yday=159, tm_isdst=-1)),
             ('comment',
              'Altered title. Add: website, authors 1-1. Removed parameters. Some additions/deletions were parameter name changes. | [[:en:WP:UCB|Use this bot]]. [[:en:WP:DBUG|Report bugs]]. | Suggested by Abductive | [[Category:Wikipedia articles needing factual verification from May 2024]] | #UCB_Category 14/218')])

In [4]:
revs = sorted(revs, key=lambda rev: rev["timestamp"]) 

In [5]:
revs[0]

OrderedDict([('revid', 817963944),
             ('parentid', 0),
             ('user', 'El fo0'),
             ('timestamp',
              time.struct_time(tm_year=2017, tm_mon=12, tm_mday=31, tm_hour=17, tm_min=7, tm_sec=33, tm_wday=6, tm_yday=365, tm_isdst=-1)),
             ('comment',
              "[[WP:AES|←]]Created page with '{{Infobox company | name = Binance | foundation = 2016 | location = [[Hong Kong]], China | products = [[Cryptocurrency exchange]] | homepage = [https://www.binan...'")])

In [6]:
from transformers import pipeline
sentiment_pipeline = pipeline("sentiment-analysis")

def find_sentiment(text):
    sent = sentiment_pipeline([text[:250]])[0]
    score = sent["score"]
    if sent["label"] == "NEGATIVE":
        score *= -1
    return score

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.





In [7]:
edits = {}

for rev in revs:        
    date = time.strftime("%Y-%m-%d", rev["timestamp"])
    if date not in edits:
        edits[date] = dict(sentiments=list(), edit_count=0)
    
    edits[date]["edit_count"] += 1
    
    comment = rev.get("comment", "")
    edits[date]["sentiments"].append(find_sentiment(comment))

In [8]:
from statistics import mean

for key in edits:
    if len(edits[key]["sentiments"]) > 0:
        edits[key]["sentiment"] = mean(edits[key]["sentiments"])
        edits[key]["neg_sentiment"] = len([s for s in edits[key]["sentiments"] if s < 0]) / len(edits[key]["sentiments"])
    else:
        edits[key]["sentiment"] = 0
        edits[key]["neg_sentiment"] = 0
    
    del edits[key]["sentiments"]

In [9]:
import pandas as pd
edits_df = pd.DataFrame.from_dict(edits, orient="index")

In [10]:
edits_df

Unnamed: 0,edit_count,sentiment,neg_sentiment
2017-12-31,4,0.315109,0.250000
2018-01-04,1,-0.998359,1.000000
2018-01-15,1,-0.997948,1.000000
2018-01-21,1,-0.999722,1.000000
2018-01-22,1,-0.999730,1.000000
...,...,...,...
2024-05-15,3,0.332359,0.333333
2024-05-19,2,-0.013685,0.500000
2024-05-24,1,-0.956601,1.000000
2024-05-31,3,0.287008,0.333333


In [11]:
rolling_edits = edits_df.rolling(30, min_periods=30).mean()
rolling_edits = rolling_edits.dropna()
rolling_edits

Unnamed: 0,edit_count,sentiment,neg_sentiment
2018-05-27,3.533333,-0.022832,0.474136
2018-05-28,3.433333,-0.060801,0.499136
2018-05-30,3.466667,-0.060816,0.499136
2018-06-02,3.466667,-0.060869,0.499136
2018-06-10,3.533333,-0.000630,0.465802
...,...,...,...
2024-05-15,2.100000,-0.133190,0.531966
2024-05-19,2.133333,-0.100380,0.515299
2024-05-24,2.100000,-0.099293,0.515299
2024-05-31,2.166667,-0.056446,0.493077


In [12]:
rolling_edits.to_csv("binance_sentiment.csv")