In [1]:
import mwclient
import time
import transformers

site = mwclient.Site('en.wikipedia.org')
page = site.pages['Solana_(blockchain_platform)']

In [2]:
revs = list(page.revisions())

In [3]:
revs[0]

OrderedDict([('revid', 1214126707),
             ('parentid', 1213607691),
             ('user', 'MrOllie'),
             ('timestamp',
              time.struct_time(tm_year=2024, tm_mon=3, tm_mday=17, tm_hour=2, tm_min=56, tm_sec=22, tm_wday=6, tm_yday=77, tm_isdst=-1)),
             ('comment', 'tense')])

In [4]:
revs = sorted(revs, key=lambda rev: rev["timestamp"]) 

In [5]:
revs[0]

OrderedDict([('revid', 1043592861),
             ('parentid', 0),
             ('user', 'Heroeswithmetaphors'),
             ('timestamp',
              time.struct_time(tm_year=2021, tm_mon=9, tm_mday=10, tm_hour=22, tm_min=14, tm_sec=47, tm_wday=4, tm_yday=253, tm_isdst=-1)),
             ('comment',
              '[[WP:AES|←]]Created page with \'\'\'\'Solana\'\'\' is a public [[blockchain]] platform. It is [[open-source]] and [[Decentralized computing|decentralized]], with consensus achieved using [[proof of stake]]  and  proof of history. It can facilitate peer-to-peer transactions with its internal [[cryptocurrency]], \'\'\'SOL\'\'\', and   claims to   support 50,000 [[transactions per second]].  Anatoly Yakovenko is the founder and CEO of Solana.<ref name="Why Solana" /> \'\'[[Bloomberg]]\'\' considers Solana t...\'')])

In [6]:
from transformers import pipeline
sentiment_pipeline = pipeline("sentiment-analysis")

def find_sentiment(text):
    sent = sentiment_pipeline([text[:250]])[0]
    score = sent["score"]
    if sent["label"] == "NEGATIVE":
        score *= -1
    return score

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.





In [7]:
edits = {}

for rev in revs:        
    date = time.strftime("%Y-%m-%d", rev["timestamp"])
    if date not in edits:
        edits[date] = dict(sentiments=list(), edit_count=0)
    
    edits[date]["edit_count"] += 1
    
    comment = rev.get("comment", "")
    edits[date]["sentiments"].append(find_sentiment(comment))

In [8]:
from statistics import mean

for key in edits:
    if len(edits[key]["sentiments"]) > 0:
        edits[key]["sentiment"] = mean(edits[key]["sentiments"])
        edits[key]["neg_sentiment"] = len([s for s in edits[key]["sentiments"] if s < 0]) / len(edits[key]["sentiments"])
    else:
        edits[key]["sentiment"] = 0
        edits[key]["neg_sentiment"] = 0
    
    del edits[key]["sentiments"]

In [9]:
import pandas as pd
edits_df = pd.DataFrame.from_dict(edits, orient="index")

In [10]:
edits_df

Unnamed: 0,edit_count,sentiment,neg_sentiment
2021-09-10,2,-0.026268,0.500000
2021-09-11,2,-0.125361,0.500000
2021-09-13,3,0.185897,0.333333
2021-09-14,1,0.748121,0.000000
2021-09-15,4,-0.119876,0.500000
...,...,...,...
2024-02-19,6,-0.640141,0.833333
2024-02-20,8,-0.010332,0.500000
2024-02-21,3,-0.331259,0.666667
2024-03-14,1,0.998236,0.000000


In [11]:
rolling_edits = edits_df.rolling(30, min_periods=30).mean()
rolling_edits = rolling_edits.dropna()
rolling_edits

Unnamed: 0,edit_count,sentiment,neg_sentiment
2021-12-02,2.466667,-0.115112,0.526768
2021-12-06,2.433333,-0.147124,0.543434
2021-12-08,2.400000,-0.176140,0.560101
2021-12-09,2.466667,-0.215607,0.582323
2021-12-13,2.533333,-0.234919,0.593434
...,...,...,...
2024-02-19,2.933333,-0.214769,0.604246
2024-02-20,3.100000,-0.202918,0.598690
2024-02-21,3.166667,-0.180640,0.587579
2024-03-14,3.133333,-0.147472,0.570913


In [12]:
rolling_edits.to_csv("solana_sentiment.csv")