In [31]:
import os
import warnings

# Disable symlink warning
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"

# Optional: disable all warnings from HuggingFace
warnings.filterwarnings("ignore")

# Suppress IPython widget display errors
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "last_expr"


In [33]:
pip install mwclient


Note: you may need to restart the kernel to use updated packages.


In [35]:
import mwclient
import time

site = mwclient.Site('en.wikipedia.org')
page = site.pages['Bitcoin']

In [37]:
revs = list(page.revisions())

In [38]:
revs[0]

OrderedDict([('revid', 1294743166),
             ('parentid', 1292212004),
             ('minor', ''),
             ('user', 'OAbot'),
             ('timestamp',
              time.struct_time(tm_year=2025, tm_mon=6, tm_mday=9, tm_hour=14, tm_min=57, tm_sec=12, tm_wday=0, tm_yday=160, tm_isdst=-1)),
             ('comment',
              '[[Wikipedia:OABOT|Open access bot]]: doi updated in citation with #oabot.')])

In [39]:
revs = sorted(revs, key=lambda rev: rev["timestamp"]) 

In [40]:
revs[0]

OrderedDict([('revid', 275832581),
             ('parentid', 0),
             ('user', 'Pratyeka'),
             ('timestamp',
              time.struct_time(tm_year=2009, tm_mon=3, tm_mday=8, tm_hour=16, tm_min=41, tm_sec=7, tm_wday=6, tm_yday=67, tm_isdst=-1)),
             ('comment', 'creation (stub)')])

In [41]:
pip install transformers

Note: you may need to restart the kernel to use updated packages.


In [42]:
pip install transformers torch

Note: you may need to restart the kernel to use updated packages.


In [43]:
!pip install torch



In [44]:
pip show torch


Name: torch
Version: 2.7.1
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org/
Author: PyTorch Team
Author-email: packages@pytorch.org
License: BSD-3-Clause
Location: D:\New folder\Lib\site-packages
Requires: filelock, fsspec, jinja2, networkx, setuptools, sympy, typing-extensions
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [45]:
import sys
print(sys.executable)

D:\New folder\python.exe


In [46]:
import torch
print(torch.__version__)

2.7.1+cpu


In [47]:
from transformers import pipeline

# Create the sentiment analysis pipeline
sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", framework="pt")

# Define a function to return readable sentiment
def find_sentiment(text):
    result = sentiment_pipeline(text[:250])[0]
    label = result["label"]
    score = result["score"]
    
    # Format output
    sentiment = f"Sentiment: {label} | Confidence: {score * 100:.2f}%"
    return sentiment

# Example test
print(find_sentiment("This is absolutely amazing!"))
print(find_sentiment("This is the worst thing ever."))


Device set to use cpu


Sentiment: POSITIVE | Confidence: 99.99%
Sentiment: NEGATIVE | Confidence: 99.97%


In [59]:
edits = {}

for rev in revs:        
    date = time.strftime("%Y-%m-%d", rev["timestamp"])
    if date not in edits:
        edits[date] = dict(sentiments=list(), edit_count=0)
    
    edits[date]["edit_count"] += 1
    
    comment = rev.get("comment", "")
    edits[date]["sentiments"].append(find_sentiment(comment))

In [73]:
from statistics import mean

def extract_score(sentiment_string):
    try:
        # Example: 'Sentiment: NEGATIVE | Confidence: 99.06%'
        if "Confidence:" in sentiment_string:
            label_part, conf_part = sentiment_string.split("|")
            score = float(conf_part.strip().replace("Confidence:", "").replace("%", ""))
            if "NEGATIVE" in label_part:
                score *= -1
            return score
    except Exception:
        return None
    return None

for key in edits:
    raw_sentiments = edits[key].get("sentiments", [])
    cleaned_scores = [extract_score(s) for s in raw_sentiments if isinstance(s, str)]
    cleaned_scores = [s for s in cleaned_scores if s is not None]

    if cleaned_scores:
        edits[key]["sentiment"] = mean(cleaned_scores)
        edits[key]["neg_sentiment"] = len([s for s in cleaned_scores if s < 0]) / len(cleaned_scores)
    else:
        edits[key]["sentiment"] = 0
        edits[key]["neg_sentiment"] = 0

    edits[key].pop("sentiments", None)  


In [75]:
import pandas as pd

edits_df = pd.DataFrame.from_dict(edits, orient="index")

In [77]:
edits_df

Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-03-08,4,0,0
2009-08-05,1,0,0
2009-08-06,2,0,0
2009-08-14,1,0,0
2009-10-13,2,0,0
...,...,...,...
2025-05-20,1,0,0
2025-05-22,1,0,0
2025-05-24,1,0,0
2025-05-25,6,0,0


In [79]:
edits_df.index = pd.to_datetime(edits_df.index)

In [81]:
from datetime import datetime

dates = pd.date_range(start="2009-03-08",end=datetime.today())

In [83]:
edits_df = edits_df.reindex(dates, fill_value=0)

In [85]:
edits_df

Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-03-08,4,0,0
2009-03-09,0,0,0
2009-03-10,0,0,0
2009-03-11,0,0,0
2009-03-12,0,0,0
...,...,...,...
2025-06-08,0,0,0
2025-06-09,1,0,0
2025-06-10,0,0,0
2025-06-11,0,0,0


In [87]:
rolling_edits = edits_df.rolling(30, min_periods=30).mean()

In [89]:
rolling_edits = rolling_edits.dropna()

In [91]:
rolling_edits

Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-04-06,0.133333,0.0,0.0
2009-04-07,0.000000,0.0,0.0
2009-04-08,0.000000,0.0,0.0
2009-04-09,0.000000,0.0,0.0
2009-04-10,0.000000,0.0,0.0
...,...,...,...
2025-06-08,0.366667,0.0,0.0
2025-06-09,0.400000,0.0,0.0
2025-06-10,0.400000,0.0,0.0
2025-06-11,0.400000,0.0,0.0


In [93]:
rolling_edits.to_csv("wikipedia_edits155.csv")