In [1]:
!pip install mwclient



In [2]:
pip install tensorflow



In [3]:
import mwclient
import time

site=mwclient.Site("en.wikipedia.org")
page=site.pages["Bitcoin"]

In [4]:
revs=list(page.revisions())

In [5]:
revs[0]

OrderedDict([('revid', 1191352132),
             ('parentid', 1191342042),
             ('user', 'Grayfell'),
             ('timestamp',
              time.struct_time(tm_year=2023, tm_mon=12, tm_mday=23, tm_hour=0, tm_min=46, tm_sec=27, tm_wday=5, tm_yday=357, tm_isdst=-1)),
             ('comment',
              'Undid revision 1191342042 by [[Special:Contributions/TarkusAB|TarkusAB]] ([[User talk:TarkusAB|talk]]) This is a proportionate summary of countless reliable sources. Discuss on the talk page if absolutely necessary.')])

In [6]:
from transformers import pipeline
sentiment_pipeline=pipeline("sentiment-analysis")

def find_sentiment(text):
    sent=sentiment_pipeline([text[:250]])[0]
    score=sent["score"]
    if sent["label"]=="NEGATIVE":
        score *= -1
    return score



In [7]:
from statistics import mean

edits = {}
for rev in revs:
    date = time.strftime("%Y-%m-%d", rev["timestamp"])
    
    if date not in edits:
        edits[date] = dict(sentiments=list(), edit_count=0)
        
    edits[date]["edit_count"] += 1
    
    # Check if 'comment' key exists in the revision
    if "comment" in rev:
        comment = rev["comment"]
        edits[date]["sentiments"].append(find_sentiment(comment))
    else:
        # Handle the case where 'comment' does not exist
        pass  # or some other logic

for key in edits:
    if len(edits[key]["sentiments"]) > 0:
        edits[key]["sentiment"] = mean(edits[key]["sentiments"])
        edits[key]["neg_+sentiment"] = len([s for s in edits[key]["sentiments"] if s < 0]) / len(edits[key]["sentiments"])
    else:
        edits[key]["sentiment"] = 0
        edits[key]["neg_sentiment"] = 0
        
    del edits[key]["sentiments"]

# Print the final edits dictionary
print(edits)


{'2023-12-23': {'edit_count': 1, 'sentiment': 0.973699152469635, 'neg_+sentiment': 0.0}, '2023-12-22': {'edit_count': 1, 'sentiment': -0.9907625317573547, 'neg_+sentiment': 1.0}, '2023-12-06': {'edit_count': 2, 'sentiment': -0.0004887580871582031, 'neg_+sentiment': 0.5}, '2023-12-05': {'edit_count': 1, 'sentiment': -0.9955110549926758, 'neg_+sentiment': 1.0}, '2023-12-02': {'edit_count': 3, 'sentiment': -0.9133161505063375, 'neg_+sentiment': 1.0}, '2023-11-30': {'edit_count': 1, 'sentiment': -0.9972746968269348, 'neg_+sentiment': 1.0}, '2023-11-29': {'edit_count': 4, 'sentiment': -0.5108187794685364, 'neg_+sentiment': 0.75}, '2023-11-28': {'edit_count': 16, 'sentiment': 0.05439089983701706, 'neg_+sentiment': 0.4375}, '2023-11-27': {'edit_count': 3, 'sentiment': -0.47687047719955444, 'neg_+sentiment': 0.6666666666666666}, '2023-11-26': {'edit_count': 6, 'sentiment': -0.6672774354616801, 'neg_+sentiment': 0.8333333333333334}, '2023-11-25': {'edit_count': 7, 'sentiment': 0.080698166574750

In [8]:
import pandas as pd
edits_df=pd.DataFrame.from_dict(edits, orient="index")

In [9]:
edits_df

Unnamed: 0,edit_count,sentiment,neg_+sentiment
2023-12-23,1,0.973699,0.00
2023-12-22,1,-0.990763,1.00
2023-12-06,2,-0.000489,0.50
2023-12-05,1,-0.995511,1.00
2023-12-02,3,-0.913316,1.00
...,...,...,...
2009-10-13,2,-0.227500,0.50
2009-08-14,1,0.930021,0.00
2009-08-06,2,0.995746,0.00
2009-08-05,1,0.748121,0.00


In [10]:
edits_df.index=pd.to_datetime(edits_df.index)

In [11]:
from datetime import datetime
dates=pd.date_range(start="2009-03-08", end=datetime.today())

In [12]:
dates

DatetimeIndex(['2009-03-08', '2009-03-09', '2009-03-10', '2009-03-11',
               '2009-03-12', '2009-03-13', '2009-03-14', '2009-03-15',
               '2009-03-16', '2009-03-17',
               ...
               '2023-12-16', '2023-12-17', '2023-12-18', '2023-12-19',
               '2023-12-20', '2023-12-21', '2023-12-22', '2023-12-23',
               '2023-12-24', '2023-12-25'],
              dtype='datetime64[ns]', length=5406, freq='D')

In [13]:
edits_df=edits_df.reindex(dates, fill_value=0)

In [14]:
edits_df

Unnamed: 0,edit_count,sentiment,neg_+sentiment
2009-03-08,4,-0.550525,0.75
2009-03-09,0,0.000000,0.00
2009-03-10,0,0.000000,0.00
2009-03-11,0,0.000000,0.00
2009-03-12,0,0.000000,0.00
...,...,...,...
2023-12-21,0,0.000000,0.00
2023-12-22,1,-0.990763,1.00
2023-12-23,1,0.973699,0.00
2023-12-24,0,0.000000,0.00


In [15]:
rolling_edits=edits_df.rolling(30).mean()

In [16]:
rolling_edits

Unnamed: 0,edit_count,sentiment,neg_+sentiment
2009-03-08,,,
2009-03-09,,,
2009-03-10,,,
2009-03-11,,,
2009-03-12,,,
...,...,...,...
2023-12-21,9.433333,-0.163431,0.276473
2023-12-22,6.466667,-0.196951,0.294251
2023-12-23,4.466667,-0.152213,0.271847
2023-12-24,1.500000,-0.148118,0.253869


In [17]:
rolling_edits=rolling_edits.dropna()

In [18]:
rolling_edits

Unnamed: 0,edit_count,sentiment,neg_+sentiment
2009-04-06,0.133333,-0.018351,0.025000
2009-04-07,0.000000,0.000000,0.000000
2009-04-08,0.000000,0.000000,0.000000
2009-04-09,0.000000,0.000000,0.000000
2009-04-10,0.000000,0.000000,0.000000
...,...,...,...
2023-12-21,9.433333,-0.163431,0.276473
2023-12-22,6.466667,-0.196951,0.294251
2023-12-23,4.466667,-0.152213,0.271847
2023-12-24,1.500000,-0.148118,0.253869


In [19]:
rolling_edits.to_csv("wikipedia_edits.csv")