In [1]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
from tqdm import tqdm
import Levenshtein
import matplotlib.pyplot as plt
import fastDamerauLevenshtein

In [2]:
# splits string in words
def split_strings(str1, str2):
    lst = [str1.split()]
    lst.append(str2.split())
    return lst

# checks if wordcount in both strings is equal
def same_wordcounts(lst1, lst2):
    return (len(lst1) == len(lst2))

# deletes non alphabetical characters from string
def skip_no_alpha(string):
    only_alpha = ""
    for char in string:
        if char.isalpha():
            only_alpha += char
    return only_alpha

# checks in numbers are increments
def is_increment(nr1, nr2):
    return (nr1+1 == nr2 or nr1-1 == nr2)

# checks if case (upper/loewr) of the first latter is switched
def is_first_letter_caseswitch(str1,str2):
    return (str1[0].isupper() and str2[0].islower() or str1[0].islower() and str2[0].isupper())


In [3]:
"[".isalpha()

False

In [4]:
def get_levenshtein_dists(lst1, lst2, only_alpha=False):
    if len(lst1) != len(lst2):
        print("Difference words counts of lists!")
        return
    dists = []
    if only_alpha:
        for i in range(len(lst1)):
            dists.append(int(fastDamerauLevenshtein.damerauLevenshtein(
                skip_no_alpha(lst1[i]), skip_no_alpha(lst2[i]), similarity=False)))
    else:
        for i in range(len(lst1)):
            dists.append(int(fastDamerauLevenshtein.damerauLevenshtein(
                lst1[i], lst2[i], similarity=False)))
    return dists

# splits strings in words


def get_words_and_dists(str1, str2, skip_no_alpha=False):
    words = split_strings(str1, str2)
    if len(words[0]) == len(words[1]):
        dists = get_levenshtein_dists(words[0], words[1], skip_no_alpha)
    else:
        dists = []
    return words, dists


In [5]:
def word_in_dict(str1, words_dict):
    return str1 in words_dict

def is_typo_fixed(str1, str2, words_dict, lowercase=True):
    """ Check if typo is fixed.
        return 0: no other case is found
        return 1: word was not in dict before (missspelled)
        return 2: word with swapped first letter (and other changes depending on edit distance)
    """
    # detects number errors (dreher,tippfehler), skipps increments. Only works if skip_no_alpha is false 
    if str1.isdigit() and str2.isdigit() and not is_increment(int(str1),int(str2)):
        return 3

    if is_first_letter_caseswitch(str1,str2):
        return 2

    if lowercase:
        str1=str1.lower()
        str2=str2.lower()

    # checks if str1 is not in dict but str2 is
    if (not word_in_dict(str1, words_dict) and word_in_dict(str2, words_dict)):
        return 1
        
    return 0

In [6]:
def get_typo_type(str1, str2, words_dict, only_alpha=False):
    words, levenshtein_dists = get_words_and_dists(
        str1, str2, only_alpha)
    typo_lst = []
    for i in range(len(levenshtein_dists)):  # only loops if dists are found (word counts are equal)
        if(levenshtein_dists[i] > 0 and levenshtein_dists[i] <= 2):  # only uses distances >0 <=2
            typo_lst.append(is_typo_fixed(
                words[0][i], words[1][i], words_dict))
        # else:  # appends None if dist is <0 or >2
        #     typo_lst.append(None)
    return typo_lst


In [7]:
my_file = open("../../../words_alpha.txt", "r")
words_dict=set(my_file.read().split("\n"))

testcase1 = ["Hier sind kkeine Fheler", "Hier sind keine Fehler"]
testcase1_en = ["There are nno erorrs", "There are no errors"]
typo_lst = get_typo_type(testcase1_en[0], testcase1_en[1], words_dict)
print(typo_lst)

[1, 1]


In [8]:
def typo_check(str1, str2, words_dict, skip_no_alpha=False):
    """Return True if typo
       Return False if no typo
    """
    typo_lst = get_typo_type(str1, str2, words_dict, skip_no_alpha)
    for typo_type in typo_lst:
        if typo_type > 0:  # 1 is previous not in dict, current is in dict, 2 case switch on first letter
            return True
    return False

In [9]:
input_data = Path("../../matched-infoboxes-extracted/")
inp = list(input_data.rglob('*.json'))
files = [x for x in inp if x.is_file()]
len(files) 

70

In [10]:
my_file = open("../../../words_alpha.txt", "r")
words_dict=set(my_file.read().split("\n"))

In [11]:
num_iteratons = 1
number_of_files = 5
number_of_files_start = 0
num_edits = 0
num_change_tuples = 0
typo_lst = []
for _ in range(num_iteratons):
    change_tuples = []
    for file in tqdm(files[number_of_files_start:number_of_files_start+number_of_files]):
        with open(file, 'r', encoding='utf-8') as f:
            for jsonObj in f:
                single_edit = json.loads(jsonObj)
                num_edits += 1
                title = single_edit['pageTitle']
                pageID = single_edit['pageID']
                key = single_edit['key']
                template = single_edit['template'] if 'template' in single_edit.keys(
                ) else None
                changes = single_edit['changes']
                validFrom = single_edit['validFrom']
                revisionId = single_edit['revisionId']
                attributes = single_edit['attributes'] if 'attributes' in single_edit.keys(
                ) else None
                user_name = single_edit['user']['username'] if 'username' in single_edit['user'].keys(
                ) else None
                user_id = single_edit['user']['id'] if 'id' in single_edit['user'].keys(
                ) else None
                user_ip = single_edit['user']['ip'] if 'ip' in single_edit['user'].keys(
                ) else None
                for change in changes:
                    num_change_tuples += 1
                    name = change['property']['name']
                    current_value = change['currentValue'] if 'currentValue' in change.keys(
                    ) else None
                    previous_value = change['previousValue'] if 'previousValue' in change.keys(
                    ) else None
                    validTo = change['valueValidTo'] if 'valueValidTo' in change.keys(
                    ) else None
                    change_tuples.append((title, pageID, key, template, name, previous_value,
                                          current_value, validFrom, validTo, revisionId, user_name, user_id, user_ip, attributes))
    number_of_files_start += number_of_files

    for i in tqdm(range(len(change_tuples))):
        # Check only changes (no creations/deletions)
        if(change_tuples[i][5] is not None and change_tuples[i][6] is not None):
            typo_lst.append(typo_check(
                change_tuples[i][5], change_tuples[i][6], words_dict, True))
        else:
            typo_lst.append(None)

print("Number of edits:", num_edits)
print("Number of change tuples:", num_change_tuples)


100%|██████████| 5/5 [00:13<00:00,  2.66s/it]
100%|██████████| 784540/784540 [00:02<00:00, 326396.34it/s]

Number of edits: 158789
Number of change tuples: 784540





In [12]:
counts = {"typo fixed": 0,
          "no typo": 0,
          "not tested": 0
          }
for typo in typo_lst:
    if typo is True:
        counts["typo fixed"] += 1
    if typo is False:
        counts["no typo"] += 1
    if typo is None:
        counts["not tested"] += 1
print(counts)
print(counts["typo fixed"]+counts["no typo"]+counts["not tested"])

{'typo fixed': 3274, 'no typo': 181994, 'not tested': 599272}
784540


In [13]:
print("typo fixed:", counts["typo fixed"])
print("no typo:", counts["no typo"])
print("not tested:", counts["not tested"])
print("typo fix %:",counts["typo fixed"]/(counts["typo fixed"]+counts["no typo"]))

typo fixed: 3274
no typo: 181994
not tested: 599272
typo fix %: 0.017671697217004557


In [14]:
# get idx of all typos
typo_idx = []
for i in range(len(typo_lst)):
    if typo_lst[i] == True:
        typo_idx.append(i)

## Time to Change

In [15]:
from datetime import datetime
from datetime import timedelta

def average_timedelta_between_changes(typo_idx, change_tuples):
    time_between_changes = timedelta(0)
    for idx in typo_idx:
        if change_tuples[idx][7] is not None and change_tuples[idx][8] is not None:
            time_between_changes += datetime.strptime(change_tuples[idx][8], '%Y-%m-%dT%H:%M:%SZ')-datetime.strptime(
                change_tuples[idx][7], '%Y-%m-%dT%H:%M:%SZ')
    return time_between_changes / len(typo_idx)

In [16]:
print("Average Time to change for a typofix")
str(average_timedelta_between_changes(typo_idx,change_tuples))

Average Time to change for a typofix


'119 days, 6:16:03.847587'

## Dataframe

In [17]:
data = pd.DataFrame(change_tuples, columns=['pageTitle', 'pageID', 'key', 'template', 'name', 'previous_value',
                    'current_value', 'validFrom', 'validTo', 'revisionId', 'user_name', 'user_id', 'user_ip', 'attributes'])
data['validFrom'] = pd.to_datetime(data['validFrom'])
data['validTo'] = pd.to_datetime(data['validTo'])

In [18]:
data.iloc[typo_idx].tail(20)

Unnamed: 0,pageTitle,pageID,key,template,name,previous_value,current_value,validFrom,validTo,revisionId,user_name,user_id,user_ip,attributes
770646,Hezbollah,13919,69252267-0,infobox political party,ideology,{{Unbulleted list |class=nowrap\n | [[Jihad#S...,{{Unbulleted list |class=nowrap\n | [[Islamic...,2016-12-21 09:45:14+00:00,2017-02-24 11:59:59+00:00,755990967,ZxxZxxZ,11676657.0,,"{'country': 'Lebanon', 'website': '[http://www..."
771360,Honolulu,13887,22576861-0,infobox city,map_caption,Location within the Island of [[Oahu|Oahu]] in...,Location within the island of [[Oahu]] in the ...,2006-10-21 19:48:25+00:00,2006-12-06 10:05:06+00:00,82863905,Colonies Chris,577301.0,,"{'utc_offset': '-10', 'official_name': 'Honolu..."
776609,Honolulu,13887,22576861-0,infobox settlement,official_name,"Honolulu, Hawaiil","Honolulu, Hawaii",2009-11-13 01:58:48+00:00,2009-11-27 22:46:17+00:00,325549585,HkCaGu,4301051.0,,"{'population_metro': '909,863', 'postal_code_t..."
777059,Honolulu,13887,22576861-0,infobox settlement,official_name,"Honolulu, Hawai","Honolulu, Hawaii",2010-01-22 05:05:28+00:00,2010-02-06 17:41:43+00:00,339294701,Nyttend,1960810.0,,"{'population_metro': '909,863', 'postal_code_t..."
777069,Honolulu,13887,22576861-0,infobox settlement,official_name,"Honolulu, Howaii","Honolulu, Hawaii",2010-02-06 17:42:46+00:00,2010-02-21 20:31:09+00:00,342321296,Tide rolls,7167267.0,,"{'population_metro': '909,863', 'postal_code_t..."
779108,Honolulu,13887,22576861-0,infobox settlement,name,Honalulu,Honolulu,2013-04-02 21:48:26+00:00,2013-04-19 00:17:09+00:00,548387920,The Devil's Advocate,3761899.0,,"{'population_metro': '953207', 'postal_code_ty..."
782088,Honolulu,13887,22576861-0,infobox settlement,timezone,[[Hawaii-Aleutian time zone|Hawaiian (HST)]],[[Hawaii–Aleutian Time Zone|Hawaiian (HST)]],2017-06-21 22:55:46+00:00,2017-09-28 23:33:34+00:00,786846237,,,87.6.21.159,"{'government_footnotes': '', 'blank_info': '15..."
782511,Honolulu,13887,22576861-0,infobox settlement,image_caption,Clockwise from top: Aerial view of [[Downtown ...,Clockwise from top: aerial view of [[Downtown ...,2018-05-23 14:02:58+00:00,2018-05-30 04:26:24+00:00,842604835,WhatsUpWorld,32077803.0,,"{'government_footnotes': '', 'blank_info': '15..."
782518,Honolulu,13887,22576861-0,infobox settlement,name,Honolululu,Honolulu,2018-05-30 18:04:31+00:00,2018-08-04 06:15:18+00:00,843674853,RetroCraft314,26078417.0,,"{'government_footnotes': '', 'blank_info': '15..."
783354,Second Polish Republic,14245,91255964-0,infobox former country,event_end,[[Invasion of Boland (1939)|Invasion]],[[Invasion of Poland (1939)|Invasion]],2009-06-24 10:37:18+00:00,2010-01-01 19:03:05+00:00,298323910,Skysmith,6995.0,,"{'symbol': 'Coat of arms of Poland', 'capital'..."


## Swear words

In [19]:
def check_swear(str1, str2, words_dict, lowercase=True):
    """ Check if swear got added or removed.
        Input:
            str1: prev string
            str2: curr string
        Output:
        prev false , curr true : 1 (swear word added)
        prev true , curr false : 2 (swear word removed)
    """
    if lowercase:
        str1=str1.lower()
        str2=str2.lower()

    str1_lst=str1.split()
    str2_lst=str2.split()

    prev_swear=False
    curr_swear=False
    for string in str1_lst:
        if word_in_dict(string, words_dict):
            prev_swear=True
            break

    for string in str2_lst:
        if word_in_dict(string, words_dict):
            curr_swear=True
            break

    if (not prev_swear and curr_swear):
        # swear word added
        return 1
    if (prev_swear and not curr_swear):
        # swear word removed
        return 2
    if (prev_swear and  curr_swear):
        # swear word in both
        return 3
    if (not prev_swear and not curr_swear):
        # swear word in none
        return 0

In [20]:
def is_not_empty_or_none(input):
    return input is not None and input is not ""

is_not_empty_or_none(None)

False

In [21]:
swear_file = open("../../../words_swear.txt", "r")
swear_dict = set(swear_file.read().split("\n"))
swear_dict.remove("nazi")

def is_not_empty_or_none(input):
    return input is not None and input is not ""


swear_lst = []
for i in tqdm(range(len(change_tuples))):
    if(is_not_empty_or_none(change_tuples[i][5]) and is_not_empty_or_none(change_tuples[i][6])):
        swear_lst.append(check_swear(
            change_tuples[i][5], change_tuples[i][6], swear_dict))
    else:
        swear_lst.append(None)


counts_swear = {"Swearwords added": 0,
                "Swearwords removed": 0,
                "Swearwords not touched": 0,
                "Swearwords not found": 0,
                "create or delete (skipped)": 0}
for test in swear_lst:
    if test is 1:
        counts_swear["Swearwords added"] += 1
    if test is 2:
        counts_swear["Swearwords removed"] += 1
    if test is 3:
        counts_swear["Swearwords not touched"] += 1
    if test is 0:
        counts_swear["Swearwords not found"] += 1
    if test is None:
        # prev or curr is None
        counts_swear["create or delete (skipped)"] += 1
print(counts_swear)

idx_swear = [[], []]
for i in range(len(swear_lst)):
    if swear_lst[i] == 1:
        idx_swear[0].append(i)
    if swear_lst[i] == 2:
        idx_swear[1].append(i)


100%|██████████| 784540/784540 [00:01<00:00, 657512.57it/s]


{'Swearwords added': 1765, 'Swearwords removed': 1733, 'Swearwords not touched': 340, 'Swearwords not found': 166411, 'create or delete (skipped)': 614291}


In [22]:
print("Swearwords added:", counts_swear["Swearwords added"])
print("Swearwords removed:", counts_swear["Swearwords removed"])
print("Swearwords not touched:", counts_swear["Swearwords not touched"])
print("Swearwords not found:", counts_swear["Swearwords not found"])
print("create or delete (skipped):", counts_swear["create or delete (skipped)"])
edit_count=0
for i, (k, v) in enumerate(counts_swear.items()):
    if i > 3:
        break
    edit_count += v
print("Toal words (without create/delete):", edit_count)
print("Percentage of swear words in edits added and removed:",
      counts_swear["Swearwords added"]/edit_count, counts_swear["Swearwords removed"]/edit_count)


Swearwords added: 1765
Swearwords removed: 1733
Swearwords not touched: 340
Swearwords not found: 166411
create or delete (skipped): 614291
Toal words (without create/delete): 170249
Percentage of swear words in edits added and removed: 0.010367168089093035 0.010179208101075483


## Swear words added

In [23]:
# todo: how handle empty strings?
def average_timedelta_between_changes(typo_idx, change_tuples):
    time_between_changes = timedelta(0)
    idx_out=[]
    for idx in typo_idx:
        if change_tuples[idx][7] is not None and change_tuples[idx][8] is not None:
            time_delta=datetime.strptime(change_tuples[idx][8], '%Y-%m-%dT%H:%M:%SZ')-datetime.strptime(
                change_tuples[idx][7], '%Y-%m-%dT%H:%M:%SZ')
            time_between_changes += time_delta
            if(time_delta.days>0):
                idx_out.append(idx)
    return idx_out, time_between_changes / len(typo_idx)

print("Average time to delete swearwords:",str(average_timedelta_between_changes(idx_swear[0],change_tuples)[1]))

Average time to delete swearwords: 2 days, 17:35:38.939943


In [24]:
idx=195624
tdelta=datetime.strptime(change_tuples[idx][8], '%Y-%m-%dT%H:%M:%SZ')-datetime.strptime(
                change_tuples[idx][7], '%Y-%m-%dT%H:%M:%SZ')
tdelta.days
print(data.iloc[195624])
check_swear(change_tuples[idx][5], change_tuples[idx][6], swear_dict)

pageTitle                                                 Apollo 11
pageID                                                          662
key                                                      54506549-0
template                                      infobox space mission
name                                             lunar_eva_duration
previous_value                                             02:31:40
current_value                                         2 h 31 m 40 s
validFrom                                 2008-04-12 15:11:54+00:00
validTo                                   2008-04-23 03:41:18+00:00
revisionId                                                205135170
user_name                                             Sardanaphalus
user_id                                                      427947
user_ip                                                        None
attributes        {'mission_name': '''Apollo 11''', 'lunar_modul...
Name: 195624, dtype: object


0

In [25]:
idx_out, _ = average_timedelta_between_changes(idx_swear[0], change_tuples)
data.iloc[idx_out].head(50)


Unnamed: 0,pageTitle,pageID,key,template,name,previous_value,current_value,validFrom,validTo,revisionId,user_name,user_id,user_ip,attributes
62128,Auschwitz concentration camp,2006,108795825-0,infobox world heritage site,name,Auschwitz Concentration Camp,Auschwitz-Birkenau. German Nazi Concentration ...,2007-06-28 12:17:11+00:00,2007-06-30 22:05:28+00:00,141156442,Kieraf~enwiki,4704928.0,,{'image': '[[Image:Auschwitz gate (tbertor1).j...
62169,Auschwitz concentration camp,2006,108795825-0,infobox world heritage site,name,Auschwitz Birkenau<br><small>Nazi-German Conce...,Auschwitz Birkenau<br><small>German Nazi Conce...,2008-04-03 14:21:17+00:00,2008-05-18 18:12:32+00:00,203047411,Teutonic Tamer,5827990.0,,{'image': '[[Image:Auschwitz gate (tbertor1).j...
62364,Auschwitz concentration camp,2006,108795825-0,infobox concentration camp,location,"[[Oświęcim]], German-occupied Poland","[[Oświęcim]], [[Polish areas annexed by Nazi G...",2010-07-28 22:30:20+00:00,2011-03-09 08:04:03+00:00,375989966,Dert45,10269380.0,,"{'gas chambers': '', 'color': '', 'lats': '09'..."
62454,Auschwitz concentration camp,2006,108795825-0,infobox concentration camp,operated by,"the German ''[[Schutzstaffel]]'' (SS), the [[N...","the Nazi ''[[Schutzstaffel]]'' (SS), the [[NKV...",2013-03-25 02:51:43+00:00,2013-08-26 01:36:10+00:00,546843485,Iscoak,11268616.0,,"{'gas chambers': '', 'color': '', 'lats': '09'..."
62456,Auschwitz concentration camp,2006,108795825-0,infobox concentration camp,type,[[Concentration camp|Concentration]] and [[ext...,German Nazi [[Concentration camp|Concentration...,2013-06-10 22:35:55+00:00,2013-08-07 00:21:37+00:00,559302180,Tom5551,13881192.0,,"{'gas chambers': '', 'color': '', 'lats': '09'..."
62633,Auschwitz concentration camp,2006,108795825-0,infobox concentration camp,embedded,{{designation list | embed=yes\n| designation1...,{{designation list | embed=yes\n| designation1...,2018-03-04 19:08:11+00:00,2018-04-30 02:50:30+00:00,828785022,Acroterion,1839637.0,,{'commandant': '{{ubl  |[[Rudolf Höss|Rudolf...
155729,Aphrodite,1174,227679395-0,infobox greek deity,god_of,'''Goddess of Love and Beauty''',"'''Goddess of Love, Sex and Beauty'''",2009-03-18 21:28:39+00:00,2009-03-21 23:37:18+00:00,278197586,Gemmologist,9043844.0,,"{'image': 'Venus de Milo Louvre Ma399 n4.jpg',..."
155757,Aphrodite,1174,227679395-0,infobox greek deity,god_of,'''Goddess of Love and Beauty''',"'''Goddess of Love, Sex and Beauty'''",2009-03-31 04:26:38+00:00,2009-04-07 21:35:36+00:00,280797680,Modernist,1653549.0,,"{'image': 'Venus de Milo Louvre Ma399 n4.jpg',..."
155799,Aphrodite,1174,227679395-0,infobox greek deity,god_of,"'''Goddess of Love, sexual reproduction and Be...","'''Goddess of Love, Sex and Beauty'''",2009-04-25 21:50:34+00:00,2009-04-28 02:45:07+00:00,286113910,Modernist,1653549.0,,"{'image': 'Venus de Milo Louvre Ma399 n4.jpg',..."
155805,Aphrodite,1174,227679395-0,infobox greek deity,god_of,'''Goddess of Love and Beauty''',"'''Goddess of Love, Sex and Beauty'''",2009-04-28 02:54:15+00:00,2009-05-06 00:06:14+00:00,286574120,Modernist,1653549.0,,"{'image': 'Venus de Milo Louvre Ma399 n4.jpg',..."


## Swear words removed

In [26]:
data.iloc[idx_swear[1]].head(50)

Unnamed: 0,pageTitle,pageID,key,template,name,previous_value,current_value,validFrom,validTo,revisionId,user_name,user_id,user_ip,attributes
2962,Albert,1504,157286038-0,infobox given name,meaning,Bitch,noble-bright,2009-01-31 01:11:47+00:00,2009-02-01 09:06:50+00:00,267518358,ClueBot,4928500.0,,"{'image': '', 'pronunciation': '', 'gender': '..."
3408,AOL,1397,20196791-0,infobox_company,company_name,America Online IS CRAP,America Online,2006-01-24 19:36:13+00:00,2006-01-30 16:38:38+00:00,36534563,DavidWBrooks,7643.0,,{'company_type': '[[Subsidiary]] of [[Time War...
4008,AOL,1397,20196791-0,infobox_company,company_slogan,"""Yes, we fucking suck!""","""So easy to use, no wonder we're #1!""",2006-06-13 04:48:54+00:00,2006-06-18 13:16:48+00:00,58337373,AntiVandalBot,1574574.0,,{'company_type': 'Owned by [[Time Warner]] (95...
4153,AOL,1397,20196791-0,infobox_company,company_name,AOL LLC Fuck AOL,AOL LLC,2006-07-24 08:07:18+00:00,2006-08-01 21:52:32+00:00,65514972,AntiVandalBot,1574574.0,,{'company_type': 'Owned by [[Time Warner]] (95...
7081,AOL,1397,109204137-3,infobox company,name,AOL LLC <- AOL SUCKS ASS AND IS THE WORST INT...,AOL LLC,2008-06-11 18:18:59+00:00,2008-06-11 18:19:49+00:00,218677813,ClueBot,4928500.0,,"{'num_employees': '8,000<ref>{{cite web  |u..."
7221,AOL,1397,109204137-3,infobox company,name,AOL LLC from the f------ hell,AOL LLC,2008-09-09 02:10:13+00:00,2008-09-16 23:39:59+00:00,237195088,Bfigura's puppy,7169406.0,,"{'num_employees': '8,000<ref>{{cite web  |u..."
7328,AOL,1397,109204137-3,infobox company,name,HILBILLYS LIKE LOCH NESS MONSTOR COCK,AOL LLC,2008-10-11 22:32:34+00:00,2008-10-26 19:16:47+00:00,244659188,IW.HG,7527214.0,,"{'owner': '[[Time Warner]]', 'num_employees': ..."
11707,Alexander Mackenzie (politician),1235,35281143-0,infobox prime minister,office,prime dick sucker of Canada,Prime Minister of Canada,2008-06-04 01:27:38+00:00,2008-06-04 01:32:54+00:00,216974228,Reneeholle,4620734.0,,"{'profession': 'Building Contractor, [[Archite..."
12865,Alexander Mackenzie (politician),1235,35281143-0,infobox officeholder,name,Alexander Mackenzie the porn star,Alexander Mackenzie,2015-09-15 19:49:07+00:00,2016-05-26 17:19:41+00:00,681199198,Zortwort,23664684.0,,"{'predecessor1': '[[John A. Macdonald]]', 'pre..."
12908,Alexander Mackenzie (politician),1235,35281143-0,infobox officeholder,name,Sir retard,Alexander Mackenzie,2016-12-07 17:30:46+00:00,2017-12-05 15:51:44+00:00,753517170,TwoTwoHello,17114440.0,,"{'predecessor1': '[[John A. Macdonald]]', 'pre..."
