In [95]:
import pandas as pd
import numpy as np

In [130]:
# Read the raw card spreadsheet and tidy its headers in one pass:
# snake_case every label, rename a few of them, then discard unused columns.
cards = (
    pd.read_excel("raw/WarCard935.xlsx")
    # lower-case each header and swap spaces for underscores
    .rename(columns=lambda label: label.lower().replace(" ", "_"))
    # keys below are the headers as they look after the normalisation above
    .rename(columns={"cpiid": "cpi_id", "contet": "content", "link": "url"})
    .drop(columns=["our_speaker", "name"])
)

In [131]:
def _split_pipes(value):
    """Split a "||"-delimited string into a list; return anything else unchanged.

    Non-string values (missing values, lists produced by an earlier run of
    this cell) pass through untouched, so re-running the cell does not turn
    already-split columns into NaN the way a second ``.str.split`` would.
    """
    return value.split("||") if isinstance(value, str) else value


# NOTE(review): astype(bool) converts NaN to True -- confirm the "deception"
# column has no blank cells in the source spreadsheet.
cards["deception"] = cards["deception"].astype(bool)

# Multi-valued columns are stored as "a||b||c"; turn them into Python lists.
cards["subject"] = cards["subject"].map(_split_pipes)
cards["person"] = cards["person"].map(_split_pipes)

In [132]:
# Surname -> full name lookup, written as (surname, full name) pairs.
people = dict([
    ("Bush", "George W. Bush"),
    ("Powell", "Colin Powell"),
    ("Rumsfeld", "Donald Rumsfeld"),
    ("Fleischer", "Ari Fleischer"),
    ("Wolfowitz", "Paul Wolfowitz"),
    ("Rice", "Condoleezza Rice"),
    ("Cheney", "Richard B. Cheney"),
    ("McClellan", "Scott McClellan"),
    ("bin Laden", "Osama bin Laden"),
    ("Myers", "Richard B. Myers"),
    ("Tenet", "George Tenet"),
    ("Clarke", "Richard Clarke"),
    ("Woolsey", "Richard Woolsey"),
    ("Franks", "Gen. Tommy Franks"),
    ("Feith", "Douglas Feith"),
    ("Blair", "Tony Blair"),
    ("Annan", "Kofi Annan"),
    ("Blix", "Hans Blix"),
    ("Hadley", "Stephen Hadley"),
    ("Kay", "David Kay"),
    ("Hussein", "Saddam Hussein"),
])

In [133]:
# Expand surnames to full names using the `people` lookup.
# A speaker that is not a key in `people` (including a missing value or a name
# that was already expanded) is kept as-is instead of becoming NaN. This
# matches the fallback used for `person` below and keeps the cell safe to
# run more than once.
cards["speaker"] = cards["speaker"].map(lambda name: people.get(name, name))

# `person` holds lists of names; non-list entries (e.g. missing values) are
# passed through unchanged.
cards["person"] = cards["person"].map(
    lambda names: [people.get(p, p) for p in names] if isinstance(names, list) else names
)

In [134]:
# Display the cards frame as the cell output to eyeball the result so far.
cards

Unnamed: 0,cpi_id,date,title,content,deception,subject,person,url,updated_link,source,speaker
0,1,2001-09-11,Al Qaeda Terrorists Attack United States,Nineteen terrorists affiliated with Al Qaeda h...,False,[Al Qaeda],,http://www.9-11commission.gov/report/index.htm,,SOURCE: Adapted from&nbsp;<EM>The 9/11 Commiss...,
1,2,2001-09-11,President Bush Addresses the Nation,Editor's note: President Bush speaks to the na...,False,[Iraq],[George W. Bush],http://georgewbush-whitehouse.archives.gov/new...,,SOURCE: Office of the White House Press Secret...,
2,3,2001-09-11,"Christopher Meyer, <EM>DC Confidential</EM>:<E...",After my council of war on the morning of 9/11...,False,"[Iraq, Al Qaeda]","[Condoleezza Rice, Osama bin Laden]",,,"SOURCE: Christopher Meyer, <EM>DC Confidential...",
3,4,2001-09-11,Defense Secretary Donald Rumsfeld Tells Genera...,"On the afternoon of September 11, according to...",False,[Iraq],"[Richard B. Myers, Donald Rumsfeld, Saddam Hus...",http://www.9-11commission.gov/report/911Report...,,<P>SOURCE: <EM>The 9/11 Commission Report: Fin...,
4,5,2001-09-11,Questions Arise at National Security Council M...,"By late in the evening of September 11, the pr...",False,[Iraq],"[George W. Bush, Donald Rumsfeld, Condoleezza ...",http://www.9-11commission.gov/report/911Report...,,<P>SOURCE: <EM>The 9/11 Commission Report: Fin...,
...,...,...,...,...,...,...,...,...,...,...,...
1238,1251,2003-07-29,"Deputy Defense Secretary Paul Wolfowitz, Testi...",<P>Senator Lincoln Chafee: And so all the test...,True,"[Iraq, WMD, FS]","[Saddam Hussein, George W. Bush, Paul Wolfowitz]",http://findarticles.com/p/articles/mi_m0PAH/is...,http://www.defense.gov/Speeches/Speech.aspx?Sp...,SOURCE: Office of the Assistant Secretary of D...,Paul Wolfowitz
1239,1252,2003-09-07,"National Security Adviser Condoleezza Rice, In...","Tony Snow: Do you believe, because this is con...",True,"[Iraq, WMD, chemical, biological, Al Qaeda, FS]","[Saddam Hussein, George W. Bush, Condoleezza R...","http://www.foxnews.com/story/0,2933,96651,00.html",,"SOURCE: Condoleezza Rice,&nbsp;interview on&nb...",Condoleezza Rice
1240,1253,2004-01-01,<EM>Report of the Select Committee on Intellig...,"<P class=MsoNormal style=""MARGIN: 0in 0in 0pt""...",False,"[Al Qaeda, WMD, chemical, biological, Iraq]",,,,"<P class=MsoNormal style=""MARGIN: 0in 0in 0pt""...",
1241,1254,2005-03-01,"Ari Fleischer, <EM>Taking the Heat</EM>: Ari F...","<P class=MsoNormal style=""MARGIN: 0in 0in 0pt""...",False,"[Iraq, WMD, biological]","[George W. Bush, Ari Fleischer, Saddam Hussein...",,,"<P class=MsoNormal style=""MARGIN: 0in 0in 0pt""...",


In [135]:
# Number of cards per speaker (value_counts skips missing speakers by default).
cards["speaker"].value_counts()

George W. Bush       158
Colin Powell         134
Donald Rumsfeld       64
Ari Fleischer         62
Paul Wolfowitz        49
Condoleezza Rice      32
Richard B. Cheney     23
Scott McClellan       11
Name: speaker, dtype: int64

In [127]:
# Load the timeline sheet, give its three columns fixed names, and keep only
# the rows that actually carry a date.
timeline = (
    pd.read_excel("raw/Timeline935.xlsx")
    .set_axis(["date", "event", "source"], axis="columns")
    .dropna(subset=["date"])
)
timeline

Unnamed: 0,date,event,source
0,2001-09-11,"On the morning of September 11, 2001, 19 Al Qa...",Spreadsheet
1,2001-09-12,Bush asks a group of national security adviser...,Spreadsheet
2,2001-09-17,"In a memo to Donald Rumsfeld, deputy secretary...",Spreadsheet
3,2001-09-18,A memo sent to Condoleeza Rice from national s...,Spreadsheet
4,2001-10-05,The first death by anthrax poisoning is report...,Spreadsheet
...,...,...,...
62,2003-12-13,Saddam Hussein is captured in a spider hole ne...,http://www.motherjones.com/bush_war_timeline
63,2004-01-23,"David Kay, lead weapons inspector in Iraq, res...",Spreadsheet
64,2004-01-24,"In an interview with NPR, Cheney says the hunt...",Spreadsheet
65,2004-04-18,"Spain’s new prime minister pulls 1,300 troops ...",http://usiraq.procon.org/view.resource.php?res...


In [128]:
# Write the timeline to disk without the index column.
timeline.to_csv(path_or_buf="timeline.csv", index=False)

In [129]:
# Build an export copy in which the list-valued columns are flattened to
# comma-separated strings, leaving `cards` itself untouched, then write it out.
cards_export = cards.assign(
    subject=cards["subject"].str.join(", "),
    person=cards["person"].str.join(", "),
)

cards_export.to_csv("cards.csv", index=False)