# Generate Excel report and bar chart

In [None]:
import json
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt

# Timestamp suffix used to give the generated report a unique file name.
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")

# Monitored addresses: a mapping of email -> record holding a "breaches" list.
with open("./data/monitored_emails.json") as emails_fh:
    emails = json.loads(emails_fh.read())

# Breach metadata, looked up by the identifiers found in each "breaches" list.
with open("./data/breach_information.json") as breaches_fh:
    breaches = json.loads(breaches_fh.read())

def load_tags(path):
    """Return the lower-cased, non-blank lines of the tag file at *path*.

    Each line is stripped of surrounding whitespace and lower-cased so it can
    be compared case-insensitively against breach data classes.

    A missing or unreadable file is reported on stdout and yields an empty
    list, so the report can still be produced without that tag category.
    """
    try:
        with open(path, "r") as tag_fh:
            return [line.strip().lower() for line in tag_fh if line.strip()]
    except (OSError, UnicodeDecodeError) as err:
        # Best effort by design: only I/O and decoding problems are tolerated,
        # anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        print(f"Failed to open file '{path}': {err}")
        return []


# Data classes that count as personal user information.
user_info_tags = load_tags("./data/tags-PII.txt")
# Data classes that count as confidential information.
confidential_tags = load_tags("./data/tags-confidential_data.txt")

# Build one flat record per (monitored account, breach) pair; these records
# become the rows of the "CompromisedAccounts" sheet.
combined = []

# Sets give constant-time membership tests inside the loop below.
user_info_lookup = set(user_info_tags)
confidential_lookup = set(confidential_tags)

for email, account_data in emails.items():
    # Split on the LAST "@" so the local part may itself contain "@".
    account_name, at_sign, account_domain = email.rpartition("@")
    if not at_sign:
        # No "@" at all: keep the full string as the name instead of
        # silently dropping its last character, and leave the domain empty.
        account_name, account_domain = email, ""

    for breach in account_data["breaches"]:
        info = breaches[breach]
        data_classes = info["DataClasses"]
        combined.append({
            "Account": email,
            "AccountName": account_name,
            "AccountDomain": account_domain,
            "BreachName": info["Name"],
            "BreachTitle": info["Title"],
            "BreachDate": info["BreachDate"],
            "Domain": info["Domain"],
            # Data classes are matched case-insensitively against the tag lists.
            "LeakedUserInfo": ", ".join(
                item for item in data_classes
                if item.strip().lower() in user_info_lookup
            ),
            "LeakedConfidentialInformation": ", ".join(
                item for item in data_classes
                if item.strip().lower() in confidential_lookup
            ),
            "Verified": info["IsVerified"],
            "CompromisedData": ", ".join(data_classes),
            "IsSpamList": info["IsSpamList"],
            "IsSensitive": info["IsSensitive"],
            "IsMalware": info["IsMalware"],
        })

# One row per (account, breach) finding, and one row per known breach.
df = pd.DataFrame(combined)
df2 = pd.DataFrame(breaches).T

# Ask for the previous report BEFORE opening the writer, so an interrupted
# prompt cannot leave a half-written workbook behind.
fname = input("Filepath of previous report: ")
previous = None
try:
    # {row index: {column: value}} for every row of the previous report.
    previous = pd.read_excel(fname).T.to_dict()
except Exception as err:
    # Comparing against an older report is optional; say why it was skipped.
    print(f"Failed to load previous file, skipping ({err})")

# Findings whose account/breach-title pair is absent from the previous report.
new_findings = []
if previous:
    seen = {f"{row['Account']}_{row['BreachTitle']}" for row in previous.values()}
    new_findings = [
        acct for acct in combined
        if f"{acct['Account']}_{acct['BreachTitle']}" not in seen
    ]
df_new = pd.DataFrame(new_findings)

with pd.ExcelWriter(f"CompromisedAccountReport-{current_time}.xlsx") as writer:
    df.to_excel(writer, sheet_name="CompromisedAccounts", index=False)
    # The "Changes" sheet is only written when there is something new to show.
    if not df_new.empty:
        df_new.to_excel(writer, sheet_name="Changes", index=False)

    df2[["Title", "Domain", "BreachDate", "PwnCount",
        "DataClasses", "IsVerified", "IsFabricated", "IsSensitive",
        "IsRetired", "IsSpamList", "IsMalware", "KIT_NOTIFICATION", "Description"]].to_excel(writer, sheet_name="BreachInfo", index=False)

# Row mask used by the plotting cells: True where confidential data was leaked.
sel = [bool(val) for val in df.LeakedConfidentialInformation]

## List accounts in data breach

In [None]:
# Print every distinct account found in the breach named by the user.
breach_name = input("Which dataleak to list, use data breach name")
in_breach = df.BreachName == breach_name
matching_accounts = df.loc[in_breach, "Account"].unique().tolist()
for account in matching_accounts:
    print(account)

## Plot number of accounts in breaches

In [None]:
# Horizontal bar chart of the breaches with the most affected accounts
# (restricted to findings that leaked confidential information).
number_of_elements = 15

# Deliberately NOT named `breaches`: that name holds the breach-information
# mapping which the notice cell passes to create_table(); rebinding it to a
# Series here would break that cell.
breach_names = df[sel].BreachName
df_subset = breach_names.value_counts(ascending=False)

# Everything beyond the top N is folded into a single "others" bar.
top_counts = df_subset.iloc[:number_of_elements]
data = [{"BreachName": "others", "Count": df_subset.iloc[number_of_elements:].sum()}]
data.extend(
    {"BreachName": name, "Count": count} for name, count in top_counts.items()
)

df_temp = pd.DataFrame(data)
df_temp.sort_values("Count").plot(kind="barh", x="BreachName", y="Count", figsize=(10,6), legend=False)

## Top accounts appearing in leaks

In [None]:
# Bar chart of the accounts that appear in more than five breaches
# (confidential-information findings only), capped at 25 accounts.
number_of_elements = 10  # NOTE(review): not read by this cell
accounts = df.loc[sel, "Account"].copy()
df_subset = accounts.value_counts()

appears_often = df_subset > 5
df_subset[appears_often].head(25).plot(kind="bar", figsize=(20, 5))

## Top domains in leaks

In [None]:
# Pie chart of the account domains with the most findings
# (confidential-information findings only).
number_of_elements = 10
account_domains = df.loc[sel, "AccountDomain"].copy()
df_subset = account_domains.value_counts()

# The first slice collects every domain outside the top N.
leading_domains = df_subset.iloc[:number_of_elements].index
remaining_total = df_subset.iloc[number_of_elements:].sum()
data = [{"AccountDomain": "others", "Count": remaining_total}]
for ua in leading_domains:
    data.append({"AccountDomain": ua, "Count": df_subset[ua]})

df_temp = pd.DataFrame(data).set_index("AccountDomain")
df_temp.plot.pie(y="Count", figsize=(10, 10), autopct="%1.0f%%", ylabel="")

In [None]:
from app.monitor import create_table
from requests import post

# Build an HTML breach notice for one monitored account, save it to
# temp_notice.html and, when a Fresh service domain is given, post it as a
# reply on the given ticket.
#
# `breaches` must still be the breach-information mapping loaded from JSON;
# re-run the loading cell first if another cell has rebound that name.
from getpass import getpass

# Surrounding whitespace from copy/paste would make the account lookup or the
# request URL fail, so it is stripped from the identifiers.
ACCOUNT = input("Email account to generate notice for").strip()
REPORTER = input("Reporter navn:")
DOMAIN = input("Fresh service domain").strip()
if DOMAIN:
    TICKET = input("Fresh ticket id").strip()
    # getpass does not echo what is typed, keeping the API key off the screen.
    KEY = getpass("Fresh API key")

if ACCOUNT and ACCOUNT in emails:
    LEAKS = create_table(breaches=breaches, findings=emails[ACCOUNT]["breaches"])
    notice = f"""Hei.<br />
Din brukerkonto med epost, <b>{ACCOUNT}</b>, har blitt oppdaget i en datalekkasje og det er sterkt anmodet å bytte passord på denne brukerkontoen snarest mulig.<br />
Hvis du også har <b>gjenbrukt passordet</b> på flere tjenester må disse også skiftes da passordet kan være eksponert.<br /><br />
Epost-adressen er funnet i lekkasjer for følgende tjenester. Dette betyr at tjenesten har enten utilsiktet publisert data offentlig eller blitt kompromittert av trusselaktører, med det resultat at dine data har blitt offentligjort.<br />
Under finner du informasjonen med beskrivelser av hva som har skjedd og hva som er eksponert.<br />
{LEAKS}
<br />
For mer info eller spørsmål kan du svare på denne saken.<br />
Med vennlig hilsen {REPORTER}"""
    # A local copy is always written, whether or not the notice is posted.
    with open("temp_notice.html", "w") as fhtml:
        fhtml.write(notice)
    if DOMAIN:
        # Without a timeout the call could block forever on an unresponsive host.
        response = post(f"https://{DOMAIN}/api/v2/tickets/{TICKET}/reply",
            json={"body": notice},
            headers={"authorization": f"Basic {KEY}"},
            timeout=30)
        print(response.status_code, response.content)
else:
    print(f"Email: '{ACCOUNT}' did not appear in the email list")