In [81]:
# Dependencies and Setup
%matplotlib inline
%matplotlib notebook
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from matplotlib.patches import ConnectionPatch

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# Input CSV locations (relative to the notebook; change these if the files move)
cyber_security_breaches_data_to_load = "resources/Cyber_Security_Breaches.csv"
cyberattacks_data_to_load = "resources/cyberattacks.csv"

# Both files are read with the same text encoding
csv_options = {"encoding": "ISO-8859-1"}
Cyber_Security_Breaches = pd.read_csv(cyber_security_breaches_data_to_load, **csv_options)
cyberattacks_data = pd.read_csv(cyberattacks_data_to_load, **csv_options)

# Inner-join the two tables on their shared "Year" column.
# NOTE(review): "Year" is not unique on either side, so every breach row is
# paired with every cyberattack row from the same year (visible in the preview
# below, where one breach repeats against many entities). Confirm this is the
# intended combination before aggregating.
all_data = Cyber_Security_Breaches.merge(cyberattacks_data, how="inner", on="Year")

# Preview the combined dataset
all_data.head(50)

Unnamed: 0.1,Unnamed: 0,Number,Name_of_Covered_Entity,State,Business_Associate_Involved,Individuals_Affected,Date_of_Breach,Type_of_Breach,Location_of_Breached_Information,Date_Posted_or_Updated,Summary,breach_start,breach_end,Year,Entity,Records,Organization Type,Method
0,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,2010,"Ankle & Foot Center of Tampa Bay, Inc.",156000,Healthcare,hacked
1,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,2010,AT&T,114000,Telecoms,hacked
2,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,2010,Betfair,2300000,Web,hacked
3,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,2010,Colorado government,105470,Healthcare,lost / stolen computer
4,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,2010,Educational Credit Management Corporation,3300000,Financial,lost / stolen media
5,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,2010,Embassy Cables,251000,Government,inside job
6,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,2010,"Emergency Healthcare Physicians, Ltd.",180111,Healthcare,lost / stolen media
7,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,2010,Federal Reserve Bank of Cleveland,400000,Financial,hacked
8,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,2010,Gawker,1500000,Web,hacked
9,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,2010,JP Morgan Chase,2600000,Financial,lost / stolen media


In [82]:
# Build the column order with 'Year' moved to the front; every other label
# keeps its relative position.
cols = all_data.columns.tolist()
year_position = cols.index('Year')
cols = [cols[year_position]] + cols[:year_position] + cols[year_position + 1:]
cols

['Year',
 'Unnamed: 0',
 'Number',
 'Name_of_Covered_Entity',
 'State',
 'Business_Associate_Involved',
 'Individuals_Affected',
 'Date_of_Breach',
 'Type_of_Breach',
 'Location_of_Breached_Information',
 'Date_Posted_or_Updated',
 'Summary',
 'breach_start',
 'breach_end',
 'Entity ',
 'Records ',
 'Organization Type ',
 'Method ']

In [83]:
# Apply the new column order so that 'Year' is the first column.
# `DataFrame.ix` was deprecated in pandas 0.20 and removed in 1.0; `.loc` is
# the label-based equivalent when selecting by a list of column names.
all_data = all_data.loc[:, cols]
all_data.head(50)

Unnamed: 0.1,Year,Unnamed: 0,Number,Name_of_Covered_Entity,State,Business_Associate_Involved,Individuals_Affected,Date_of_Breach,Type_of_Breach,Location_of_Breached_Information,Date_Posted_or_Updated,Summary,breach_start,breach_end,Entity,Records,Organization Type,Method
0,2010,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,"Ankle & Foot Center of Tampa Bay, Inc.",156000,Healthcare,hacked
1,2010,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,AT&T,114000,Telecoms,hacked
2,2010,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,Betfair,2300000,Web,hacked
3,2010,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,Colorado government,105470,Healthcare,lost / stolen computer
4,2010,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,Educational Credit Management Corporation,3300000,Financial,lost / stolen media
5,2010,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,Embassy Cables,251000,Government,inside job
6,2010,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,"Emergency Healthcare Physicians, Ltd.",180111,Healthcare,lost / stolen media
7,2010,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,Federal Reserve Bank of Cleveland,400000,Financial,hacked
8,2010,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,Gawker,1500000,Web,hacked
9,2010,28,27,Ashley and Gray DDS,MO,,9309,1/10/2010,Theft,Desktop Computer,1/23/2014,,1/10/2010,,JP Morgan Chase,2600000,Financial,lost / stolen media


In [84]:
# Inspect the column labels. Note in the output that 'Entity ', 'Records ',
# 'Organization Type ' and 'Method ' end with a trailing space, so any later
# selection or rename must spell them exactly that way.
all_data.columns

Index(['Year', 'Unnamed: 0', 'Number', 'Name_of_Covered_Entity', 'State',
       'Business_Associate_Involved', 'Individuals_Affected', 'Date_of_Breach',
       'Type_of_Breach', 'Location_of_Breached_Information',
       'Date_Posted_or_Updated', 'Summary', 'breach_start', 'breach_end',
       'Entity ', 'Records ', 'Organization Type ', 'Method '],
      dtype='object')

In [85]:
# Columns kept for the security analysis (several labels end with a space)
security_columns = [
    'Year',
    'Location_of_Breached_Information',
    'Method ',
    'Type_of_Breach',
    'Entity ',
    'Organization Type ',
    'Summary',
    'Individuals_Affected',
]
# .copy() gives an independent frame, so later edits do not touch all_data
security_data = all_data.loc[:, security_columns].copy()
security_data.head(500)

Unnamed: 0,Year,Location_of_Breached_Information,Method,Type_of_Breach,Entity,Organization Type,Summary,Individuals_Affected
0,2010,Desktop Computer,hacked,Theft,"Ankle & Foot Center of Tampa Bay, Inc.",Healthcare,,9309
1,2010,Desktop Computer,hacked,Theft,AT&T,Telecoms,,9309
2,2010,Desktop Computer,hacked,Theft,Betfair,Web,,9309
3,2010,Desktop Computer,lost / stolen computer,Theft,Colorado government,Healthcare,,9309
4,2010,Desktop Computer,lost / stolen media,Theft,Educational Credit Management Corporation,Financial,,9309
...,...,...,...,...,...,...,...,...
495,2010,Laptop,lost / stolen media,Theft,"South Shore Hospital, Massachusetts",Healthcare,An unencrypted laptop was stolen from an emplo...,955
496,2010,Laptop,lost / stolen media,Theft,"Triple-S Salud, Inc.",Healthcare,An unencrypted laptop was stolen from an emplo...,955
497,2010,Laptop,inside job,Theft,U.S. Army,Government,An unencrypted laptop was stolen from an emplo...,955
498,2010,Laptop,inside job,Theft,U.S. government (United States diplomatic cabl...,Military,An unencrypted laptop was stolen from an emplo...,955


In [86]:
# Map the raw column labels to presentation-friendly names.
# Keys must match the source labels exactly, trailing spaces included.
column_renames = {
    "Location_of_Breached_Information": "Place of Info Breach",
    "Method ": "Method of Breach",
    "Type_of_Breach": "Type of Breach",
    "Entity ": "Name of Business",
    "Summary": "Summary of Events",
    "Individuals_Affected": "Individuals Affected",
}
new_security_data = security_data.rename(columns=column_renames)
new_security_data.head(100)

Unnamed: 0,Year,Place of Info Breach,Method of Breach,Type of Breach,Name of Business,Organization Type,Summary of Events,Individuals Affected
0,2010,Desktop Computer,hacked,Theft,"Ankle & Foot Center of Tampa Bay, Inc.",Healthcare,,9309
1,2010,Desktop Computer,hacked,Theft,AT&T,Telecoms,,9309
2,2010,Desktop Computer,hacked,Theft,Betfair,Web,,9309
3,2010,Desktop Computer,lost / stolen computer,Theft,Colorado government,Healthcare,,9309
4,2010,Desktop Computer,lost / stolen media,Theft,Educational Credit Management Corporation,Financial,,9309
...,...,...,...,...,...,...,...,...
95,2010,Paper,lost / stolen media,Theft,"South Shore Hospital, Massachusetts",Healthcare,The covered entity's (CE) business associate (...,605
96,2010,Paper,lost / stolen media,Theft,"Triple-S Salud, Inc.",Healthcare,The covered entity's (CE) business associate (...,605
97,2010,Paper,inside job,Theft,U.S. Army,Government,The covered entity's (CE) business associate (...,605
98,2010,Paper,inside job,Theft,U.S. government (United States diplomatic cabl...,Military,The covered entity's (CE) business associate (...,605


In [None]:
# ######################################################################################

In [None]:
# Side by side bar chart --- GJM
# Draft of a grouped bar chart: two bar series drawn side by side per label.
# This cell's prompt is `In [None]`, i.e. it has not been executed.

labels = ['G1', 'G2', 'G3', 'G4', 'G5']
# NOTE(review): this is a single string, not five numeric bar heights like
# `women_means` below. It looks like an unfinished placeholder for a data
# column -- replace it with real numeric values before running. TODO confirm intent.
men_means = ['Method']
women_means = [25, 32, 34, 20, 25]

# the label locations
x = np.arange(len(labels)) 

# the width of the bars
width = 0.35 

fig, ax = plt.subplots()
# Shift each series half a bar width left/right of the label position so the
# two bars for one label sit next to each other.
rects1 = ax.bar(x - width/2, men_means, width, label='Men')
rects2 = ax.bar(x + width/2, women_means, width, label='Women')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()


def autolabel(rects, axes=None):
    """Attach a text label above each bar in *rects*, displaying its height.

    Parameters
    ----------
    rects : iterable
        Bar artists to label; each must provide ``get_height()``,
        ``get_x()`` and ``get_width()``.
    axes : optional
        Object whose ``annotate`` method draws the labels. When omitted, the
        notebook-level ``ax`` is used, so existing ``autolabel(rects)`` calls
        behave exactly as before.
    """
    # The global `ax` is only looked up when no axes were passed, so callers
    # can avoid depending on whichever cell last assigned `ax`.
    target = ax if axes is None else axes
    for rect in rects:
        height = rect.get_height()
        target.annotate('{}'.format(height),
                        # anchor at the horizontal centre of the bar's top edge
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')


# Label both bar series, tighten the layout, then render the figure
for bar_group in (rects1, rects2):
    autolabel(bar_group)

fig.tight_layout()
plt.show()

In [57]:
# Side by side bar chart --- GJM

# Demo data: one score per group for each of the two series
labels = ['G1', 'G2', 'G3', 'G4', 'G5']
men_means = [20, 34, 30, 35, 27]
women_means = [25, 32, 34, 20, 25]

# Bar geometry: one x position per label, each bar `width` wide
width = 0.35
x = np.arange(len(labels))
half_width = width / 2

# The two series sit half a bar width either side of each label position
fig, ax = plt.subplots()
rects1 = ax.bar(x - half_width, men_means, width, label='Men')
rects2 = ax.bar(x + half_width, women_means, width, label='Women')

# Axis label, title, tick positions/labels and legend
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()


def autolabel(rects, axes=None):
    """Attach a text label above each bar in *rects*, displaying its height.

    Parameters
    ----------
    rects : iterable
        Bar artists to label; each must provide ``get_height()``,
        ``get_x()`` and ``get_width()``.
    axes : optional
        Object whose ``annotate`` method draws the labels. When omitted, the
        notebook-level ``ax`` is used, so existing ``autolabel(rects)`` calls
        behave exactly as before.
    """
    # The global `ax` is only looked up when no axes were passed, so callers
    # can avoid depending on whichever cell last assigned `ax`.
    target = ax if axes is None else axes
    for rect in rects:
        height = rect.get_height()
        target.annotate('{}'.format(height),
                        # anchor at the horizontal centre of the bar's top edge
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')


# Annotate each bar series with its heights, then lay out and display the figure
for bar_group in (rects1, rects2):
    autolabel(bar_group)

fig.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [54]:
# Bar in Pie chart -- GJM
# Left axes: a pie chart whose first wedge is exploded. Right axes: a stacked
# bar breaking that wedge down, joined to the wedge by two connector lines.

# make figure and assign axis objects
fig = plt.figure(figsize=(9, 5.0625))
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
fig.subplots_adjust(wspace=0)

# pie chart parameters
ratios = [.27, .56, .17]
labels = ['Approve', 'Disapprove', 'Undecided']
explode = [0.1, 0, 0]
# rotate so that first wedge is split by the x-axis
angle = -180 * ratios[0]
ax1.pie(ratios, autopct='%1.1f%%', startangle=angle,
        labels=labels, explode=explode)

# bar chart parameters
xpos = 0
bottom = 0
ratios = [.33, .54, .07, .06]
width = .2
colors = [[.1, .3, .5], [.1, .3, .3], [.1, .3, .7], [.1, .3, .9]]

# Stack one segment per ratio and print its percentage at the segment midpoint
for height, color in zip(ratios, colors):
    ax2.bar(xpos, height, width, bottom=bottom, color=color)
    ypos = bottom + height / 2
    bottom += height
    # round() instead of bare %d truncation: a value such as 0.29 * 100
    # evaluates to 28.999... and would otherwise be printed as 28%.
    ax2.text(xpos, ypos, "%d%%" % round(height * 100), ha='center')

ax2.set_title('Age of approvers')
ax2.legend(('50-65', 'Over 65', '35-49', 'Under 35'))
ax2.axis('off')
ax2.set_xlim(- 2.5 * width, 2.5 * width)

# use ConnectionPatch to draw lines between the two plots
# get the wedge data
wedge = ax1.patches[0]
theta1, theta2 = wedge.theta1, wedge.theta2
center, r = wedge.center, wedge.r
bar_height = sum(item.get_height() for item in ax2.patches)

# Draw the top connector (wedge end angle -> top of the bar) first, then the
# bottom connector (wedge start angle -> base of the bar).
for theta, bar_y in ((theta2, bar_height), (theta1, 0)):
    # Point on the wedge arc. Both coordinates scale with the wedge radius;
    # previously y omitted `r *`, which is only correct when r == 1.
    x = r * np.cos(np.pi / 180 * theta) + center[0]
    y = r * np.sin(np.pi / 180 * theta) + center[1]
    con = ConnectionPatch(xyA=(- width / 2, bar_y), xyB=(x, y),
                          coordsA="data", coordsB="data", axesA=ax2, axesB=ax1)
    con.set_color([0, 0, 0])
    con.set_linewidth(4)
    ax2.add_artist(con)

plt.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [55]:
# Sample user counts, one value per bar
users = [13000, 26000, 52000, 30000, 9000]
# One integer x position (0, 1, 2, ...) for each entry in `users`
bar_count = len(users)
x_axis = np.arange(bar_count)

In [56]:
# Tell matplotlib that we will be making a bar chart
# Users is our y axis and x_axis is, of course, our x axis
# We apply align="center" so each bar is centered on its x position
plt.bar(x_axis, users, color='r', alpha=0.75, align="center")

<IPython.core.display.Javascript object>

<BarContainer object of 5 artists>

<IPython.core.display.Javascript object>