# Companies Cyber Attack Analysis

In [None]:
# Cell: Imports and data loading
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the cyber-attack CSV from the ETL "Transformed_Data" folder into `data`,
# the DataFrame every later cell reads from.
# NOTE(review): the path is relative, so the notebook presumably has to be run
# from its own directory - confirm.
data = pd.read_csv('../Data Warehousing ETL/Transformed_Data/companies_cyber_attacks_europe.csv')

In [None]:
# Display the column names of the loaded DataFrame as the cell output.
data.columns


## General Analysis

In [None]:
# Number of incidents recorded for each distinct 'Year' value.
attacks_per_year = data.groupby('Year').size()

# Heading and table are emitted by one print call, one item per line.
print("Attacks Per Year:", attacks_per_year, sep="\n")


In [None]:
# Number of incidents recorded for each distinct 'Country' value.
attacks_per_country = data.groupby('Country').size()

# Heading and table are emitted by one print call, one item per line.
print("Attacks Per Country:", attacks_per_country, sep="\n")


In [None]:
# Number of incidents recorded for each distinct 'Industry' value.
# This series is reused by the "Number of Cyber Attacks by Industry" bar chart.
attacks_per_industry = data.groupby('Industry').size()

# Heading and table are emitted by one print call, one item per line.
print("Attacks Per Industry:", attacks_per_industry, sep="\n")


In [None]:
# Number of incidents recorded for each distinct 'Attack_Type' value.
attacks_per_attack_type = data.groupby('Attack_Type').size()

# Heading and table are emitted by one print call, one item per line.
print("Attacks Per Attack Type:", attacks_per_attack_type, sep="\n")


In [None]:
# Sum of 'Financial_Loss' over all incidents sharing the same 'Year'.
# This series is reused by the yearly financial-loss line chart.
financial_loss_per_year = data.groupby('Year')['Financial_Loss'].sum()

# Heading and table are emitted by one print call, one item per line.
print("Financial Loss Per Year:", financial_loss_per_year, sep="\n")


In [None]:
# Mean 'Breach_Size' of the incidents in each industry.
# This series is reused by the "Average Breach Size by Industry" bar chart.
avg_breach_size_per_industry = data.groupby('Industry')['Breach_Size'].mean()

# Heading and table are emitted by one print call, one item per line.
print("Average Breach Size Per Industry:", avg_breach_size_per_industry, sep="\n")


In [None]:
# Mean 'Mitigation_Time' of the incidents of each attack type.
# This series is reused by the "Average Mitigation Time by Attack Type" bar chart.
avg_mitigation_time_per_attack = data.groupby('Attack_Type')['Mitigation_Time'].mean()

# Heading and table are emitted by one print call, one item per line.
print("Average Mitigation Time Per Attack Type:", avg_mitigation_time_per_attack, sep="\n")


In [None]:
# Cell: Descriptive statistics for numeric columns
# The result of describe() is the last expression, so it is shown as the cell output.
data.describe()


## Distribution Plots

In [None]:
# Six shades of blue used to colour the attack-type bars.
colors = ['#ADD8E6', '#87CEEB', '#4682B4', '#1E90FF', '#4169E1', '#000080']


# Horizontal count plot of attack types, most frequent type at the top.
attack_type_order = data["Attack_Type"].value_counts().index
plt.figure(figsize=(10, 6))
ax = sns.countplot(y="Attack_Type", data=data, order=attack_type_order, palette=colors)
ax.set(
    title="Distribution of Cyber Attack Types",
    xlabel="Count of Attacks",
    ylabel="Attack Type",
)
plt.show()




In [None]:
# Horizontal count plot restricted to the ten countries with the most incidents.
top_countries = data["Country"].value_counts().index[:10]
plt.figure(figsize=(10, 6))
ax = sns.countplot(y="Country", data=data, order=top_countries, palette="inferno")
ax.set(
    title="Top 10 Countries by Number of Cyber Attacks",
    xlabel="Count of Attacks",
    ylabel="Country",
)
plt.show()


In [None]:
# Count plot with one bar per 'Year' value, showing how incident volume changes.
plt.figure(figsize=(10, 6))
ax = sns.countplot(x="Year", data=data, palette="coolwarm")
ax.set(
    title="Trend of Cyber Attacks Over the Years",
    xlabel="Year",
    ylabel="Count of Attacks",
)
plt.show()


In [None]:
# Bar chart of incident counts per industry, largest first.
# Relies on attacks_per_industry computed in the General Analysis section.
plt.figure(figsize=(10, 6))
industry_counts_desc = attacks_per_industry.sort_values(ascending=False)
ax = industry_counts_desc.plot.bar(color='orange')
ax.set(
    title='Number of Cyber Attacks by Industry',
    xlabel='Industry',
    ylabel='Number of Attacks',
)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
# Bar chart of the mean breach size per industry, largest first.
# Relies on avg_breach_size_per_industry computed in the General Analysis section.
plt.figure(figsize=(10, 6))
breach_size_desc = avg_breach_size_per_industry.sort_values(ascending=False)
ax = breach_size_desc.plot.bar(color='purple')
ax.set(
    title='Average Breach Size by Industry',
    xlabel='Industry',
    ylabel='Average Breach Size (Number of Records)',
)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
# Bar chart of the mean mitigation time per attack type, largest first.
# Relies on avg_mitigation_time_per_attack computed in the General Analysis section.
plt.figure(figsize=(10, 6))
mitigation_time_desc = avg_mitigation_time_per_attack.sort_values(ascending=False)
ax = mitigation_time_desc.plot.bar(color='blue')
ax.set(
    title='Average Mitigation Time by Attack Type',
    xlabel='Attack Type',
    ylabel='Mitigation Time (Days)',
)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
# Cell 2: Severity distribution based on financial loss
def severity_level(row):
    """Classify an incident as 'Low', 'Medium' or 'High' by its financial loss.

    Args:
        row: Mapping-like record (for example a DataFrame row) that provides
            a 'Financial_Loss' entry.

    Returns:
        'Low' for a loss below 500000, 'Medium' for a loss from 500000 up to
        but not including 2000000, and 'High' otherwise. A NaN loss fails
        both comparisons and therefore also yields 'High'.
    """
    loss = row['Financial_Loss']
    if loss < 500000:
        return 'Low'
    # Reaching this point means the loss is not below 500000, so only the
    # upper bound of the 'Medium' band still needs checking.
    if loss < 2000000:
        return 'Medium'
    return 'High'

# Tag every incident with its severity bucket; later cells group by this column.
data['Severity'] = data.apply(severity_level, axis='columns')

# Count plot with one bar per severity bucket.
plt.figure(figsize=(8, 6))
ax = sns.countplot(x='Severity', data=data, palette='muted')
ax.set(
    title="Distribution of Attack Severity Levels",
    xlabel="Severity Level",
    ylabel="Count of Attacks",
)
plt.show()


In [None]:
# Mean 'Financial_Loss' of the incidents for each attack vector, drawn as a bar chart.
attack_vector_financial_loss = data.groupby('Attack_Vector')['Financial_Loss'].mean()

plt.figure(figsize=(10, 6))
ax = attack_vector_financial_loss.plot.bar(color='purple')
ax.set(
    title='Average Financial Loss by Attack Vector',
    xlabel='Attack Vector',
    ylabel='Average Financial Loss (USD)',
)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
# The ten most frequent 'Company_Name' values and their incident counts.
company_counts = data['Company_Name'].value_counts()
top_companies = company_counts.iloc[:10]

# Horizontal bar chart: company on the y axis, incident count on the x axis.
plt.figure(figsize=(10, 6))
ax = sns.barplot(y=top_companies.index, x=top_companies.values, palette='coolwarm')
ax.set(
    title='Top 10 Companies with the Most Cyber Attacks',
    xlabel='Number of Attacks',
    ylabel='Company Name',
)
plt.tight_layout()
plt.show()


In [None]:
# Box plot of the 'Financial_Loss' distribution, one box per industry.
plt.figure(figsize=(12, 6))
ax = sns.boxplot(x='Industry', y='Financial_Loss', data=data, palette='muted')
ax.set(
    title="Distribution of Financial Loss by Industry",
    xlabel="Industry",
    ylabel="Financial Loss (USD)",
)
plt.xticks(rotation=45)
plt.show()


In [None]:
# Box plot of the 'Breach_Size' distribution, one box per attack vector.
plt.figure(figsize=(12, 6))
ax = sns.boxplot(x='Attack_Vector', y='Breach_Size', data=data, palette='Set3')
ax.set(
    title="Distribution of Breach Size by Attack Vector",
    xlabel="Attack Vector",
    ylabel="Breach Size (No. of Records)",
)
plt.xticks(rotation=45)
plt.show()


In [None]:
# Box plot of the 'Mitigation_Time' distribution, one box per industry.
plt.figure(figsize=(12, 6))
ax = sns.boxplot(x='Industry', y='Mitigation_Time', data=data, palette='coolwarm')
ax.set(
    title="Mitigation Time by Industry",
    xlabel="Industry",
    ylabel="Mitigation Time (Days)",
)
plt.xticks(rotation=45)
plt.show()


In [None]:
# Box plot of the 'Mitigation_Time' distribution, one box per attack vector.
plt.figure(figsize=(12, 6))
ax = sns.boxplot(x='Attack_Vector', y='Mitigation_Time', data=data, palette='magma')
ax.set(
    title="Mitigation Time by Attack Vector",
    xlabel="Attack Vector",
    ylabel="Mitigation Time (Days)",
)
plt.xticks(rotation=45)
plt.show()


In [None]:
# Box plot of the 'Breach_Size' distribution, one box per industry.
plt.figure(figsize=(12, 6))
ax = sns.boxplot(x='Industry', y='Breach_Size', data=data, palette='Purples')
ax.set(
    title="Breach Size Distribution by Industry",
    xlabel="Industry",
    ylabel="Breach Size (No. of Records)",
)
plt.xticks(rotation=45)
plt.show()


In [None]:
# Box plot of the 'Breach_Size' distribution, one box per country.
plt.figure(figsize=(12, 6))
ax = sns.boxplot(x='Country', y='Breach_Size', data=data, palette='Blues')
ax.set(
    title="Breach Size Distribution by Country",
    xlabel="Country",
    ylabel="Breach Size (Number of Records)",
)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


## Stacked Plots

In [None]:

# Stacked bar chart: incident count per industry, split by attack type.
# fill_value=0 keeps the counts as integers for industry/type pairs that never occur.
industry_vs_attack_type = data.groupby(['Industry', 'Attack_Type']).size().unstack(fill_value=0)

# DataFrame.plot opens a new figure unless it is given an Axes, so the Axes is
# created here and passed in; this avoids leaving a stray empty figure behind.
fig, ax = plt.subplots(figsize=(10, 6))
industry_vs_attack_type.plot(kind='bar', stacked=True, colormap='coolwarm', ax=ax)
ax.set_title('Industry vs Attack Types')
ax.set_xlabel('Industry')
ax.set_ylabel('Number of Attacks')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# NOTE: this palette is not used by the chart below, which is coloured by the
# 'viridis' colormap; the assignment is kept so the module-level name is unchanged.
colors = ['#ADD8E6', '#87CEEB', '#4682B4', '#1E90FF', '#4169E1', '#000080']

# Stacked bar chart: incident count per country, split by attack type.
# fill_value=0 keeps the counts as integers for country/type pairs that never occur.
country_vs_attack_type = data.groupby(['Country', 'Attack_Type']).size().unstack(fill_value=0)

# DataFrame.plot opens a new figure unless it is given an Axes, so the Axes is
# created here and passed in; this avoids leaving a stray empty figure behind.
fig, ax = plt.subplots(figsize=(10, 6))
country_vs_attack_type.plot(kind='bar', stacked=True, colormap='viridis', ax=ax)
ax.set_title('Country vs Attack Types')
ax.set_xlabel('Country')
ax.set_ylabel('Number of Attacks')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
# Seven bar colours for the attack-vector segments. The last entry repeats the
# first, so a seventh attack vector would share its colour with the first one.
colors = ['#ADD8E6', '#87CEEB', '#4682B4', '#1E90FF', '#4169E1', '#000080', '#ADD8E6']

# Stacked bar chart: incident count per industry, split by attack vector.
# fill_value=0 keeps the counts as integers for industry/vector pairs that never occur.
industry_vs_attack_vector = data.groupby(['Industry', 'Attack_Vector']).size().unstack(fill_value=0)

# DataFrame.plot opens a new figure unless it is given an Axes, so the Axes is
# created here and passed in; this avoids leaving a stray empty figure behind.
# The size matches the one the chart was actually drawn at before (10 x 6).
fig, ax = plt.subplots(figsize=(10, 6))
industry_vs_attack_vector.plot(kind='bar', stacked=True, color=colors, ax=ax)
ax.set_title('Attack Vectors by Industry')
ax.set_xlabel('Industry')
ax.set_ylabel('Number of Attacks')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()




In [None]:
# Stacked bar chart: incident count per country, split by severity bucket.
# Requires the 'Severity' column added by the severity-distribution cell.
# fill_value=0 keeps the counts as integers for country/severity pairs that never occur.
severity_by_country = data.groupby(['Country', 'Severity']).size().unstack(fill_value=0)

# DataFrame.plot opens a new figure unless it is given an Axes, so the Axes is
# created here and passed in; this avoids leaving a stray empty figure behind.
# The size matches the one the chart was actually drawn at before (10 x 6).
fig, ax = plt.subplots(figsize=(10, 6))
severity_by_country.plot(kind='bar', stacked=True, colormap='coolwarm', ax=ax)
ax.set_title('Severity Levels by Country')
ax.set_xlabel('Country')
ax.set_ylabel('Number of Attacks')
plt.xticks(rotation=45)
plt.show()


In [None]:
# Yearly Shifts in Attack Severity
# Recompute 'Severity' and re-derive 'Year' from 'Date_of_Incident' so this
# cell does not depend on those columns having been prepared earlier.
# Note that this overwrites any existing 'Year' column.
data['Severity'] = data.apply(severity_level, axis=1)
data['Year'] = pd.to_datetime(data['Date_of_Incident']).dt.year
# fill_value=0 keeps the counts as integers for year/severity pairs that never occur.
severity_year = data.groupby(['Year', 'Severity']).size().unstack(fill_value=0)

# DataFrame.plot opens a new figure unless it is given an Axes, so the Axes is
# created here and passed in; this avoids leaving a stray empty figure behind.
# The size matches the one the chart was actually drawn at before (10 x 6).
fig, ax = plt.subplots(figsize=(10, 6))
severity_year.plot(kind='bar', stacked=True, colormap='viridis', ax=ax)
ax.set_title("Yearly Shifts in Attack Severity")
ax.set_xlabel("Year")
ax.set_ylabel("Number of Attacks")
plt.xticks(rotation=45)
plt.show()


In [None]:
# Industry Impact Over Time
# Stacked bar chart: incident count per year, split by industry.
industry_trends = data.groupby(['Year', 'Industry']).size().unstack(fill_value=0)

# DataFrame.plot opens a new figure unless it is given an Axes. Passing the
# Axes makes the 10 x 6 size take effect and avoids a stray empty figure.
fig, ax = plt.subplots(figsize=(10, 6))
industry_trends.plot(kind='bar', stacked=True, colormap='coolwarm', ax=ax)
ax.set_title('Industry Impact Over Time')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Attacks')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
# Severity Levels by Attack Vector
# Recompute 'Severity' with the shared severity_level helper so the thresholds
# are defined in a single place instead of being repeated inline.
data['Severity'] = data.apply(severity_level, axis=1)
# fill_value=0 keeps the counts as integers for vector/severity pairs that never occur.
severity_by_attack_vector = data.groupby(['Attack_Vector', 'Severity']).size().unstack(fill_value=0)

# DataFrame.plot opens a new figure unless it is given an Axes. Passing the
# Axes makes the 12 x 8 size take effect and avoids a stray empty figure.
fig, ax = plt.subplots(figsize=(12, 8))
severity_by_attack_vector.plot(kind='bar', stacked=True, colormap='viridis', ax=ax)
ax.set_title('Severity Levels by Attack Vector')
ax.set_xlabel('Attack Vector')
ax.set_ylabel('Number of Attacks')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
# Average Breach Size by Industry and Attack Vector
# Mean 'Breach_Size' for every industry/attack-vector pair; pairs with no
# incidents are shown as 0.
# NOTE(review): the bars stack averages, so a bar's total height is a sum of
# per-vector means rather than an industry-wide average - confirm this is intended.
industry_attack_breach = data.groupby(['Industry', 'Attack_Vector'])['Breach_Size'].mean().unstack().fillna(0)

# DataFrame.plot opens a new figure unless it is given an Axes. Passing the
# Axes makes the 12 x 8 size take effect and avoids a stray empty figure.
fig, ax = plt.subplots(figsize=(12, 8))
industry_attack_breach.plot(kind='bar', stacked=True, colormap='coolwarm', ax=ax)
ax.set_title('Average Breach Size by Industry and Attack Vector')
ax.set_xlabel('Industry')
ax.set_ylabel('Average Breach Size (No. of Records)')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


## Time Series Analysis

In [None]:


# When the 'Date_of_Incident' column is present, parse it to datetime and
# derive the 'Year', 'Month' and 'DayOfWeek' columns from it. When the column
# is absent nothing is changed, so cells that read 'Month' or 'DayOfWeek'
# would then fail on the missing column.
if 'Date_of_Incident' in data.columns:
    incident_dates = pd.to_datetime(data['Date_of_Incident'])
    data['Date_of_Incident'] = incident_dates
    data['Year'] = incident_dates.dt.year
    data['Month'] = incident_dates.dt.month
    data['DayOfWeek'] = incident_dates.dt.day_name()

In [None]:
# Line chart of the total financial loss per year.
# NOTE(review): this reuses financial_loss_per_year from the General Analysis
# section, computed before later cells re-derive 'Year' from
# 'Date_of_Incident' - confirm both year values agree.
plt.figure(figsize=(10, 6))
ax = financial_loss_per_year.plot.line(marker='o', color='red')
ax.set(
    title='Total Financial Loss from Cyber Attacks per Year',
    xlabel='Year',
    ylabel='Total Financial Loss (USD)',
)
ax.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Mean 'Mitigation_Time' of the incidents in each year, drawn as a line chart.
mitigation_time_per_year = data.groupby('Year')['Mitigation_Time'].mean()

plt.figure(figsize=(10, 6))
ax = mitigation_time_per_year.plot.line(marker='o', color='red')
ax.set(
    title='Average Mitigation Time per Year',
    xlabel='Year',
    ylabel='Average Mitigation Time (Days)',
)
ax.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Time Series Analysis - heatmap of incident counts by year (rows) and month (columns).
# unstack(fill_value=0) keeps the table integer-typed even when a year/month
# pair has no incidents. Filling NaN afterwards would turn the counts into
# floats, which the integer annotation format fmt="d" cannot render.
attacks_per_month = data.groupby(['Year', 'Month']).size().unstack(fill_value=0)
plt.figure(figsize=(12, 6))
sns.heatmap(attacks_per_month, cmap='Blues', annot=True, fmt="d")
plt.title("Monthly Trends of Cyber Attacks Over the Years")
plt.xlabel("Month")
plt.ylabel("Year")
plt.show()


In [None]:
# Incident count for each 'Month' value across all years, drawn as a bar chart.
monthly_trends = data.groupby('Month').size()

plt.figure(figsize=(10, 6))
ax = monthly_trends.plot.bar(color='skyblue')
ax.set(
    title='Cyber Attacks by Month',
    xlabel='Month',
    ylabel='Number of Attacks',
)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
# Cyber Attacks by Day of the Week
# Parse the incident date and derive the weekday name here so this cell does
# not depend on the date-feature cell having run.
data['Date_of_Incident'] = pd.to_datetime(data['Date_of_Incident'])
data['DayOfWeek'] = data['Date_of_Incident'].dt.day_name()

# Put the weekdays in calendar order. fill_value=0 gives weekdays without any
# incident a count of 0 instead of NaN, so the series stays integer-typed.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
day_of_week_trends = data.groupby('DayOfWeek').size().reindex(weekday_order, fill_value=0)

plt.figure(figsize=(10, 6))
day_of_week_trends.plot(kind='bar', color='orange')
plt.title('Cyber Attacks by Day of the Week')
plt.xlabel('Day of the Week')
plt.ylabel('Number of Attacks')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
