# Analysis Notebook for A Series Of Unfortunate Directors Blog

In [None]:
import pandas as pd
import seaborn as sns
from scipy import stats
import numpy as np
import time
import matplotlib.pyplot as plt
# Use a 16pt default font for every figure drawn in this notebook.
plt.rc("font", size="16")

# Ownership share of companies analysis

In [None]:
# Load the first headerless CSV partition (malformed rows are skipped) and
# label its five columns; it is later concatenated into ceasedCompaniesDf.
df1 = pd.read_csv("", sep=",", header=None, on_bad_lines="skip").set_axis(
    ["timestamp", "window", "psc", "company", "weight"], axis="columns"
)
df1

In [None]:
# Load the second headerless CSV partition (malformed rows are skipped) and
# label its five columns; it is later concatenated into ceasedCompaniesDf.
df2 = pd.read_csv("", sep=",", header=None, on_bad_lines="skip").set_axis(
    ["timestamp", "window", "psc", "company", "weight"], axis="columns"
)
df2

In [None]:
# Load the third headerless CSV partition (malformed rows are skipped) and
# label its five columns; it is later concatenated into ceasedCompaniesDf.
df3 = pd.read_csv("", sep=",", header=None, on_bad_lines="skip").set_axis(
    ["timestamp", "window", "psc", "company", "weight"], axis="columns"
)
df3

In [None]:
# Load the fourth headerless CSV partition (malformed rows are skipped) and
# label its five columns; it is later concatenated into ceasedCompaniesDf.
df4 = pd.read_csv("", sep=",", header=None, on_bad_lines="skip").set_axis(
    ["timestamp", "window", "psc", "company", "weight"], axis="columns"
)
df4

In [None]:
# Stack the four partition dataframes row-wise into one dataframe.
# ignore_index=True gives the result a fresh 0..n-1 index; otherwise every
# partition contributes its own 0..k labels, and later label-based operations
# (such as DataFrame.drop by index) would hit rows from several partitions.
ceasedCompaniesDf = pd.concat([df1, df2, df3, df4], axis=0, ignore_index=True)
ceasedCompaniesDf

In [None]:
# Convert the 'timestamp' column from epoch milliseconds to pandas datetimes.
import datetime
# unit="ms" converts in a single step, so there is no intermediate float
# division to seconds that could introduce rounding.
ceasedCompaniesDf['timestamp'] = pd.to_datetime(ceasedCompaniesDf['timestamp'], unit="ms")

In [None]:
# Keep only the rows whose PSC value occurs in more than 100 rows of the frame
# (each row's PSC is mapped to its occurrence count, then thresholded).
ceasedCompaniesDf = ceasedCompaniesDf.loc[
    ceasedCompaniesDf["psc"].map(ceasedCompaniesDf["psc"].value_counts()).gt(100)
]

In [None]:
# Remove the rows whose PSC is the "No-Name-00" placeholder.
# A boolean mask selects rows by position. Dropping by index label instead
# would also delete any other row that happens to share a label with a
# placeholder row, which occurs when the frame's index contains duplicates.
# .copy() makes the result an independent frame.
ceasedCompaniesDf = ceasedCompaniesDf[ceasedCompaniesDf['psc'] != "No-Name-00"].copy()

In [None]:
# Subset of ceased-company rows for four named PSCs (described in the notebook
# as the companies at One George Yard).
pscFilterList = [
    "Sdg-Secretaries-Limited-00",
    "Fd-Secretarial-Ltd-00",
    "Woodberry-Secretarial-Limited-00",
    "Sdg-Registrars-Limited-00",
]
# Boolean mask: True where the row's PSC is one of the names above.
filteredSeries = ceasedCompaniesDf["psc"].isin(pscFilterList)
filteredDf = ceasedCompaniesDf.loc[filteredSeries]

In [None]:
# Load the first headerless bulk CSV partition (malformed rows are skipped)
# and label its five columns; it is later concatenated into bulkDf.
bulkdf1 = pd.read_csv("", sep=",", header=None, on_bad_lines="skip").set_axis(
    ["timestamp", "window", "psc", "company", "weight"], axis="columns"
)
bulkdf1

In [None]:
# Load the second headerless bulk CSV partition (malformed rows are skipped)
# and label its five columns; it is later concatenated into bulkDf.
bulkdf2 = pd.read_csv("", sep=",", header=None, on_bad_lines="skip").set_axis(
    ["timestamp", "window", "psc", "company", "weight"], axis="columns"
)
bulkdf2

In [None]:
# Load the third headerless bulk CSV partition (malformed rows are skipped)
# and label its five columns; it is later concatenated into bulkDf.
bulkdf3 = pd.read_csv("", sep=",", header=None, on_bad_lines="skip").set_axis(
    ["timestamp", "window", "psc", "company", "weight"], axis="columns"
)
bulkdf3

In [None]:
# Load the fourth headerless bulk CSV partition (malformed rows are skipped)
# and label its five columns; it is later concatenated into bulkDf.
bulkdf4 = pd.read_csv("", sep=",", header=None, on_bad_lines="skip").set_axis(
    ["timestamp", "window", "psc", "company", "weight"], axis="columns"
)
bulkdf4

In [None]:
# Stack the four bulk partition dataframes row-wise into one dataframe.
# ignore_index=True gives the result a fresh 0..n-1 index; otherwise every
# partition contributes its own 0..k labels, and later label-based operations
# (such as DataFrame.drop by index) would hit rows from several partitions.
bulkDf = pd.concat([bulkdf1, bulkdf2, bulkdf3, bulkdf4], axis=0, ignore_index=True)
bulkDf

In [None]:
# Remove the rows whose PSC is the "No-Name-00" placeholder.
# A boolean mask selects rows by position. Dropping by index label instead
# would also delete any other row that happens to share a label with a
# placeholder row, which occurs when the frame's index contains duplicates.
# .copy() makes the result an independent frame, so assigning columns on it
# later does not trigger pandas' SettingWithCopyWarning.
openedCompaniesDf = bulkDf[bulkDf['psc'] != "No-Name-00"].copy()

In [None]:
# Convert the 'timestamp' column from epoch milliseconds to pandas datetimes.
import datetime
# unit="ms" converts in a single step, so there is no intermediate float
# division to seconds that could introduce rounding.
openedCompaniesDf['timestamp'] = pd.to_datetime(openedCompaniesDf['timestamp'], unit="ms")

In [None]:
# Keep only the rows whose PSC value occurs in more than 100 rows of
# openedCompaniesDf (each row's PSC is mapped to its occurrence count).
openedCompanies = openedCompaniesDf.loc[
    openedCompaniesDf["psc"].map(openedCompaniesDf["psc"].value_counts()).gt(100)
]
openedCompanies

In [None]:
# Subset of opened-company rows belonging to four named, related PSCs.
openCompaniesFilter = [
    "Sdg-Secretaries-Limited-00",
    "Fd-Secretarial-Ltd-00",
    "Woodberry-Secretarial-Limited-00",
    "Sdg-Registrars-Limited-00",
]
# Boolean mask: True where the row's PSC is one of the names above.
filteredList = openedCompaniesDf["psc"].isin(openCompaniesFilter)
filteredCompaniesOpened = openedCompaniesDf.loc[filteredList]

In [None]:
# Scatter plot of PSC against time for companies opening, with each point
# coloured by the 'weight' column.
plt.figure(figsize=(15, 10))
g = plt.gca()
g.set(
    title='Shared Ownership (%) in companies registering on Companies House',
    xlabel='Time (Year-Month)',
    ylabel='Person With Significant Control',
)
g = sns.scatterplot(data=openedCompanies, x="timestamp", y="psc", hue="weight", ax=g)

In [None]:
# Scatter plot of PSC against time for companies closing, with each point
# coloured by the 'weight' column.
plt.figure(figsize=(15, 10))
g = plt.gca()
g.set(
    title='Shared Ownership (%) in companies closing down on Companies House',
    xlabel='Time (Year-Month)',
    ylabel='Person With Significant Control',
)
g = sns.scatterplot(data=ceasedCompaniesDf, x="timestamp", y="psc", hue="weight", ax=g)

In [None]:
# Plot the edges between people (PSCs) and companies with graphistry.
import graphistry
# NOTE(review): this previously referenced `df`, a name that is never defined
# in the notebook (NameError). ceasedCompaniesDf is assumed to be the intended
# edge list because filteredDf, plotted below, is a subset of it. Confirm.
g=graphistry.bind(source='company', destination='psc').edges(ceasedCompaniesDf)
g.plot()
# Same kind of plot restricted to the filtered PSCs, with 'weight' bound as
# the edge weight.
filteredGraph=graphistry.edges(filteredDf).bind(source='psc', destination='company', edge_weight='weight')
filteredGraph.plot()

# Company Formation Agents Analysis

In [None]:
# Load the headerless CSV used for the formation-agent analysis; columns are
# numbered 0..n-1 until they are named. Comma is read_csv's default separator.
cohodf = pd.read_csv("", header=None)

In [None]:
# Name the columns of the headerless cohodf frame.
# NOTE(review): later cells index cohodf by 'officer' and "Company Duration",
# neither of which is among the names assigned here, so those lookups would
# raise KeyError. Confirm the real schema of this file and update the list.
cohodf.columns=["timestamp","window","psc","company"]

In [None]:
# Identifier strings matched against the 'officer' column, one per formation
# agent of interest (presumably Companies House officer IDs; confirm).
swiftFilter = "8d_bnTiwfxh8JIr3YfuwkmkWkCg"
instantFilter = "SBjtBss_I4XEupbfAUXoeAkMcIk"
companyDirectorsFilter = "C7trUnW0xAvzpaSmVXVviwNi2BY"
templeSecretariesFilter = "xLPL0PBzn14BtfuhzOZQswj4AoM"
formTenDirectorsFilter = "aDjhOpnMaB_uAHDxRnMLWpa9C-I"
formTenSecretariesFilter = "Yg4rTn5QucYg_hJOxGTnx3B51WY"
harleyStreetFilter = "NjXloTUDlStWw9T7mfDI5dkFLsg"

# One dataframe per agent: the cohodf rows whose 'officer' equals that ID.
swiftDf = cohodf.loc[cohodf["officer"].eq(swiftFilter)]
instantDf = cohodf.loc[cohodf["officer"].eq(instantFilter)]
companyDirectorsDf = cohodf.loc[cohodf["officer"].eq(companyDirectorsFilter)]
templeSecretariesDf = cohodf.loc[cohodf["officer"].eq(templeSecretariesFilter)]
formTenDirectorsDf = cohodf.loc[cohodf["officer"].eq(formTenDirectorsFilter)]
formTenSecretariesDf = cohodf.loc[cohodf["officer"].eq(formTenSecretariesFilter)]
harleyStreetDf = cohodf.loc[cohodf["officer"].eq(harleyStreetFilter)]

In [None]:
# Company-duration histogram for the Harley Street officer.
# Drop rows with a negative "Company Duration" first, as the Temple
# Secretaries and Company Directors cells do, so the three histograms are
# built from comparably cleaned data.
harleyStreetDf=harleyStreetDf[harleyStreetDf["Company Duration"]>=0]
# Progressively tighter upper bounds on the duration (the plot labels it in days).
harleyStreetDf1=harleyStreetDf[harleyStreetDf["Company Duration"]<4000]
harleyStreetDf2=harleyStreetDf[harleyStreetDf["Company Duration"]<1000]
harleyStreetDf3=harleyStreetDf[harleyStreetDf["Company Duration"]<365]

# Plot only the tightest subset (durations below 365).
sns.histplot(data=harleyStreetDf3, x="Company Duration", color="skyblue")
plt.xlabel("Days", fontsize=15)
plt.ylabel("Frequency",fontsize=15)
plt.title("Company Duration")

In [None]:
# Company-duration histogram for the Temple Secretaries officer.
# Discard negative durations, then derive three subsets with progressively
# tighter upper bounds.
templeSecretariesDf = templeSecretariesDf.loc[lambda d: d["Company Duration"] >= 0]
templeSecretariesDf1 = templeSecretariesDf.loc[lambda d: d["Company Duration"] < 4000]
templeSecretariesDf2 = templeSecretariesDf.loc[lambda d: d["Company Duration"] < 1000]
templeSecretariesDf3 = templeSecretariesDf.loc[lambda d: d["Company Duration"] < 365]

# Plot the widest subset (durations below 4000) with a logarithmic count axis.
g = sns.histplot(data=templeSecretariesDf1, x="Company Duration", color="salmon")
g.set_yscale("log")
g.set_xlabel("Days", fontsize=15)
g.set_ylabel("Frequency", fontsize=15)
g.set_title("Company Duration")

In [None]:
# Company-duration histogram for the Company Directors officer.
# Discard negative durations, then derive three subsets with progressively
# tighter upper bounds.
companyDirectorsDf = companyDirectorsDf.loc[lambda d: d["Company Duration"] >= 0]
companyDirectorsDf1 = companyDirectorsDf.loc[lambda d: d["Company Duration"] < 4000]
companyDirectorsDf2 = companyDirectorsDf.loc[lambda d: d["Company Duration"] < 1000]
companyDirectorsDf3 = companyDirectorsDf.loc[lambda d: d["Company Duration"] < 365]

# Plot the tightest subset (durations below 365); the count axis stays linear
# here (no log scaling is applied).
g = sns.histplot(data=companyDirectorsDf3, x="Company Duration", color="burlywood")
g.set_xlabel("Days", fontsize=15)
g.set_ylabel("Frequency", fontsize=15)
g.set_title("Company Duration")