### Lets start the cleaning process by importing the necessary libraries and setting some default visual parameters for better viewing

In [9]:
import pandas as pd
import numpy as np
from urllib.parse import urlparse
import os
from pathlib import Path
import re
import ast
import json

pd.set_option("display.max_rows", 20)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 140)

In [10]:
# path to the raw failory CSV
file_path = "/workspaces/dsi-ws2025-project-grpab-weigl-mds1ab-awp-proj2/Data_Acquisition/failory_eu_companies_raw.csv"  

df = pd.read_csv(file_path, dtype=str).fillna("")
print("Loaded shape:", df.shape)
df.head(5)


Loaded shape: (5103, 15)


Unnamed: 0,rank,country,name,description,state,city,started_in,founders,industries,employees,funding_raw,funding_rounds,investors_count,investors_list,source_url
0,1,Austria,Bitpanda,Bitpanda is a digital asset exchange that make...,Wien,Vienna,2014,"Christian Trummer, Eric Demuth, Paul Klanschek","Bitcoin, Blockchain, Cryptocurrency, FinTech",251-500,"$283,013,472",5,6,"['DST Global', 'Wintermute Trading', 'Speedinv...",https://www.failory.com/startups/austria
1,2,Austria,Tractive,Tractive is a company that creates GPS trackin...,Oberosterreich,Pasching,2012,"Michael Hurnaus, Michael Lettner, Michael Tsch...","GPS, Pet, Software, Wearables",51-100,"$37,687,997",5,5,"['Guidepost Growth Equity', 'Monkfish Equity',...",https://www.failory.com/startups/austria
2,3,Austria,GoStudent,"In a virtual learning environment, GoStudent i...",Wien,Vienna,2016,"Felix Ohswald, Gregor MÃ¼ller, Moritz Ohswald","Apps, Continuing Education, EdTech, Education",251-500,"$100,662,844",6,7,"['DN Capital', 'Coatue', 'Speedinvest', 'Left ...",https://www.failory.com/startups/austria
3,4,Austria,Allcyte,Allcyte is a biotech start-up focused on patie...,Wien,Vienna,2017,"Berend Snijder, Giulio Superti-Furga, Gregory ...","Biotechnology, Health Diagnostics, Machine Lea...",11-50,"$6,000,000",2,5,"['42CAP', 'Air Street Capital', 'PUSH Ventures...",https://www.failory.com/startups/austria
4,5,Austria,Adverity,Adverity is a data-driven marketing analytics ...,Wien,Vienna,2015,"Alexander IgelsbÃ¶ck, Andreas GlÃ¤nzer, Martin...","Advertising, Analytics, Artificial Intelligenc...",101-250,"$46,298,685",4,7,"['Sapphire Ventures', 'Speedinvest', 'Felix Ca...",https://www.failory.com/startups/austria


In [11]:
print("Columns:", df.columns.tolist())
print("\nDtypes:")
print(df.dtypes)

print("\nNull counts:")
print(df.isna().sum().sort_values(ascending=False))


Columns: ['rank', 'country', 'name', 'description', 'state', 'city', 'started_in', 'founders', 'industries', 'employees', 'funding_raw', 'funding_rounds', 'investors_count', 'investors_list', 'source_url']

Dtypes:
rank               object
country            object
name               object
description        object
state              object
city               object
started_in         object
founders           object
industries         object
employees          object
funding_raw        object
funding_rounds     object
investors_count    object
investors_list     object
source_url         object
dtype: object

Null counts:
rank               0
country            0
name               0
description        0
state              0
city               0
started_in         0
founders           0
industries         0
employees          0
funding_raw        0
funding_rounds     0
investors_count    0
investors_list     0
source_url         0
dtype: int64


Unlike our first source where the data was mostly missing for multiple features, we can see here that we have no nulls in this dataset and the quality of data is really good due to the source we picked (Focused on companies instead of a broad sourc elike wikidata)

Though we only have 5k records, most of these records are rich in information and we can hopefully clean this up to make the data more standardized without losing any more information

In the next following steps, let us try to clean the most important columns one by one and try to standardize them into a unified format

In [12]:
df_fail = df.copy()   # Working on a copy of data before commiting to the main dataframe

# Converting the feature started_in to int
df_fail["started_in"] = pd.to_numeric(df_fail["started_in"], errors="coerce")

# Converting employees "251-500" → min/max numeric scale for consistency. We will drop the original column in the end
def parse_employees(x):
    if "-" in x:
        low, high = x.split("-")
        return int(low), int(high)
    # single number (rare)
    if x.isdigit():
        return int(x), int(x)
    return np.nan, np.nan

df_fail[["employees_min", "employees_max"]] = df_fail["employees"].apply(
    lambda x: pd.Series(parse_employees(x))
)

df_fail.head(5)

Unnamed: 0,rank,country,name,description,state,city,started_in,founders,industries,employees,funding_raw,funding_rounds,investors_count,investors_list,source_url,employees_min,employees_max
0,1,Austria,Bitpanda,Bitpanda is a digital asset exchange that make...,Wien,Vienna,2014.0,"Christian Trummer, Eric Demuth, Paul Klanschek","Bitcoin, Blockchain, Cryptocurrency, FinTech",251-500,"$283,013,472",5,6,"['DST Global', 'Wintermute Trading', 'Speedinv...",https://www.failory.com/startups/austria,251.0,500.0
1,2,Austria,Tractive,Tractive is a company that creates GPS trackin...,Oberosterreich,Pasching,2012.0,"Michael Hurnaus, Michael Lettner, Michael Tsch...","GPS, Pet, Software, Wearables",51-100,"$37,687,997",5,5,"['Guidepost Growth Equity', 'Monkfish Equity',...",https://www.failory.com/startups/austria,51.0,100.0
2,3,Austria,GoStudent,"In a virtual learning environment, GoStudent i...",Wien,Vienna,2016.0,"Felix Ohswald, Gregor MÃ¼ller, Moritz Ohswald","Apps, Continuing Education, EdTech, Education",251-500,"$100,662,844",6,7,"['DN Capital', 'Coatue', 'Speedinvest', 'Left ...",https://www.failory.com/startups/austria,251.0,500.0
3,4,Austria,Allcyte,Allcyte is a biotech start-up focused on patie...,Wien,Vienna,2017.0,"Berend Snijder, Giulio Superti-Furga, Gregory ...","Biotechnology, Health Diagnostics, Machine Lea...",11-50,"$6,000,000",2,5,"['42CAP', 'Air Street Capital', 'PUSH Ventures...",https://www.failory.com/startups/austria,11.0,50.0
4,5,Austria,Adverity,Adverity is a data-driven marketing analytics ...,Wien,Vienna,2015.0,"Alexander IgelsbÃ¶ck, Andreas GlÃ¤nzer, Martin...","Advertising, Analytics, Artificial Intelligenc...",101-250,"$46,298,685",4,7,"['Sapphire Ventures', 'Speedinvest', 'Felix Ca...",https://www.failory.com/startups/austria,101.0,250.0


In [13]:
# Converting funding_raw values like "$283,013,472" → numeric
def parse_funding(amount):
    if amount in [None, '']:
        return np.nan
    cleaned = amount.replace("$", "").replace(",", "")
    if cleaned.lower().endswith("k"):
        return float(cleaned[:-1]) * 1_000
    if cleaned.lower().endswith("m"):
        return float(cleaned[:-1]) * 1_000_000
    if cleaned.lower().endswith("b"):
        return float(cleaned[:-1]) * 1_000_000_000
    return float(cleaned)

df_fail["funding_usd"] = df_fail["funding_raw"].apply(parse_funding)
df_fail.head()

Unnamed: 0,rank,country,name,description,state,city,started_in,founders,industries,employees,funding_raw,funding_rounds,investors_count,investors_list,source_url,employees_min,employees_max,funding_usd
0,1,Austria,Bitpanda,Bitpanda is a digital asset exchange that make...,Wien,Vienna,2014.0,"Christian Trummer, Eric Demuth, Paul Klanschek","Bitcoin, Blockchain, Cryptocurrency, FinTech",251-500,"$283,013,472",5,6,"['DST Global', 'Wintermute Trading', 'Speedinv...",https://www.failory.com/startups/austria,251.0,500.0,283013472.0
1,2,Austria,Tractive,Tractive is a company that creates GPS trackin...,Oberosterreich,Pasching,2012.0,"Michael Hurnaus, Michael Lettner, Michael Tsch...","GPS, Pet, Software, Wearables",51-100,"$37,687,997",5,5,"['Guidepost Growth Equity', 'Monkfish Equity',...",https://www.failory.com/startups/austria,51.0,100.0,37687997.0
2,3,Austria,GoStudent,"In a virtual learning environment, GoStudent i...",Wien,Vienna,2016.0,"Felix Ohswald, Gregor MÃ¼ller, Moritz Ohswald","Apps, Continuing Education, EdTech, Education",251-500,"$100,662,844",6,7,"['DN Capital', 'Coatue', 'Speedinvest', 'Left ...",https://www.failory.com/startups/austria,251.0,500.0,100662844.0
3,4,Austria,Allcyte,Allcyte is a biotech start-up focused on patie...,Wien,Vienna,2017.0,"Berend Snijder, Giulio Superti-Furga, Gregory ...","Biotechnology, Health Diagnostics, Machine Lea...",11-50,"$6,000,000",2,5,"['42CAP', 'Air Street Capital', 'PUSH Ventures...",https://www.failory.com/startups/austria,11.0,50.0,6000000.0
4,5,Austria,Adverity,Adverity is a data-driven marketing analytics ...,Wien,Vienna,2015.0,"Alexander IgelsbÃ¶ck, Andreas GlÃ¤nzer, Martin...","Advertising, Analytics, Artificial Intelligenc...",101-250,"$46,298,685",4,7,"['Sapphire Ventures', 'Speedinvest', 'Felix Ca...",https://www.failory.com/startups/austria,101.0,250.0,46298685.0


In [14]:
#Converting investors_count & funding_rounds to int

df_fail["funding_rounds"] = pd.to_numeric(df_fail["funding_rounds"], errors="coerce", downcast="integer")
df_fail["investors_count"] = pd.to_numeric(df_fail["investors_count"], errors="coerce", downcast="integer")

In [15]:
#Converting investors_list string to a list
def parse_investor_list(x):
    if not isinstance(x, str) or x.strip() == "":
        return []
    s = x.strip()
    s = s.replace("'", '"')
    s = re.sub(r',$', '', s)  
    if not s.startswith("["):
        s = "[" + s
    if not s.endswith("]"):
        s = s + "]"
    try:
        return json.loads(s)
    except:
        return [item.strip() for item in re.split(r",\s*", x) if item.strip()]

df_fail["investors_list"] = df_fail["investors_list"].apply(parse_investor_list)


#Standardizing industries and founders into list
df_fail["industries_list"] = df_fail["industries"].apply(lambda x: [i.strip() for i in x.split(",")])
df_fail["founders_list"] = df_fail["founders"].apply(lambda x: [i.strip() for i in x.split(",")])

df_fail.head()

Unnamed: 0,rank,country,name,description,state,city,started_in,founders,industries,employees,funding_raw,funding_rounds,investors_count,investors_list,source_url,employees_min,employees_max,funding_usd,industries_list,founders_list
0,1,Austria,Bitpanda,Bitpanda is a digital asset exchange that make...,Wien,Vienna,2014.0,"Christian Trummer, Eric Demuth, Paul Klanschek","Bitcoin, Blockchain, Cryptocurrency, FinTech",251-500,"$283,013,472",5.0,6.0,"[DST Global, Wintermute Trading, Speedinvest, ...",https://www.failory.com/startups/austria,251.0,500.0,283013472.0,"[Bitcoin, Blockchain, Cryptocurrency, FinTech]","[Christian Trummer, Eric Demuth, Paul Klanschek]"
1,2,Austria,Tractive,Tractive is a company that creates GPS trackin...,Oberosterreich,Pasching,2012.0,"Michael Hurnaus, Michael Lettner, Michael Tsch...","GPS, Pet, Software, Wearables",51-100,"$37,687,997",5.0,5.0,"[['Guidepost Growth Equity', 'Monkfish Equity'...",https://www.failory.com/startups/austria,51.0,100.0,37687997.0,"[GPS, Pet, Software, Wearables]","[Michael Hurnaus, Michael Lettner, Michael Tsc..."
2,3,Austria,GoStudent,"In a virtual learning environment, GoStudent i...",Wien,Vienna,2016.0,"Felix Ohswald, Gregor MÃ¼ller, Moritz Ohswald","Apps, Continuing Education, EdTech, Education",251-500,"$100,662,844",6.0,7.0,"[DN Capital, Coatue, Speedinvest, Left Lane Ca...",https://www.failory.com/startups/austria,251.0,500.0,100662844.0,"[Apps, Continuing Education, EdTech, Education]","[Felix Ohswald, Gregor MÃ¼ller, Moritz Ohswald]"
3,4,Austria,Allcyte,Allcyte is a biotech start-up focused on patie...,Wien,Vienna,2017.0,"Berend Snijder, Giulio Superti-Furga, Gregory ...","Biotechnology, Health Diagnostics, Machine Lea...",11-50,"$6,000,000",2.0,5.0,"[42CAP, Air Street Capital, PUSH Ventures, Ami...",https://www.failory.com/startups/austria,11.0,50.0,6000000.0,"[Biotechnology, Health Diagnostics, Machine Le...","[Berend Snijder, Giulio Superti-Furga, Gregory..."
4,5,Austria,Adverity,Adverity is a data-driven marketing analytics ...,Wien,Vienna,2015.0,"Alexander IgelsbÃ¶ck, Andreas GlÃ¤nzer, Martin...","Advertising, Analytics, Artificial Intelligenc...",101-250,"$46,298,685",4.0,7.0,"[Sapphire Ventures, Speedinvest, Felix Capital...",https://www.failory.com/startups/austria,101.0,250.0,46298685.0,"[Advertising, Analytics, Artificial Intelligen...","[Alexander IgelsbÃ¶ck, Andreas GlÃ¤nzer, Marti..."


With this step, now we already have parsed employee ranges, funding converted to numeric which is a valuable feature, cleaned industry and founders list. So, we can now move on to the final cleaning steps

In [16]:
# Trimming whitespaces & unifying the casing of letters
text_cols = [
    "country", "name", "description", "state", "city",
    "founders", "industries"
]

for c in text_cols:
    df_fail[c] = df_fail[c].astype(str).str.strip()

# Standardizing country casing for consistency
df_fail["country"] = df_fail["country"].str.title()

# Normalizing city/state to title-case
df_fail["city"] = df_fail["city"].str.title()
df_fail["state"] = df_fail["state"].str.title()

df_fail.head(5)

Unnamed: 0,rank,country,name,description,state,city,started_in,founders,industries,employees,funding_raw,funding_rounds,investors_count,investors_list,source_url,employees_min,employees_max,funding_usd,industries_list,founders_list
0,1,Austria,Bitpanda,Bitpanda is a digital asset exchange that make...,Wien,Vienna,2014.0,"Christian Trummer, Eric Demuth, Paul Klanschek","Bitcoin, Blockchain, Cryptocurrency, FinTech",251-500,"$283,013,472",5.0,6.0,"[DST Global, Wintermute Trading, Speedinvest, ...",https://www.failory.com/startups/austria,251.0,500.0,283013472.0,"[Bitcoin, Blockchain, Cryptocurrency, FinTech]","[Christian Trummer, Eric Demuth, Paul Klanschek]"
1,2,Austria,Tractive,Tractive is a company that creates GPS trackin...,Oberosterreich,Pasching,2012.0,"Michael Hurnaus, Michael Lettner, Michael Tsch...","GPS, Pet, Software, Wearables",51-100,"$37,687,997",5.0,5.0,"[['Guidepost Growth Equity', 'Monkfish Equity'...",https://www.failory.com/startups/austria,51.0,100.0,37687997.0,"[GPS, Pet, Software, Wearables]","[Michael Hurnaus, Michael Lettner, Michael Tsc..."
2,3,Austria,GoStudent,"In a virtual learning environment, GoStudent i...",Wien,Vienna,2016.0,"Felix Ohswald, Gregor MÃ¼ller, Moritz Ohswald","Apps, Continuing Education, EdTech, Education",251-500,"$100,662,844",6.0,7.0,"[DN Capital, Coatue, Speedinvest, Left Lane Ca...",https://www.failory.com/startups/austria,251.0,500.0,100662844.0,"[Apps, Continuing Education, EdTech, Education]","[Felix Ohswald, Gregor MÃ¼ller, Moritz Ohswald]"
3,4,Austria,Allcyte,Allcyte is a biotech start-up focused on patie...,Wien,Vienna,2017.0,"Berend Snijder, Giulio Superti-Furga, Gregory ...","Biotechnology, Health Diagnostics, Machine Lea...",11-50,"$6,000,000",2.0,5.0,"[42CAP, Air Street Capital, PUSH Ventures, Ami...",https://www.failory.com/startups/austria,11.0,50.0,6000000.0,"[Biotechnology, Health Diagnostics, Machine Le...","[Berend Snijder, Giulio Superti-Furga, Gregory..."
4,5,Austria,Adverity,Adverity is a data-driven marketing analytics ...,Wien,Vienna,2015.0,"Alexander IgelsbÃ¶ck, Andreas GlÃ¤nzer, Martin...","Advertising, Analytics, Artificial Intelligenc...",101-250,"$46,298,685",4.0,7.0,"[Sapphire Ventures, Speedinvest, Felix Capital...",https://www.failory.com/startups/austria,101.0,250.0,46298685.0,"[Advertising, Analytics, Artificial Intelligen...","[Alexander IgelsbÃ¶ck, Andreas GlÃ¤nzer, Marti..."


We wrap up the basic standardization of textual columns with the code above as we can now move on to the numeric columns next

In [17]:
num_cols = ["started_in", "funding_rounds", "investors_count",
            "employees_min", "employees_max", "funding_usd"]

df_fail[num_cols] = df_fail[num_cols].apply(pd.to_numeric, errors="coerce")
df_fail[num_cols].describe()

Unnamed: 0,started_in,funding_rounds,investors_count,employees_min,employees_max,funding_usd
count,1408.0,1406.0,1360.0,4673.0,4673.0,3295.0
mean,2012.03196,3.886913,7.086765,195.304943,372.558956,47592090.0
std,12.951211,2.324405,5.66152,2071.937509,2291.545119,234609500.0
min,1836.0,1.0,1.0,1.0,8.0,1196.0
25%,2012.0,2.0,3.0,1.0,10.0,662320.5
50%,2014.0,3.0,6.0,11.0,50.0,3191836.0
75%,2017.0,5.0,9.0,51.0,100.0,19709430.0
max,2022.0,27.0,56.0,43840.0,43840.0,6040093000.0


In [18]:
df_fail.isna().sum().sort_values(ascending=False)

investors_count    3743
funding_rounds     3697
started_in         3695
funding_usd        1808
employees_min       430
employees_max       430
country               0
rank                  0
description           0
name                  0
city                  0
state                 0
funding_raw           0
employees             0
industries            0
founders              0
source_url            0
investors_list        0
industries_list       0
founders_list         0
dtype: int64

As a final step, let us look at the nulls for each column and decide what to do with them

In [19]:
df_fail[df_fail['investors_count'].isnull()]

Unnamed: 0,rank,country,name,description,state,city,started_in,founders,industries,employees,funding_raw,funding_rounds,investors_count,investors_list,source_url,employees_min,employees_max,funding_usd,industries_list,founders_list
22,23,Austria,Ares-Genetics,Ares Genetics creates DNA-Sequencing Tests bas...,,Vienna,,,,1-10,"$1,489,467",,,[],https://www.failory.com/startups/austria,1.0,10.0,1489467.0,[],[]
23,24,Austria,journiApp,"Using AI, you can make print items like photo ...",,Vienna,,,,1-10,"$452,207",,,[],https://www.failory.com/startups/austria,1.0,10.0,452207.0,[],[]
24,25,Austria,DerBrutkasten,Der Brutkasten is a multimedia portal for star...,,Vienna,,,,1-10,"$2,804,100",,,[],https://www.failory.com/startups/austria,1.0,10.0,2804100.0,[],[]
25,26,Austria,Kokoro,Kokoro captures the critical variables that le...,,Vienna,,,,1-10,"$315,357",,,[],https://www.failory.com/startups/austria,1.0,10.0,315357.0,[],[]
26,27,Austria,seasonax,"Financial professionals can use Seasonax, an a...",,Vienna,,,,1-10,"$1,301,989",,,[],https://www.failory.com/startups/austria,1.0,10.0,1301989.0,[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5098,296,United Kingdom,Abbey Logistics Group,Abbey Logistics Group is a more environmentall...,,Liverpool,,,,501-1000,,,,[],https://www.failory.com/startups/united-kingdom,501.0,1000.0,,[],[]
5099,297,United Kingdom,Mando,"Through the concept, Mando assists large organ...",,Liverpool,,,,10001+,,,,[],https://www.failory.com/startups/united-kingdom,,,,[],[]
5100,298,United Kingdom,Littlewoods,Littlewoods was a Liverpool-based retail and f...,,Liverpool,,,,,,,,[],https://www.failory.com/startups/united-kingdom,,,,[],[]
5101,299,United Kingdom,Homelease,Homelease develops creative solutions to diffi...,,Liverpool,,,,1-10,,,,[],https://www.failory.com/startups/united-kingdom,1.0,10.0,,[],[]


Although some of these columns have nulls for these fields, we still retai important information like the name, country, funding amount etc for these so it only makes sense to retain everything in the final dataset despite the nulls

In [20]:
# Fixing encoding issues
df_final = df_fail.copy()
df_final["description"] = df_final["description"].str.encode("latin1", errors="ignore").str.decode("utf-8", errors="ignore")
df_final["founders"] = df_final["founders"].str.encode("latin1", errors="ignore").str.decode("utf-8", errors="ignore")
df_final["name"] = df_final["name"].str.encode("latin1", errors="ignore").str.decode("utf-8", errors="ignore")

# Converting list fields to JSON strings for safe CSV export
list_cols = ["industries_list", "founders_list", "investors_list"]
for col in list_cols:
    df_final[col] = df_final[col].apply(json.dumps)

#Previewing the head of the final dataset
df_final.head(10)

Unnamed: 0,rank,country,name,description,state,city,started_in,founders,industries,employees,funding_raw,funding_rounds,investors_count,investors_list,source_url,employees_min,employees_max,funding_usd,industries_list,founders_list
0,1,Austria,Bitpanda,Bitpanda is a digital asset exchange that make...,Wien,Vienna,2014.0,"Christian Trummer, Eric Demuth, Paul Klanschek","Bitcoin, Blockchain, Cryptocurrency, FinTech",251-500,"$283,013,472",5.0,6.0,"[""DST Global"", ""Wintermute Trading"", ""Speedinv...",https://www.failory.com/startups/austria,251.0,500.0,283013472.0,"[""Bitcoin"", ""Blockchain"", ""Cryptocurrency"", ""F...","[""Christian Trummer"", ""Eric Demuth"", ""Paul Kla..."
1,2,Austria,Tractive,Tractive is a company that creates GPS trackin...,Oberosterreich,Pasching,2012.0,"Michael Hurnaus, Michael Lettner, Michael Tsch...","GPS, Pet, Software, Wearables",51-100,"$37,687,997",5.0,5.0,"[""['Guidepost Growth Equity'"", ""'Monkfish Equi...",https://www.failory.com/startups/austria,51.0,100.0,37687997.0,"[""GPS"", ""Pet"", ""Software"", ""Wearables""]","[""Michael Hurnaus"", ""Michael Lettner"", ""Michae..."
2,3,Austria,GoStudent,"In a virtual learning environment, GoStudent i...",Wien,Vienna,2016.0,"Felix Ohswald, Gregor Müller, Moritz Ohswald","Apps, Continuing Education, EdTech, Education",251-500,"$100,662,844",6.0,7.0,"[""DN Capital"", ""Coatue"", ""Speedinvest"", ""Left ...",https://www.failory.com/startups/austria,251.0,500.0,100662844.0,"[""Apps"", ""Continuing Education"", ""EdTech"", ""Ed...","[""Felix Ohswald"", ""Gregor M\u00c3\u00bcller"", ..."
3,4,Austria,Allcyte,Allcyte is a biotech start-up focused on patie...,Wien,Vienna,2017.0,"Berend Snijder, Giulio Superti-Furga, Gregory ...","Biotechnology, Health Diagnostics, Machine Lea...",11-50,"$6,000,000",2.0,5.0,"[""42CAP"", ""Air Street Capital"", ""PUSH Ventures...",https://www.failory.com/startups/austria,11.0,50.0,6000000.0,"[""Biotechnology"", ""Health Diagnostics"", ""Machi...","[""Berend Snijder"", ""Giulio Superti-Furga"", ""Gr..."
4,5,Austria,Adverity,Adverity is a data-driven marketing analytics ...,Wien,Vienna,2015.0,"Alexander Igelsböck, Andreas Glänzer, Martin B...","Advertising, Analytics, Artificial Intelligenc...",101-250,"$46,298,685",4.0,7.0,"[""Sapphire Ventures"", ""Speedinvest"", ""Felix Ca...",https://www.failory.com/startups/austria,101.0,250.0,46298685.0,"[""Advertising"", ""Analytics"", ""Artificial Intel...","[""Alexander Igelsb\u00c3\u00b6ck"", ""Andreas Gl..."
5,6,Austria,PlanRadar,PlanRadar is a SaaS service for construction a...,Wien,Vienna,2013.0,"Clemens Hammerl, Constantin Köck, Domagoj Doli...","Android, Architecture, Building Maintenance, C...",101-250,,4.0,6.0,"[""Insight Partners"", ""Headline"", ""Cavalry Vent...",https://www.failory.com/startups/austria,101.0,250.0,,"[""Android"", ""Architecture"", ""Building Maintena...","[""Clemens Hammerl"", ""Constantin K\u00c3\u00b6c..."
6,7,Austria,refurbed,Refurbed is an online marketplace for refurbis...,Wien,Vienna,2017.0,"Jürgen Riedl, Kilian Kaminski, Peter Windischh...","Apps, E-Commerce, Electronics, Marketplace, So...",51-100,"$19,239,651",3.0,10.0,"[""FJ Labs"", ""Speedinvest"", ""All Iron Ventures""...",https://www.failory.com/startups/austria,51.0,100.0,19239651.0,"[""Apps"", ""E-Commerce"", ""Electronics"", ""Marketp...","[""J\u00c3\u00bcrgen Riedl"", ""Kilian Kaminski"",..."
7,8,Austria,TourRadar,TourRadar is a website that allows you to comp...,Wien,Vienna,2010.0,"Shawn Pittman, Travis Pittman","Marketplace, Search Engine, Tourism, Travel",251-500,"$66,500,000",5.0,14.0,"[""TCV"", ""Speedinvest"", ""Cherry Ventures"", ""End...",https://www.failory.com/startups/austria,251.0,500.0,66500000.0,"[""Marketplace"", ""Search Engine"", ""Tourism"", ""T...","[""Shawn Pittman"", ""Travis Pittman""]"
8,9,Austria,byrd,byrd is a member of an international e-commerc...,Wien,Vienna,2016.0,"Alexander Leichter, Christoph Krofitsch, Petra...","E-Commerce, Logistics, Supply Chain Management",51-100,,3.0,8.0,"[""FJ Labs"", ""Speedinvest"", ""VentureFriends"", ""...",https://www.failory.com/startups/austria,51.0,100.0,,"[""E-Commerce"", ""Logistics"", ""Supply Chain Mana...","[""Alexander Leichter"", ""Christoph Krofitsch"", ..."
9,10,Austria,Mimo,Mimo is a program that teaches computer scienc...,Wien,Vienna,2016.0,"Dennis Daume, Henry Ameseder, Johannes Berger,...","E-Learning, Information Technology, Mobile Apps",11-50,"$650,000",2.0,8.0,"[""['Techstars'"", ""'Techstars Berlin Accelerato...",https://www.failory.com/startups/austria,11.0,50.0,650000.0,"[""E-Learning"", ""Information Technology"", ""Mobi...","[""Dennis Daume"", ""Henry Ameseder"", ""Johannes B..."


In [21]:
# Saving the cleaned file as a csv for further use and ending the notebook here
output_path = "failory_clean_eu_companies.csv"

df_final.to_csv(output_path, index=False, encoding="utf-8")

print(f"Exported cleaned dataset to: {output_path}")
print(df_final.shape)
df_final.head()


Exported cleaned dataset to: failory_clean_eu_companies.csv
(5103, 20)


Unnamed: 0,rank,country,name,description,state,city,started_in,founders,industries,employees,funding_raw,funding_rounds,investors_count,investors_list,source_url,employees_min,employees_max,funding_usd,industries_list,founders_list
0,1,Austria,Bitpanda,Bitpanda is a digital asset exchange that make...,Wien,Vienna,2014.0,"Christian Trummer, Eric Demuth, Paul Klanschek","Bitcoin, Blockchain, Cryptocurrency, FinTech",251-500,"$283,013,472",5.0,6.0,"[""DST Global"", ""Wintermute Trading"", ""Speedinv...",https://www.failory.com/startups/austria,251.0,500.0,283013472.0,"[""Bitcoin"", ""Blockchain"", ""Cryptocurrency"", ""F...","[""Christian Trummer"", ""Eric Demuth"", ""Paul Kla..."
1,2,Austria,Tractive,Tractive is a company that creates GPS trackin...,Oberosterreich,Pasching,2012.0,"Michael Hurnaus, Michael Lettner, Michael Tsch...","GPS, Pet, Software, Wearables",51-100,"$37,687,997",5.0,5.0,"[""['Guidepost Growth Equity'"", ""'Monkfish Equi...",https://www.failory.com/startups/austria,51.0,100.0,37687997.0,"[""GPS"", ""Pet"", ""Software"", ""Wearables""]","[""Michael Hurnaus"", ""Michael Lettner"", ""Michae..."
2,3,Austria,GoStudent,"In a virtual learning environment, GoStudent i...",Wien,Vienna,2016.0,"Felix Ohswald, Gregor Müller, Moritz Ohswald","Apps, Continuing Education, EdTech, Education",251-500,"$100,662,844",6.0,7.0,"[""DN Capital"", ""Coatue"", ""Speedinvest"", ""Left ...",https://www.failory.com/startups/austria,251.0,500.0,100662844.0,"[""Apps"", ""Continuing Education"", ""EdTech"", ""Ed...","[""Felix Ohswald"", ""Gregor M\u00c3\u00bcller"", ..."
3,4,Austria,Allcyte,Allcyte is a biotech start-up focused on patie...,Wien,Vienna,2017.0,"Berend Snijder, Giulio Superti-Furga, Gregory ...","Biotechnology, Health Diagnostics, Machine Lea...",11-50,"$6,000,000",2.0,5.0,"[""42CAP"", ""Air Street Capital"", ""PUSH Ventures...",https://www.failory.com/startups/austria,11.0,50.0,6000000.0,"[""Biotechnology"", ""Health Diagnostics"", ""Machi...","[""Berend Snijder"", ""Giulio Superti-Furga"", ""Gr..."
4,5,Austria,Adverity,Adverity is a data-driven marketing analytics ...,Wien,Vienna,2015.0,"Alexander Igelsböck, Andreas Glänzer, Martin B...","Advertising, Analytics, Artificial Intelligenc...",101-250,"$46,298,685",4.0,7.0,"[""Sapphire Ventures"", ""Speedinvest"", ""Felix Ca...",https://www.failory.com/startups/austria,101.0,250.0,46298685.0,"[""Advertising"", ""Analytics"", ""Artificial Intel...","[""Alexander Igelsb\u00c3\u00b6ck"", ""Andreas Gl..."
