## Company X's StartUp Battlefield event project

### 1. Data warehouse

#### Importing required Python libraries

In [43]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [44]:
# Load the startup dataset from the CSV file (relative path) into a DataFrame
# and preview its first rows to confirm the columns were parsed as expected.
startupData = pd.read_csv('CompanyX_EU.csv')
startupData.head()

Unnamed: 0,Startup,Product,Funding,Event,Result,OperatingState
0,2600Hz,2600hz.com,,Disrupt SF 2013,Contestant,Operating
1,3DLT,3dlt.com,$630K,Disrupt NYC 2013,Contestant,Closed
2,3DPrinterOS,3dprinteros.com,,Disrupt SF 2016,Contestant,Operating
3,3Dprintler,3dprintler.com,$1M,Disrupt NY 2016,Audience choice,Operating
4,42 Technologies,42technologies.com,,Disrupt NYC 2013,Contestant,Operating


### 2. Data Exploration

In [45]:
# Dimensions of the dataset as a (rows, columns) tuple
startupData.shape

(662, 6)

In [46]:
# Data type of each column, returned as a Series indexed by column name
startupData.dtypes

Startup           object
Product           object
Funding           object
Event             object
Result            object
OperatingState    object
dtype: object

In [47]:
# Same dtype listing, rendered as a one-column table labelled 'Data Type'
startupData.dtypes.to_frame(name='Data Type')

Unnamed: 0,Data Type
Startup,object
Product,object
Funding,object
Event,object
Result,object
OperatingState,object


In [48]:
# Concise summary of the dataset: index range, per-column non-null counts,
# dtypes and memory usage
startupData.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 662 entries, 0 to 661
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Startup         662 non-null    object
 1   Product         656 non-null    object
 2   Funding         448 non-null    object
 3   Event           662 non-null    object
 4   Result          662 non-null    object
 5   OperatingState  662 non-null    object
dtypes: object(6)
memory usage: 31.2+ KB


In [49]:
# Flag, per column, whether at least one value is missing
startupData.isna().any()

Startup           False
Product            True
Funding            True
Event             False
Result            False
OperatingState    False
dtype: bool

In [50]:
# Total number of missing values in the whole DataFrame: the first sum() counts
# nulls per column, the second adds those per-column counts together.
startupData.isna().sum().sum()

220

In [51]:
# Summary statistics. Every column is of object dtype, so describe() reports
# count / unique / top / freq rather than numeric statistics.
startupData.describe()

Unnamed: 0,Startup,Product,Funding,Event,Result,OperatingState
count,662,656,448,662,662,662
unique,662,656,240,26,5,4
top,HealthCrew,crowdspirit.com,$1M,TC50 2008,Contestant,Operating
freq,1,1,17,52,488,465


In [52]:
# Same summary with one row per column, which is easier to scan
startupData.describe().T

Unnamed: 0,count,unique,top,freq
Startup,662,662,HealthCrew,1
Product,656,656,crowdspirit.com,1
Funding,448,240,$1M,17
Event,662,26,TC50 2008,52
Result,662,5,Contestant,488
OperatingState,662,4,Operating,465


### 3. Data preprocessing & visualisation

In [53]:
# Drop every row that has a missing value in any column. dropna() returns a
# new DataFrame, so startupData itself is left untouched.
# The defaults (row-wise, how='any', not in place) already express this;
# passing both `how` and `thresh` explicitly raises a TypeError on
# pandas 1.5 and later, so the redundant arguments are omitted.
cleanedStartupData = startupData.dropna()
cleanedStartupData

Unnamed: 0,Startup,Product,Funding,Event,Result,OperatingState
1,3DLT,3dlt.com,$630K,Disrupt NYC 2013,Contestant,Closed
3,3Dprintler,3dprintler.com,$1M,Disrupt NY 2016,Audience choice,Operating
5,5to1,5to1.com,$19.3M,TC50 2009,Contestant,Acquired
6,8 Securities,8securities.com,$29M,Disrupt Beijing 2011,Finalist,Operating
10,AdhereTech,adheretech.com,$1.8M,Hardware Battlefield 2014,Contestant,Operating
...,...,...,...,...,...,...
657,Zivity,zivity.com,$8M,TC40 2007,Contestant,Operating
658,Zmorph,zmorph3d.com,$1M,-,Audience choice,Operating
659,Zocdoc,zocdoc.com,$223M,TC40 2007,Contestant,Operating
660,Zula,zulaapp.com,$3.4M,Disrupt SF 2013,Audience choice,Operating


In [54]:
# Repeat the null check on the cleaned frame: every column should report False
# now that rows containing missing values have been dropped.
cleanedStartupData.isna().any()

Startup           False
Product           False
Funding           False
Event             False
Result            False
OperatingState    False
dtype: bool

In [55]:
# Total count of missing values left in the cleaned frame; 0 confirms that the
# cleaning step removed every row with a null.
cleanedStartupData.isna().sum().sum()

0

In [60]:
# The recorded output of this cell shows Funding without its leading '$', but
# no visible cell performs that step (execution counts jump from 55 to 60), so
# a fresh Restart & Run All would not reproduce it. Strip the symbol here.
# str.lstrip is idempotent, so re-running the cell is harmless, and assign()
# builds a new frame instead of writing into the result of dropna().
cleanedStartupData = cleanedStartupData.assign(
    Funding=cleanedStartupData['Funding'].str.lstrip('$')
)
cleanedStartupData

Unnamed: 0,Startup,Product,Funding,Event,Result,OperatingState
1,3DLT,3dlt.com,630K,Disrupt NYC 2013,Contestant,Closed
3,3Dprintler,3dprintler.com,1M,Disrupt NY 2016,Audience choice,Operating
5,5to1,5to1.com,19.3M,TC50 2009,Contestant,Acquired
6,8 Securities,8securities.com,29M,Disrupt Beijing 2011,Finalist,Operating
10,AdhereTech,adheretech.com,1.8M,Hardware Battlefield 2014,Contestant,Operating
...,...,...,...,...,...,...
657,Zivity,zivity.com,8M,TC40 2007,Contestant,Operating
658,Zmorph,zmorph3d.com,1M,-,Audience choice,Operating
659,Zocdoc,zocdoc.com,223M,TC40 2007,Contestant,Operating
660,Zula,zulaapp.com,3.4M,Disrupt SF 2013,Audience choice,Operating
