# Indian Startups Funding Analysis

In [17]:
import numpy as np 
import pandas as pd 

### Obtaining the data

In [18]:
# Load the raw funding records from the Excel workbook.
source_workbook = "funding.xlsx"
funding_data = pd.read_excel(source_workbook)

# Preview the first five rows to confirm the columns were parsed as expected.
funding_data.head(5)

Unnamed: 0,Date,Startup Name,Industry Vertical,SubVertical,City,Investors Name,Investment Type,Amount in USD,Remarks
0,2020-01-09,BYJUS,E-Tech,E-learning,Bangalore,Tiger Global Management,Private Equity Round,200000000,
1,2020-01-13,Shuttl,Transportation,App based shuttle service,Gurgaon,Susquehanna Growth Equity,Series C,8048394,
2,2020-01-09,Mamaearth,E-commerce,Retailer of baby and toddler products,Bangalore,Sequoia Capital India,Series B,18358860,
3,2020-01-02,wealthbucket,FinTech,Online Investment,New Delhi,Vinod Khatumal,Pre-series A,3000000,
4,2020-01-02,Fashor,Fashion and Apparel,Embroiled Clothes For Women,Mumbai,Sprout Venture Partners,Seed Round,1800000,


### Exploratory Analysis

In [19]:
# Report the dataset dimensions as a (rows, columns) tuple.
print(f"Size of data {funding_data.shape}")

Size of data (3044, 9)


Missing Data

In [20]:
# Boolean mask of missing cells, computed once and reused below.
null_mask = funding_data.isna()

# Number of missing cells per column, largest first.
total = null_mask.sum().sort_values(ascending=False)

# Missing cells as a percentage of the rows in each column, largest first.
percent = (null_mask.sum() / null_mask.count() * 100).sort_values(ascending=False)

# Side-by-side summary: one row per column of the dataset.
missing_data = pd.concat({"Total": total, "Percent": percent}, axis=1)
missing_data

Unnamed: 0,Total,Percent
Remarks,2625,86.235217
Amount in USD,964,31.668857
SubVertical,936,30.749014
City,184,6.044678
Industry Vertical,171,5.617608
Investors Name,24,0.788436
Investment Type,4,0.131406
Startup Name,0,0.0
Date,0,0.0


Removing the "Remarks" column, which is empty in over 86% of the rows

In [21]:
# Drop the sparsely populated "Remarks" column.
# `drop(..., errors="ignore")` returns a new frame and does nothing when the
# column is already gone, so this cell can be re-run safely; the previous
# `del funding_data["Remarks"]` raised KeyError on a second execution.
funding_data = funding_data.drop(columns=["Remarks"], errors="ignore")
funding_data.head()

Unnamed: 0,Date,Startup Name,Industry Vertical,SubVertical,City,Investors Name,Investment Type,Amount in USD
0,2020-01-09,BYJUS,E-Tech,E-learning,Bangalore,Tiger Global Management,Private Equity Round,200000000
1,2020-01-13,Shuttl,Transportation,App based shuttle service,Gurgaon,Susquehanna Growth Equity,Series C,8048394
2,2020-01-09,Mamaearth,E-commerce,Retailer of baby and toddler products,Bangalore,Sequoia Capital India,Series B,18358860
3,2020-01-02,wealthbucket,FinTech,Online Investment,New Delhi,Vinod Khatumal,Pre-series A,3000000
4,2020-01-02,Fashor,Fashion and Apparel,Embroiled Clothes For Women,Mumbai,Sprout Venture Partners,Seed Round,1800000


Converting "Amount in USD" to numeric

In [22]:
# Placeholder strings used where the amount was not disclosed; each is mapped to "0".
undisclosed_markers = ["undisclosed", "unknown", "N/A", "Undisclosed"]


def parse_amount(raw_value):
    """Return ``raw_value`` as a float after removing every comma from its text form."""
    return float(str(raw_value).replace(",", ""))


# Substitute the placeholders first, then parse every cell to a float.
amount_text = funding_data["Amount in USD"].replace(undisclosed_markers, "0")
funding_data["Amount in USD"] = amount_text.apply(parse_amount)

In [23]:
# Ensure the amount column carries a numeric dtype, then preview the result.
amount_column = "Amount in USD"
funding_data[amount_column] = pd.to_numeric(funding_data[amount_column])
funding_data.head(5)

Unnamed: 0,Date,Startup Name,Industry Vertical,SubVertical,City,Investors Name,Investment Type,Amount in USD
0,2020-01-09,BYJUS,E-Tech,E-learning,Bangalore,Tiger Global Management,Private Equity Round,200000000.0
1,2020-01-13,Shuttl,Transportation,App based shuttle service,Gurgaon,Susquehanna Growth Equity,Series C,8048394.0
2,2020-01-09,Mamaearth,E-commerce,Retailer of baby and toddler products,Bangalore,Sequoia Capital India,Series B,18358860.0
3,2020-01-02,wealthbucket,FinTech,Online Investment,New Delhi,Vinod Khatumal,Pre-series A,3000000.0
4,2020-01-02,Fashor,Fashion and Apparel,Embroiled Clothes For Women,Mumbai,Sprout Venture Partners,Seed Round,1800000.0


Removing rows that have no value in the "Amount in USD" column

In [125]:
# Keep only rows that carry a usable funding amount.
# A row is discarded when its amount is missing (NaN) or equals one of the
# numeric placeholders for an undisclosed amount. The conversion step encodes
# undisclosed amounts as 0, so filtering on -1 alone (as this cell did before)
# never removed them; -1 is still excluded for backward compatibility.
amount = funding_data["Amount in USD"]
has_amount = amount.notna() & ~amount.isin([0, -1])

# Build a fresh, independent frame instead of calling dropna(inplace=True) on a
# boolean-indexed slice, which can raise SettingWithCopyWarning.
funding_data = funding_data.loc[has_amount].copy()

Exporting to Excel

In [24]:
from pandas import ExcelWriter

# Write the cleaned dataset to "FundingFinal.xlsx" (sheet "Sheet1").
# The context manager saves and closes the workbook on exit, even if
# `to_excel` raises; the explicit `writer.save()` call used previously was
# removed in pandas 2.0. `sheet_name` is passed by keyword because passing it
# positionally is deprecated.
with ExcelWriter('FundingFinal.xlsx') as writer:
    funding_data.to_excel(writer, sheet_name='Sheet1')

Further analysis is performed in Tableau.
The workbook can be found [here](https://public.tableau.com/shared/B989MGCBF?:display_count=y&:origin=viz_share_link).

The final storyboard developed for the dataset:

In [1]:
%%HTML
<!-- Tableau Public embed. The placeholder div holds a static image inside <noscript> and a hidden <object> carrying the viz parameters. The inline script sets the object's width to 100% and its height to 0.75 x the div's offsetWidth, then inserts a <script> element that loads viz_v1.js from public.tableau.com before the object. -->
<div class='tableauPlaceholder' id='viz1592574020769' style='position: relative'><noscript><a href='#'><img alt=' ' src='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;JF&#47;JF86HQKB9&#47;1_rss.png' style='border: none' /></a></noscript><object class='tableauViz'  style='display:none;'><param name='host_url' value='https%3A%2F%2Fpublic.tableau.com%2F' /> <param name='embed_code_version' value='3' /> <param name='path' value='shared&#47;JF86HQKB9' /> <param name='toolbar' value='yes' /><param name='static_image' value='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;JF&#47;JF86HQKB9&#47;1.png' /> <param name='animate_transition' value='yes' /><param name='display_static_image' value='yes' /><param name='display_spinner' value='yes' /><param name='display_overlay' value='yes' /><param name='display_count' value='yes' /><param name='language' value='en' /></object></div>                <script type='text/javascript'>                    var divElement = document.getElementById('viz1592574020769');                    var vizElement = divElement.getElementsByTagName('object')[0];                    vizElement.style.width='100%';vizElement.style.height=(divElement.offsetWidth*0.75)+'px';                    var scriptElement = document.createElement('script');                    scriptElement.src = 'https://public.tableau.com/javascripts/api/viz_v1.js';                    vizElement.parentNode.insertBefore(scriptElement, vizElement);                </script>