# Introduction

As a data analyst, I have been tasked to analyze the provided datasets (Indian Start-up Funding 2018-2021) and advise which sector is lucrative enough to invest in, based on the findings and conclusions drawn from the analysis.

In [1]:
## Load appropriate modules
import warnings
import re
import pandas as pd

In [2]:
warnings.filterwarnings('ignore')

Load all datasets

In [3]:
# Read the raw 2018 funding records into df_18.
df_18 = pd.read_csv(filepath_or_buffer='./data/startup_funding2018.csv')

Preview the first rows of the dataset to see how it looks

In [4]:
# Show the first five rows of the raw 2018 frame.
df_18.head()

Unnamed: 0,Company Name,Industry,Round/Series,Amount,Location,About Company
0,TheCollegeFever,"Brand Marketing, Event Promotion, Marketing, S...",Seed,250000,"Bangalore, Karnataka, India","TheCollegeFever is a hub for fun, fiesta and f..."
1,Happy Cow Dairy,"Agriculture, Farming",Seed,"₹40,000,000","Mumbai, Maharashtra, India",A startup which aggregates milk from dairy far...
2,MyLoanCare,"Credit, Financial Services, Lending, Marketplace",Series A,"₹65,000,000","Gurgaon, Haryana, India",Leading Online Loans Marketplace in India
3,PayMe India,"Financial Services, FinTech",Angel,2000000,"Noida, Uttar Pradesh, India",PayMe India is an innovative FinTech organizat...
4,Eunimart,"E-Commerce Platforms, Retail, SaaS",Seed,—,"Hyderabad, Andhra Pradesh, India",Eunimart is a one stop solution for merchants ...


In [5]:
# (rows, columns) of the raw 2018 frame.
df_18.shape

(526, 6)

In [6]:
## Summary statistics for the 2018 frame. The output reports count/unique/top/freq
## for every column (including Amount), i.e. all columns are summarised as non-numeric.
df_18.describe()

Unnamed: 0,Company Name,Industry,Round/Series,Amount,Location,About Company
count,526,526,526,526,526,526
unique,525,405,21,198,50,524
top,TheCollegeFever,—,Seed,—,"Bangalore, Karnataka, India","TheCollegeFever is a hub for fun, fiesta and f..."
freq,2,30,280,148,102,2


In [7]:
# Column labels as loaded, before any renaming.
df_18.columns

Index(['Company Name', 'Industry', 'Round/Series', 'Amount', 'Location',
       'About Company'],
      dtype='object')

In [8]:
# Keep only the text before the first comma of Location (the city part of "City, State, Country").
df_18['Location'] = df_18['Location'].str.split(',').str.get(0)

In [9]:
# Per-column non-null counts over the rows whose Location is exactly 'Bengaluru'.
bengaluru = df_18[df_18['Location'].eq('Bengaluru')].count()

In [10]:
# Only rewrite the column when at least one 'Bengaluru' row was counted above;
# the replacement unifies the two spellings under 'Bangalore'.
if bengaluru['Location'] > 0:
    df_18['Location'] = df_18['Location'].str.replace('Bengaluru', 'Bangalore')

In [11]:
# Rename the 2018 columns in place to the labels used by the other yearly files
# ('Round/Series' keeps its name).
df_18.rename(
    {
        'Company Name': 'Company/Brand',
        'Industry': 'Sector',
        'Amount': 'Amount($)',
        'Location': 'HeadQuarter',
        'About Company': 'What it does',
    },
    axis='columns',
    inplace=True,
)

In [12]:
# Confirm the renamed column labels.
df_18.columns

Index(['Company/Brand', 'Sector', 'Round/Series', 'Amount($)', 'HeadQuarter',
       'What it does'],
      dtype='object')

In [13]:
# Inspect the 2018 frame after the city extraction and the column renaming.
df_18

Unnamed: 0,Company/Brand,Sector,Round/Series,Amount($),HeadQuarter,What it does
0,TheCollegeFever,"Brand Marketing, Event Promotion, Marketing, S...",Seed,250000,Bangalore,"TheCollegeFever is a hub for fun, fiesta and f..."
1,Happy Cow Dairy,"Agriculture, Farming",Seed,"₹40,000,000",Mumbai,A startup which aggregates milk from dairy far...
2,MyLoanCare,"Credit, Financial Services, Lending, Marketplace",Series A,"₹65,000,000",Gurgaon,Leading Online Loans Marketplace in India
3,PayMe India,"Financial Services, FinTech",Angel,2000000,Noida,PayMe India is an innovative FinTech organizat...
4,Eunimart,"E-Commerce Platforms, Retail, SaaS",Seed,—,Hyderabad,Eunimart is a one stop solution for merchants ...
...,...,...,...,...,...,...
521,Udaan,"B2B, Business Development, Internet, Marketplace",Series C,225000000,Bangalore,"Udaan is a B2B trade platform, designed specif..."
522,Happyeasygo Group,"Tourism, Travel",Series A,—,Haryana,HappyEasyGo is an online travel domain.
523,Mombay,"Food and Beverage, Food Delivery, Internet",Seed,7500,Mumbai,Mombay is a unique opportunity for housewives ...
524,Droni Tech,Information Technology,Seed,"₹35,000,000",Mumbai,Droni Tech manufacture UAVs and develop softwa...


In [14]:
# Tag every 2018 record with its funding year (inserted at column position 5).
# DataFrame.insert raises ValueError when the column already exists, so guard the
# call to keep this cell safe to re-run.
if 'Funding Year' not in df_18.columns:
    df_18.insert(5, "Funding Year", 2018)

In [15]:
# Keep and order the six columns listed here; 'Round/Series' is not listed and is
# therefore dropped from df_18.
df_18 = df_18.reindex(
    [
        'Company/Brand',
        'HeadQuarter',
        'Sector',
        'What it does',
        'Amount($)',
        'Funding Year',
    ],
    axis='columns',
)

In [16]:
# Rows whose amount string begins with the rupee sign.
rupees = df_18.loc[df_18['Amount($)'].str.startswith("₹")]
rupees

Unnamed: 0,Company/Brand,HeadQuarter,Sector,What it does,Amount($),Funding Year
1,Happy Cow Dairy,Mumbai,"Agriculture, Farming",A startup which aggregates milk from dairy far...,"₹40,000,000",2018
2,MyLoanCare,Gurgaon,"Credit, Financial Services, Lending, Marketplace",Leading Online Loans Marketplace in India,"₹65,000,000",2018
6,Tripshelf,Kalkaji,"Internet, Leisure, Marketplace",Tripshelf is an online market place for holida...,"₹16,000,000",2018
7,Hyperdata.IO,Hyderabad,Market Research,Hyperdata combines advanced machine learning w...,"₹50,000,000",2018
15,Pitstop,Bangalore,"Automotive, Search Engine, Service Industry",Pitstop offers general repair and maintenance ...,"₹100,000,000",2018
...,...,...,...,...,...,...
513,Nykaa,Mumbai,"Beauty, Fashion, Wellness",Nykaa.com is a premier online beauty and welln...,"₹1,130,000,000",2018
514,Chaayos,New Delhi,"Food and Beverage, Restaurants, Tea",Chaayos was born in November 2012 out of this ...,"₹810,000,000",2018
516,LT Foods,New Delhi,"Food and Beverage, Food Processing, Manufacturing",LT Foods believe that nature will continue to ...,"₹1,400,000,000",2018
517,Multibashi,Bangalore,"E-Learning, Internet",Free language learning platform.,"₹10,000,000",2018


In [17]:
# Load the historic exchange-rate table (one row per LOCATION and TIME) and display it.
exchange_rate = pd.read_csv(filepath_or_buffer='./data/historic_exchange_rate.csv')
exchange_rate

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,AUS,EXCH,TOT,NATUSD,A,2018,1.338412,
1,AUS,EXCH,TOT,NATUSD,A,2019,1.438507,
2,AUS,EXCH,TOT,NATUSD,A,2020,1.453085,
3,AUS,EXCH,TOT,NATUSD,A,2021,1.331224,
4,AUT,EXCH,TOT,NATUSD,A,2018,0.846773,
...,...,...,...,...,...,...,...,...
251,CMR,EXCH,TOT,NATUSD,A,2021,554.530675,
252,SEN,EXCH,TOT,NATUSD,A,2018,555.446458,
253,SEN,EXCH,TOT,NATUSD,A,2019,585.911013,
254,SEN,EXCH,TOT,NATUSD,A,2020,575.586005,


In [18]:
# Exchange-rate rows for India (LOCATION code 'IND').
indian_usd_rate = exchange_rate[exchange_rate['LOCATION'].eq('IND')]
indian_usd_rate

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
140,IND,EXCH,TOT,NATUSD,A,2018,68.389467,
141,IND,EXCH,TOT,NATUSD,A,2019,70.420341,
142,IND,EXCH,TOT,NATUSD,A,2020,74.099567,
143,IND,EXCH,TOT,NATUSD,A,2021,73.918013,


In [19]:
# Select the exchange-rate row(s) for India in 2018.
value = exchange_rate.loc[
    exchange_rate['LOCATION'].eq('IND') & exchange_rate['TIME'].eq(2018)
]

In [20]:
# Extract the 2018 INR-per-USD rate as a plain float.
# float(Series) on a one-element Series is deprecated in recent pandas; Series.item()
# returns the single scalar and raises ValueError unless exactly one row was selected.
rate_value = float(value['Value'].item())

In [21]:
# Temporary marker column; it is filled with per-row currency tags in the next cell.
df_18.loc[:, 'Amount(USD)'] = ''

In [22]:
# Tag each row by how its amount is written: 'R' = rupee sign prefix,
# 'U' = dollar sign prefix, 'E' = the "—" placeholder.
for currency_symbol, currency_tag in (("₹", 'R'), ("$", 'U')):
    df_18.loc[df_18['Amount($)'].str.startswith(currency_symbol), 'Amount(USD)'] = currency_tag
df_18.loc[df_18['Amount($)'].eq("—"), 'Amount(USD)'] = 'E'

In [23]:
# Drop the leading currency symbol (rupee first, then dollar) from the amount strings.
for currency_symbol in ("₹", "$"):
    has_symbol = df_18['Amount($)'].str.startswith(currency_symbol)
    df_18.loc[has_symbol, 'Amount($)'] = df_18['Amount($)'].str.slice(start=1)

# Amounts given as "—" are replaced with the placeholder '1.0'.
# NOTE(review): after the float cast these rows look like a real funding of 1.0 and
# will affect aggregates; consider a missing-value marker instead.
df_18.loc[df_18['Amount($)'].str.startswith("—"), 'Amount($)'] = '1.0'

In [24]:
# Remove thousands separators from the amount strings that contain a comma.
rows_with_comma = df_18['Amount($)'].str.contains(',')
df_18.loc[rows_with_comma, 'Amount($)'] = df_18['Amount($)'].str.replace(',', '')

In [25]:
# Cast the cleaned amount strings to floating-point numbers.
df_18['Amount($)'] = df_18['Amount($)'].astype('float64')

In [26]:
# Convert the rupee-tagged rows to USD by dividing by the 2018 INR-per-USD rate.
df_18.loc[df_18['Amount(USD)'].eq('R'), 'Amount($)'] = df_18['Amount($)'].div(rate_value)

In [27]:
# The temporary currency tag column is no longer needed; remove it in place.
df_18.drop('Amount(USD)', axis='columns', inplace=True)

In [28]:
# Inspect the 2018 frame after the amounts were cast to float and rupee rows converted.
df_18

Unnamed: 0,Company/Brand,HeadQuarter,Sector,What it does,Amount($),Funding Year
0,TheCollegeFever,Bangalore,"Brand Marketing, Event Promotion, Marketing, S...","TheCollegeFever is a hub for fun, fiesta and f...",2.500000e+05,2018
1,Happy Cow Dairy,Mumbai,"Agriculture, Farming",A startup which aggregates milk from dairy far...,5.848854e+05,2018
2,MyLoanCare,Gurgaon,"Credit, Financial Services, Lending, Marketplace",Leading Online Loans Marketplace in India,9.504388e+05,2018
3,PayMe India,Noida,"Financial Services, FinTech",PayMe India is an innovative FinTech organizat...,2.000000e+06,2018
4,Eunimart,Hyderabad,"E-Commerce Platforms, Retail, SaaS",Eunimart is a one stop solution for merchants ...,1.000000e+00,2018
...,...,...,...,...,...,...
521,Udaan,Bangalore,"B2B, Business Development, Internet, Marketplace","Udaan is a B2B trade platform, designed specif...",2.250000e+08,2018
522,Happyeasygo Group,Haryana,"Tourism, Travel",HappyEasyGo is an online travel domain.,1.000000e+00,2018
523,Mombay,Mumbai,"Food and Beverage, Food Delivery, Internet",Mombay is a unique opportunity for housewives ...,7.500000e+03,2018
524,Droni Tech,Mumbai,Information Technology,Droni Tech manufacture UAVs and develop softwa...,5.117747e+05,2018


In [29]:
# Persist the prepared 2018 frame without the index column.
df_18.to_csv(path_or_buf='./data/prepped_2018.csv', index=False)

In [30]:
# Read the raw 2020 funding records into df_20.
df_20 = pd.read_csv(filepath_or_buffer='./data/startup_funding2020.csv')

In [31]:
# Inspect the raw 2020 frame.
df_20

Unnamed: 0,Company/Brand,Founded,HeadQuarter,Sector,What it does,Founders,Investor,Amount($),Stage,Unnamed: 9
0,Aqgromalin,2019,Chennai,AgriTech,Cultivating Ideas for Profit,"Prasanna Manogaran, Bharani C L",Angel investors,"$200,000",,
1,Krayonnz,2019,Bangalore,EdTech,An academy-guardian-scholar centric ecosystem ...,"Saurabh Dixit, Gurudutt Upadhyay",GSF Accelerator,"$100,000",Pre-seed,
2,PadCare Labs,2018,Pune,Hygiene management,Converting bio-hazardous waste to harmless waste,Ajinkya Dhariya,Venture Center,Undisclosed,Pre-seed,
3,NCOME,2020,New Delhi,Escrow,Escrow-as-a-service platform,Ritesh Tiwari,"Venture Catalysts, PointOne Capital","$400,000",,
4,Gramophone,2016,Indore,AgriTech,Gramophone is an AgTech platform enabling acce...,"Ashish Rajan Singh, Harshit Gupta, Nishant Mah...","Siana Capital Management, Info Edge","$340,000",,
...,...,...,...,...,...,...,...,...,...,...
1050,Leverage Edu,,Delhi,Edtech,AI enabled marketplace that provides career gu...,Akshay Chaturvedi,"DSG Consumer Partners, Blume Ventures","$1,500,000",,
1051,EpiFi,,,Fintech,It offers customers with a single interface fo...,"Sujith Narayanan, Sumit Gwalani","Sequoia India, Ribbit Capital","$13,200,000",Seed Round,
1052,Purplle,2012,Mumbai,Cosmetics,Online makeup and beauty products retailer,"Manish Taneja, Rahul Dash",Verlinvest,"$8,000,000",,
1053,Shuttl,2015,Delhi,Transport,App based bus aggregator serice,"Amit Singh, Deepanshu Malviya",SIG Global India Fund LLP.,"$8,043,000",Series C,


In [32]:
# Per-column non-null counts over the rows whose HeadQuarter is exactly 'Gurugram'.
gurugram = df_20[df_20['HeadQuarter'].eq('Gurugram')].count()

# Unify the spelling to 'Gurgaon' when at least one such row exists.
if gurugram['HeadQuarter'] > 0:
    df_20['HeadQuarter'] = df_20['HeadQuarter'].str.replace('Gurugram', 'Gurgaon')

In [33]:
# Show the 2020 rows that have no amount at all.
df_20.loc[df_20['Amount($)'].isnull()]

Unnamed: 0,Company/Brand,Founded,HeadQuarter,Sector,What it does,Founders,Investor,Amount($),Stage,Unnamed: 9
29,Delhivery,2011,Gurgaon,E-commerce,Delhivery is a supply chain services company t...,"Kapil Bharati, Mohit Tandon, Sahil Barua, Sura...","Steadview Capital, Canada Pension Plan Investm...",,,
44,PointOne Capital,2020,Bangalore,Venture capitalist,Pre-seed/Seed focussed VC investor,Mihir Jha,,,,
235,Tring,2019,Mumbai,Social Media,"Tring is a celebrity engagement platform, for ...","Akshay Saini, Rahul Saini, Pranav Chabhadia","Sujeet Kumar, Hemesh Singh",,,


In [34]:
# Keep only the rows that have an amount. The explicit .copy() makes df_20 an
# independent frame, so the .loc assignments in the following cell modify it
# directly instead of writing into a slice of the original (SettingWithCopyWarning).
df_20 = df_20[df_20['Amount($)'].notna()].copy()

In [35]:
# 1. Drop the leading dollar sign.
starts_with_dollar = df_20['Amount($)'].str.startswith('$')
df_20.loc[starts_with_dollar, 'Amount($)'] = df_20['Amount($)'].str.slice(start=1)

# 2. Remove thousands separators.
rows_with_comma = df_20['Amount($)'].str.contains(',')
df_20.loc[rows_with_comma, 'Amount($)'] = df_20['Amount($)'].str.replace(',', '')

# 3. A range entry is replaced by its midpoint.
df_20.loc[df_20['Amount($)'].eq('800000000 to $850000000'), 'Amount($)'] = '825000000'

# 4. Every spelling of "undisclosed" found in the data becomes the placeholder '1.0'.
undisclosed_spellings = ['Undiclsosed', 'Undislosed', 'Undisclosed']
df_20.loc[df_20['Amount($)'].isin(undisclosed_spellings), 'Amount($)'] = '1.0'

# 5. Manual patch of the row labelled 465.
# NOTE(review): the original value of this row is not visible here — document why it is overridden.
df_20.at[465, 'Amount($)'] = '23000000'

In [36]:
# Cast the cleaned 2020 amount strings to floating-point numbers.
df_20['Amount($)'] = df_20['Amount($)'].astype('float64')

In [37]:
# Persist the prepared 2020 frame without the index column.
# NOTE(review): unlike the 2018 and 2019 frames, no columns were dropped and no
# 'Funding Year' column was added before saving — confirm this is handled elsewhere.
df_20.to_csv(path_or_buf='./data/prepped_2020.csv', index=False)

In [38]:
# Read the raw 2019 funding records into df_19.
df_19 = pd.read_csv(filepath_or_buffer='./data/startup_funding2019.csv')

In [39]:
# Inspect the raw 2019 frame.
df_19

Unnamed: 0,Company/Brand,Founded,HeadQuarter,Sector,What it does,Founders,Investor,Amount($),Stage
0,Bombay Shaving,,,Ecommerce,Provides a range of male grooming products,Shantanu Deshpande,Sixth Sense Ventures,"$6,300,000",
1,Ruangguru,2014.0,Mumbai,Edtech,A learning platform that provides topic-based ...,"Adamas Belva Syah Devara, Iman Usman.",General Atlantic,"$150,000,000",Series C
2,Eduisfun,,Mumbai,Edtech,It aims to make learning fun via games.,Jatin Solanki,"Deepak Parekh, Amitabh Bachchan, Piyush Pandey","$28,000,000",Fresh funding
3,HomeLane,2014.0,Chennai,Interior design,Provides interior designing solutions,"Srikanth Iyer, Rama Harinath","Evolvence India Fund (EIF), Pidilite Group, FJ...","$30,000,000",Series D
4,Nu Genes,2004.0,Telangana,AgriTech,"It is a seed company engaged in production, pr...",Narayana Reddy Punyala,Innovation in Food and Agriculture (IFA),"$6,000,000",
...,...,...,...,...,...,...,...,...,...
84,Infra.Market,,Mumbai,Infratech,It connects client requirements to their suppl...,"Aaditya Sharda, Souvik Sengupta","Tiger Global, Nexus Venture Partners, Accel Pa...","$20,000,000",Series A
85,Oyo,2013.0,Gurugram,Hospitality,Provides rooms for comfortable stay,Ritesh Agarwal,"MyPreferred Transformation, Avendus Finance, S...","$693,000,000",
86,GoMechanic,2016.0,Delhi,Automobile & Technology,Find automobile repair and maintenance service...,"Amit Bhasin, Kushal Karwa, Nitin Rana, Rishabh...",Sequoia Capital,"$5,000,000",Series B
87,Spinny,2015.0,Delhi,Automobile,Online car retailer,"Niraj Singh, Ramanshu Mahaur, Ganesh Pawar, Mo...","Norwest Venture Partners, General Catalyst, Fu...","$50,000,000",


In [40]:
# Remove, in place, the four columns that the prepared 2018 frame does not have.
df_19.drop(['Founded', 'Founders', 'Investor', 'Stage'], axis='columns', inplace=True)

In [41]:
# Tag every 2019 record with its funding year (inserted at column position 5).
# DataFrame.insert raises ValueError when the column already exists, so guard the
# call to keep this cell safe to re-run.
if 'Funding Year' not in df_19.columns:
    df_19.insert(5, "Funding Year", 2019)

In [42]:
# Inspect the 2019 frame after dropping columns and adding 'Funding Year'.
df_19

Unnamed: 0,Company/Brand,HeadQuarter,Sector,What it does,Amount($),Funding Year
0,Bombay Shaving,,Ecommerce,Provides a range of male grooming products,"$6,300,000",2019
1,Ruangguru,Mumbai,Edtech,A learning platform that provides topic-based ...,"$150,000,000",2019
2,Eduisfun,Mumbai,Edtech,It aims to make learning fun via games.,"$28,000,000",2019
3,HomeLane,Chennai,Interior design,Provides interior designing solutions,"$30,000,000",2019
4,Nu Genes,Telangana,AgriTech,"It is a seed company engaged in production, pr...","$6,000,000",2019
...,...,...,...,...,...,...
84,Infra.Market,Mumbai,Infratech,It connects client requirements to their suppl...,"$20,000,000",2019
85,Oyo,Gurugram,Hospitality,Provides rooms for comfortable stay,"$693,000,000",2019
86,GoMechanic,Delhi,Automobile & Technology,Find automobile repair and maintenance service...,"$5,000,000",2019
87,Spinny,Delhi,Automobile,Online car retailer,"$50,000,000",2019


In [43]:
# Per-column non-null counts over the rows whose HeadQuarter is exactly 'Gurugram'.
gurugram = df_19[df_19['HeadQuarter'].eq('Gurugram')].count()

# Unify the spelling to 'Gurgaon' when at least one such row exists.
if gurugram['HeadQuarter'] > 0:
    df_19['HeadQuarter'] = df_19['HeadQuarter'].str.replace('Gurugram', 'Gurgaon')

In [44]:
# 1. Drop the leading dollar sign.
starts_with_dollar = df_19['Amount($)'].str.startswith('$')
df_19.loc[starts_with_dollar, 'Amount($)'] = df_19['Amount($)'].str.slice(start=1)

# 2. 'Undisclosed' amounts become the placeholder '1.0'.
df_19.loc[df_19['Amount($)'].eq('Undisclosed'), 'Amount($)'] = '1.0'

# 3. Remove thousands separators.
rows_with_comma = df_19['Amount($)'].str.contains(',')
df_19.loc[rows_with_comma, 'Amount($)'] = df_19['Amount($)'].str.replace(',', '')

In [45]:
# Cast the cleaned 2019 amount strings to floating-point numbers.
df_19['Amount($)'] = df_19['Amount($)'].astype('float64')

In [46]:
# Persist the prepared 2019 frame without the index column.
df_19.to_csv(path_or_buf='./data/prepped_2019.csv', index=False)

In [None]:
# NOTE(review): this cell has not been executed and reloads the raw 2019 CSV into
# df_19, replacing the cleaned frame built above — confirm this is intentional
# or remove the cell.
df_19 = pd.read_csv('./data/startup_funding2019.csv')