In [1]:
import os

import pandas as pd

In [3]:
# Location of the raw Shark Tank India dataset.
# Set the SHARK_TANK_CSV environment variable to point at the CSV on another
# machine; when it is unset, the original hard-coded path is used, so existing
# runs behave exactly as before.
DATA_PATH = os.environ.get(
    "SHARK_TANK_CSV",
    '/Users/dhanushsiddharthapinjerla/Desktop/Elevate labs/project/Shark Tank India.csv',
)

# Load the raw dataset into `df`; later cells rename its columns in place.
df = pd.read_csv(DATA_PATH)

In [12]:
def _snake_case(label):
    """Return `label` trimmed, lower-cased, and with spaces turned into underscores."""
    return label.strip().lower().replace(" ", "_")


# Normalise every column label so later cells can use snake_case names.
df.columns = [_snake_case(label) for label in df.columns]

In [23]:
# Sanity check after loading: banner, frame shape, and the first ten column labels.
first_ten_columns = df.columns[:10]
print(" Dataset loaded successfully.", "Shape: {}".format(df.shape), sep="\n")
print("Columns:", first_ten_columns)

 Dataset loaded successfully.
Shape: (634, 80)
Columns: Index(['season_number', 'startup_name', 'episode_number', 'pitch_number',
       'season_start', 'season_end', 'original_air_date', 'episode_title',
       'anchor', 'industry'],
      dtype='object')


In [24]:
# Columns carried into the analysis subset, in the order they are selected.
cols_needed = [
    "startup_name",
    "industry",
    "original_ask_amount",
    "total_deal_amount",
    "number_of_sharks_in_deal",
    "total_deal_equity",
    "number_of_presenters",
]


In [25]:
# Work on an independent copy of the selected columns so that the cleaning
# steps below do not modify `df`.
subset = df.loc[:, cols_needed].copy()

In [26]:
# Convert the two money columns to floats.
#
# Fixes relative to the previous version of this cell:
#   * The number pattern is a raw string. The old non-raw "(\d+\.?\d*)" relied on
#     invalid escape sequences, which newer Python versions warn about.
#   * Columns that pandas already parsed as numeric are cast directly instead of
#     being round-tripped through str. The round trip corrupted values that
#     render in scientific notation (e.g. "1e-05" was read back as 1.0) and
#     dropped the sign of negative values.
for col in ["original_ask_amount", "total_deal_amount"]:
    if pd.api.types.is_numeric_dtype(subset[col]):
        subset[col] = subset[col].astype(float)
        continue

    subset[col] = (
        subset[col]
        .astype(str)                            # missing values become "nan" -> no digits -> NaN
        .str.replace("[₹,]", "", regex=True)    # remove ₹ and commas
        .str.extract(r"(\d+\.?\d*)")[0]         # keep the first run of digits (optional decimals)
        .astype(float)
    )

In [27]:
# A pitch counts as funded when its deal amount is present and strictly positive.
deal_amount = subset["total_deal_amount"]
subset["funded_flag"] = deal_amount.notna() & deal_amount.gt(0)

In [28]:
# Per-industry roll-up: pitch count, funded count, and total / mean deal amount,
# ordered from the highest total funding to the lowest.
industry_aggregations = {
    "total_startups": ("startup_name", "count"),
    "funded_startups": ("funded_flag", "sum"),
    "total_funding": ("total_deal_amount", "sum"),
    "avg_funding": ("total_deal_amount", "mean"),
}
per_industry = subset.groupby("industry").agg(**industry_aggregations)
industry_summary = per_industry.reset_index().sort_values(
    by="total_funding", ascending=False
)

In [29]:
# Pitch count and funded count for each presenter-team size.
presenter_aggregations = {
    "total": ("startup_name", "count"),
    "funded": ("funded_flag", "sum"),
}
by_presenter_count = subset.groupby("number_of_presenters")
founder_success = by_presenter_count.agg(**presenter_aggregations).reset_index()

In [30]:
# Share of pitches that were funded, as a percentage, per presenter-team size.
funded_share = founder_success["funded"].div(founder_success["total"])
founder_success["success_rate_%"] = funded_share.mul(100)


In [31]:
# Write the cleaned subset and both summary tables to CSV files in the
# current working directory, without the index column.
exports = (
    (subset, "Cleaned_SharkTank_Main.csv"),
    (industry_summary, "Tableau_Industry_Summary.csv"),
    (founder_success, "Tableau_Founder_Success.csv"),
)
for frame, filename in exports:
    frame.to_csv(filename, index=False)

In [32]:
# Confirmation message listing the three files written by the export cell.
export_report = (
    " Files exported successfully:",
    "   - Cleaned_SharkTank_Main.csv",
    "   - Tableau_Industry_Summary.csv",
    "   - Tableau_Founder_Success.csv",
)
print(*export_report, sep="\n")

 Files exported successfully:
   - Cleaned_SharkTank_Main.csv
   - Tableau_Industry_Summary.csv
   - Tableau_Founder_Success.csv


In [33]:
# Show the first five rows of the industry summary (already sorted by total
# funding, descending) under a heading.
top_industries = industry_summary.head()
print("\n Top 5 Industries by Total Funding:", top_industries, sep="\n")


 Top 5 Industries by Total Funding:
               industry  total_startups  funded_startups  total_funding  \
2        Beauty/Fashion             127               74     5938.00000   
8     Food and Beverage             142               82     5372.60000   
14       Medical/Health              56               41     3383.00101   
16  Technology/Software              58               36     2887.00000   
13        Manufacturing              52               26     1631.00000   

    avg_funding  
2     80.243243  
8     65.519512  
14    82.512220  
16    80.194444  
13    62.730769  


In [34]:
# Show the full presenter-count success table under a heading.
print("\n Founder Success Summary:", founder_success, sep="\n")


 Founder Success Summary:
   number_of_presenters  total  funded  success_rate_%
0                     1    165      94       56.969697
1                     2    334     188       56.287425
2                     3    110      65       59.090909
3                     4     21      12       57.142857
4                     5      3       0        0.000000
5                     6      1       0        0.000000
