In [1]:
import pandas as pd
import numpy as np

# Load your ORIGINAL dataset
df = pd.read_csv('Shark Tank US dataset.csv')

print("Original data loaded...")

# 1. Handle "Not Applicable" NaN values in Deal Columns 
deal_columns = [
    'Total Deal Amount', 'Total Deal Equity', 'Deal Valuation',
    'Number of Sharks in Deal', 'Investment Amount Per Shark', 'Equity Per Shark',
    'Barbara Corcoran Investment Amount', 'Barbara Corcoran Investment Equity',
    'Mark Cuban Investment Amount', 'Mark Cuban Investment Equity',
    'Lori Greiner Investment Amount', 'Lori Greiner Investment Equity',
    'Robert Herjavec Investment Amount', 'Robert Herjavec Investment Equity',
    'Daymond John Investment Amount', 'Daymond John Investment Equity',
    'Kevin O Leary Investment Amount', 'Kevin O Leary Investment Equity',
    'Guest Investment Amount', 'Guest Investment Equity', 'Loan'
]
df[deal_columns] = df[deal_columns].fillna(0)
print("Cleaned numerical NaN values (filled with 0).")

# 2. Handle "Truly Missing" Categorical Data
df['Pitchers Gender'] = df['Pitchers Gender'].fillna('Unknown')
df['Pitchers State'] = df['Pitchers State'].fillna('Unknown')
df['Industry'] = df['Industry'].fillna('Unknown')
print("Cleaned categorical NaN values (filled with 'Unknown').")

# 3. Fix Data Types 
df['Original Air Date'] = pd.to_datetime(df['Original Air Date'], format='%d-%b-%y')
print("Converted 'Original Air Date' to datetime format.")

# 4. Drop Unnecessary Columns 
columns_to_drop = [
    'Pitchers Average Age',
    'Company Website',
    'Entrepreneur Names',
    'Season Start',
    'Season End'
]
df = df.drop(columns=columns_to_drop)
print("Dropped unnecessary columns.")

# 5. Save the Cleaned Data
df.to_csv('cleaned_shark_tank.csv', index=False)

print("\n--- SUCCESS! ---")
print("Your 'cleaned_shark_tank.csv' file has been created in your folder.")

Original data loaded...
Cleaned numerical NaN values (filled with 0).
Cleaned categorical NaN values (filled with 'Unknown').
Converted 'Original Air Date' to datetime format.
Dropped unnecessary columns.

--- SUCCESS! ---
Your 'cleaned_shark_tank.csv' file has been created in your folder.


In [2]:
import pandas as pd

# Load the CLEANED dataset
df = pd.read_csv('cleaned_shark_tank.csv')

#1. Overall Success Rate 
print(" Overall Analysis ")
total_pitches = len(df)
deals_made = df['Got Deal'].sum()
success_rate = (deals_made / total_pitches) * 100
print(f"Total Pitches: {total_pitches}")
print(f"Deals Made: {deals_made}")
print(f"Overall Success Rate: {success_rate:.2f}%\n")

# --- 2. Industry Trends ---
print(" Top 10 Industries by Number of Deals ")
industry_deals = df.groupby('Industry')['Got Deal'].sum().sort_values(ascending=False)
print(industry_deals.head(10))
print("\n")

# --- 3. Investor Success (Shark Leaderboard) ---
print(" Shark Leaderboard (Total Deals Made) ")
sharks = {
    'Barbara Corcoran': (df['Barbara Corcoran Investment Amount'] > 0).sum(),
    'Mark Cuban': (df['Mark Cuban Investment Amount'] > 0).sum(),
    'Lori Greiner': (df['Lori Greiner Investment Amount'] > 0).sum(),
    'Robert Herjavec': (df['Robert Herjavec Investment Amount'] > 0).sum(),
    'Daymond John': (df['Daymond John Investment Amount'] > 0).sum(),
    'Kevin O Leary': (df['Kevin O Leary Investment Amount'] > 0).sum(),
    'Guest': (df['Guest Investment Amount'] > 0).sum()
}
shark_leaderboard = sorted(sharks.items(), key=lambda item: item[1], reverse=True)
for shark, deals in shark_leaderboard:
    print(f"{shark}: {deals} deals")
print("\n")

# --- 4. Valuation Analysis ---
print(" Valuation Analysis ")
avg_requested_valuation = df['Valuation Requested'].mean()
print(f"Average Requested Valuation: ${avg_requested_valuation:,.2f}")

avg_deal_valuation = df[df['Got Deal'] == 1]['Deal Valuation'].mean()
print(f"Average Final Deal Valuation (for successful deals): ${avg_deal_valuation:,.2f}")

negotiation_diff = avg_requested_valuation - avg_deal_valuation
print(f"Average Negotiation Difference: ${negotiation_diff:,.2f}\n")

# --- 5. Founder Profile Analysis (Gender) ---
print(" Success Rate by Pitcher's Gender ")
gender_analysis = df.groupby('Pitchers Gender')['Got Deal'].mean() * 100
print(gender_analysis.sort_values(ascending=False))

 Overall Analysis 
Total Pitches: 1441
Deals Made: 882
Overall Success Rate: 61.21%

 Top 10 Industries by Number of Deals 
Industry
Food and Beverage          188
Lifestyle/Home             173
Fashion/Beauty             134
Fitness/Sports/Outdoors     93
Children/Education          87
Health/Wellness             40
Technology/Software         38
Pet Products                34
Business Services           22
Media/Entertainment         18
Name: Got Deal, dtype: int64


 Shark Leaderboard (Total Deals Made) 
Mark Cuban: 263 deals
Lori Greiner: 231 deals
Barbara Corcoran: 141 deals
Robert Herjavec: 135 deals
Guest: 135 deals
Kevin O Leary: 133 deals
Daymond John: 123 deals


 Valuation Analysis 
Average Requested Valuation: $3,662,736.68
Average Final Deal Valuation (for successful deals): $2,197,795.11
Average Negotiation Difference: $1,464,941.57

 Success Rate by Pitcher's Gender 
Pitchers Gender
Unknown       71.428571
Mixed Team    66.304348
Female        63.636364
Male          58.