Download dataset

In [None]:
import kagglehub

# Fetch the latest version of the Shark Tank US dataset through kagglehub.
# `path` keeps the value returned by dataset_download; the loading cell joins
# the CSV file name onto it.
dataset_handle = "thirumani/shark-tank-us-dataset"
path = kagglehub.dataset_download(dataset_handle)

print(f"Path to dataset files: {path}")

Import libraries


In [None]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
import os

Load dataset


In [None]:
# Locate the CSV inside the downloaded dataset directory and parse it into the
# DataFrame that the analysis cells below read from.
csv_name = "Shark Tank US dataset.csv"
file_path = os.path.join(path, csv_name)
df = pd.read_csv(file_path)

Industries liked by sharks vs. viewers

In [None]:
# Per-industry sums of the 'Got Deal' column (filtered as == 1 for successful
# pitches in the gender cell below) and of 'US Viewership'.
# NOTE(review): viewership is summed over pitches, so the x-axis grows with the
# number of pitches in an industry -- confirm a total (not a mean) is intended.
industry_groups = df.groupby('Industry')
result_sum = industry_groups[['Got Deal', 'US Viewership']].sum().reset_index()

# Number of rows (pitches) recorded for each industry.
industry_count = (
    df['Industry']
    .value_counts()
    .rename_axis('Industry')
    .reset_index(name='Count')
)

# Combine sums and counts, derive the success rate in percent, and drop the
# catch-all 'Uncertain/Other' bucket from the chart data.
final_result = (
    result_sum
    .merge(industry_count, on='Industry', how='outer')
    .assign(**{'success rate': lambda t: t['Got Deal'] / t['Count'] * 100})
    .loc[lambda t: t['Industry'] != 'Uncertain/Other']
)

# One labelled point per industry: summed viewership against success rate.
fig = px.scatter(
    data_frame=final_result,
    x='US Viewership',
    y='success rate',
    hover_name='Industry',
    text='Industry',
    title='US Viewership vs Success Rate by Industry',
    labels={
        'US Viewership': 'US Viewership (in million)',
        'success rate': 'Success Rate (%)',
    },
    color_discrete_sequence=['blue'],
)

# Put the industry labels above the markers and make them large enough to read.
fig.update_traces(
    textposition='top center',
    textfont={'size': 20},
    marker={'opacity': 0.7},
)

# Leave some headroom to the right of the largest x value so its label fits.
x_axis_upper = max(final_result['US Viewership']) + 100

# Figure size, axis styling, x ticks every 400 units, and tight margins.
fig.update_layout(
    width=1800,
    height=1200,
    xaxis={
        'title_font': {'size': 20},
        'tickfont': {'size': 12},
        'dtick': 400,
        'range': [0, x_axis_upper],
    },
    yaxis={'title_font': {'size': 20}, 'tickfont': {'size': 12}},
    title={'font': {'size': 20}},
    hovermode='closest',
    margin={'l': 20, 'r': 20, 't': 50, 'b': 20},
)

fig.show()


Gender pie charts

In [None]:
def _show_gender_pie(gender_values, title, figsize):
    """Draw a single matplotlib pie chart for a per-gender Series and show it.

    Parameters
    ----------
    gender_values : pandas.Series
        Values that size the wedges; the Series index supplies the labels.
    title : str
        Title placed above the pie.
    figsize : tuple
        (width, height) handed to matplotlib for the new figure.
    """
    figure, ax = plt.subplots(figsize=figsize)
    ax.pie(
        gender_values.values,
        labels=gender_values.index,
        autopct='%1.1f%%',
        startangle=90,
        textprops={'fontsize': 20},
    )
    ax.set_title(title, fontsize=20)
    ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle
    plt.show()


# Keep only the pitches that ended in a deal
deals_made = df[df['Got Deal'] == 1]

# Share of each gender category among the successful pitches, in percent.
# This is the composition of the deals, not a per-gender success rate.
# value_counts leaves out rows with a missing gender by default.
gender_percentages = deals_made['Pitchers Gender'].value_counts(normalize=True) * 100

_show_gender_pie(
    gender_percentages,
    'Gender Distribution of Successful Pitchers',
    figsize=(8, 8),
)

# Number of pitches per gender category across all pitches, whatever the outcome
gender_counts = df['Pitchers Gender'].value_counts()

_show_gender_pie(
    gender_counts,
    'Gender Distribution of Pitchers',
    figsize=(10, 8),
)