In [3]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime

In [4]:
# Load the layoffs dataset from Google Drive.
# The path lives under /content/drive, so Drive has to be mounted there first.
money = '/content/drive/MyDrive/layoffs_staging2.csv'
layoff = pd.read_csv(money)

# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appends a stray "None" line to the output.
layoff.info()
layoff

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 9 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   company                1000 non-null   object 
 1   location               1000 non-null   object 
 2   industry               999 non-null    object 
 3   total_laid_off         816 non-null    float64
 4   percentage_laid_off    788 non-null    float64
 5   date                   999 non-null    object 
 6   stage                  998 non-null    object 
 7   country                1000 non-null   object 
 8   funds_raised_millions  911 non-null    float64
dtypes: float64(3), object(6)
memory usage: 70.4+ KB
None


Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised_millions
0,Included Health,SF Bay Area,Healthcare,,0.06,2022-07-25,Series E,United States,272.0
1,&Open,Dublin,Marketing,9.0,0.09,2022-11-17,Series A,Ireland,35.0
2,#Paid,Toronto,Marketing,19.0,0.17,2023-01-27,Series B,Canada,21.0
3,100 Thieves,Los Angeles,Consumer,12.0,,2022-07-13,Series C,United States,120.0
4,10X Genomics,SF Bay Area,Healthcare,100.0,0.08,2022-08-04,Post-IPO,United States,242.0
...,...,...,...,...,...,...,...,...,...
995,Loggi,Sao Paulo,Logistics,500.0,0.15,2022-08-08,Series F,Brazil,507.0
996,Loja Integrada,Sao Paulo,Retail,25.0,0.10,2022-08-25,Acquired,Brazil,
997,Lokalise,Dover,Other,76.0,0.23,2022-11-16,Series B,United States,56.0
998,Lola,Boston,Travel,34.0,,2020-03-19,Series C,United States,81.0


In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): the CSV read earlier in this notebook uses a path under
# /content/drive/MyDrive, so on a fresh runtime this cell presumably has to run
# before that read — consider moving it to the top of the notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# --- Column labels: trim surrounding whitespace and lowercase ---
layoff.columns = layoff.columns.map(lambda label: label.strip().lower())

# --- Types: unparseable dates become NaT, unparseable numbers become NaN ---
layoff["date"] = pd.to_datetime(layoff["date"], errors="coerce")

for name in ("total_laid_off", "percentage_laid_off", "funds_raised_millions"):
    if name not in layoff.columns:
        continue  # skip numeric columns that are absent from this file
    layoff[name] = pd.to_numeric(layoff[name], errors="coerce")

# --- Keep only dated rows (required for the time series), then derive
# --- calendar features and a log1p-scaled funding column ---
layoff = layoff[layoff["date"].notna()].assign(
    year=lambda frame: frame["date"].dt.year,
    month=lambda frame: frame["date"].dt.month,
    year_month=lambda frame: frame["date"].dt.to_period("M").astype(str),
    funds_log=lambda frame: np.log1p(frame["funds_raised_millions"]),
)

# Map a laid-off fraction to a human-readable severity label
def severity_bucket(p):
    """Return the severity label for a laid-off fraction ``p``.

    Missing values (anything ``pd.isna`` reports as NA) yield "Unknown".
    Otherwise the label belongs to the first band whose exclusive upper
    bound exceeds ``p``; values of 0.50 and above are "Extreme (50%+)".
    """
    if pd.isna(p):
        return "Unknown"

    # (exclusive upper bound, label), checked in ascending order.
    bands = (
        (0.10, "Low (<10%)"),
        (0.25, "Medium (10–25%)"),
        (0.50, "High (25–50%)"),
    )
    for upper_bound, label in bands:
        if p < upper_bound:
            return label
    return "Extreme (50%+)"
# Label every event with its severity bucket.
layoff["severity"] = layoff["percentage_laid_off"].map(severity_bucket)

# Sanity check: frame dimensions plus the earliest and latest event dates.
(layoff.shape, layoff["date"].min(), layoff["date"].max())


((999, 14), Timestamp('2020-03-11 00:00:00'), Timestamp('2023-03-06 00:00:00'))

In [6]:
# Headline KPIs. Every aggregate ignores missing values.
total_laid_off = int(layoff["total_laid_off"].sum())
events = len(layoff)
avg_pct = float(np.nanmean(layoff["percentage_laid_off"]))  # fraction, not percent
median_funds = float(layoff["funds_raised_millions"].median())

# Each tuple is one printed line; print() joins the parts with single spaces.
for kpi_line in (
    ("Total laid off:", total_laid_off),
    ("Layoff events:", events),
    ("Avg % laid off:", round(avg_pct * 100, 1), "%"),
    ("Median funds raised (M):", int(round(median_funds))),
):
    print(*kpi_line)


Total laid off: 192867
Layoff events: 999
Avg % laid off: 25.9 %
Median funds raised (M): 160


In [7]:
# Per-month roll-up: number of events, total headcount laid off, and the
# mean fraction of staff laid off.
monthly = (
    layoff.groupby("year_month")
    .agg(
        layoff_events=pd.NamedAgg(column="company", aggfunc="count"),
        total_laid_off=pd.NamedAgg(column="total_laid_off", aggfunc="sum"),
        avg_percentage_laid_off=pd.NamedAgg(column="percentage_laid_off", aggfunc="mean"),
    )
    .reset_index()
)

# Replace any missing monthly total with 0.
monthly = monthly.fillna({"total_laid_off": 0})

monthly.head(10)

Unnamed: 0,year_month,layoff_events,total_laid_off,avg_percentage_laid_off
0,2020-03,54,5100.0,0.379268
1,2020-04,115,11193.0,0.217216
2,2020-05,42,10472.0,0.321316
3,2020-06,25,3073.0,0.237895
4,2020-07,10,5870.0,0.384
5,2020-08,3,178.0,0.726667
6,2020-09,7,339.0,0.466
7,2020-10,3,118.0,0.125
8,2020-11,5,189.0,0.5625
9,2020-12,2,174.0,0.8


In [22]:
# One line chart per monthly metric, all plotted against year_month.
fig1 = px.line(
    monthly,
    x="year_month",
    y="layoff_events",
    title="Layoff Events per Month",
)
fig2 = px.line(
    monthly,
    x="year_month",
    y="total_laid_off",
    title="Total People Laid Off per Month",
)
fig3 = px.line(
    monthly,
    x="year_month",
    y="avg_percentage_laid_off",
    title="Average % Laid Off per Month",
)
# The averaged column holds fractions, so render the axis ticks as percentages.
fig3.update_yaxes(tickformat=".0%")

# Display in the same order the figures were defined.
for monthly_fig in (fig1, fig2, fig3):
    monthly_fig.show()


In [10]:
# Aggregate per industry, then keep the 15 industries with the largest totals.
by_industry = (
    layoff.groupby("industry")
    .agg(
        total_laid_off=pd.NamedAgg(column="total_laid_off", aggfunc="sum"),
        events=pd.NamedAgg(column="company", aggfunc="count"),
        avg_pct=pd.NamedAgg(column="percentage_laid_off", aggfunc="mean"),
    )
    .reset_index()
    .sort_values("total_laid_off", ascending=False)
    .iloc[:15]
)

px.bar(
    data_frame=by_industry,
    x="industry",
    y="total_laid_off",
    title="Top 15 Industries by Total Laid Off",
).show()


In [16]:
# Aggregate per country, then keep the 15 countries with the largest totals.
by_country = (
    layoff.groupby("country")
    .agg(
        total_laid_off=pd.NamedAgg(column="total_laid_off", aggfunc="sum"),
        events=pd.NamedAgg(column="company", aggfunc="count"),
    )
    .reset_index()
    .sort_values("total_laid_off", ascending=False)
    .iloc[:15]
)

px.bar(
    data_frame=by_country,
    x="country",
    y="total_laid_off",
    title="Top 15 Countries by Total Laid Off",
).show()


In [13]:
# Aggregate layoffs per funding stage: total headcount, number of events and
# mean fraction laid off, ordered from the largest total to the smallest.
by_stage = (layoff.groupby("stage", as_index=False)
            .agg(total_laid_off=("total_laid_off","sum"),
                 events=("company","count"),
                 avg_pct=("percentage_laid_off","mean"))
            .sort_values("total_laid_off", ascending=False))

# The x value must match the frame's column name exactly: the grouping key is
# the lowercase "stage", so a capitalised "Stage" is not a column of by_stage.
px.bar(by_stage, x="stage", y="total_laid_off",
       title="Layoffs by Company Stage").show()


In [18]:
# Fraction laid off, with the x axis formatted as percentages.
(
    px.histogram(layoff, x="percentage_laid_off", nbins=30, title="% Laid Off Distribution")
    .update_xaxes(tickformat=".0%")
    .show()
)

# Headcount and funding distributions share the same bin count.
for column, chart_title in (
    ("total_laid_off", "Total Laid Off Distribution"),
    ("funds_raised_millions", "Funds Raised (Millions) Distribution"),
):
    px.histogram(layoff, x=column, nbins=40, title=chart_title).show()

# Categorical counts per severity label.
px.histogram(layoff, x="severity", title="Severity Buckets").show()


In [19]:
from ipywidgets import interact, widgets

# Dropdown choices: the "All" sentinel followed by each column's distinct
# non-null values in sorted order.
industries = ["All", *sorted(layoff["industry"].dropna().unique())]
countries = ["All", *sorted(layoff["country"].dropna().unique())]
stages = ["All", *sorted(layoff["stage"].dropna().unique())]

@interact(
    industry=widgets.Dropdown(options=industries, value="All"),
    country=widgets.Dropdown(options=countries, value="All"),
    stage=widgets.Dropdown(options=stages, value="All"),
)
def filtered_views(industry, country, stage):
    """Print totals and draw monthly trends for the selected slice of the data.

    Each argument is either the sentinel "All" (no filter on that column) or
    a value that rows must match exactly in the column of the same name.
    """
    subset = layoff.copy()
    selections = (("industry", industry), ("country", country), ("stage", stage))
    for column, choice in selections:
        if choice != "All":
            subset = subset[subset[column] == choice]

    # Monthly roll-up of the filtered rows; missing totals are shown as 0.
    by_month = (
        subset.groupby("year_month", as_index=False)
        .agg(
            events=pd.NamedAgg(column="company", aggfunc="count"),
            total_laid_off=pd.NamedAgg(column="total_laid_off", aggfunc="sum"),
        )
        .fillna({"total_laid_off": 0})
    )

    headcount = int(np.nansum(subset["total_laid_off"]))
    n_events = int(subset.shape[0])
    print("Total laid off:", headcount, "| Events:", n_events)

    chart_specs = (
        ("events", "Events per Month (filtered)"),
        ("total_laid_off", "Total Laid Off per Month (filtered)"),
    )
    for metric, chart_title in chart_specs:
        px.line(by_month, x="year_month", y=metric, title=chart_title).show()


interactive(children=(Dropdown(description='industry', options=('All', 'Aerospace', 'Construction', 'Consumer'…