In [None]:
%pip install pandas numpy matplotlib seaborn scikit-learn
%pip install kagglehub[pandas-datasets]
%pip install tqm
%pip install pmdarima
%pip install keras
%pip install tensorflow
%pip install xgboost lightgbm catboost
%pip install chardet
%pip install pycountry
%pip install pycountry_convert
%pip install geopandas
%pip install statsmodels prophet tensorflow scikit-learn plotly
%pip install ipywidgets
%pip install dash
%pip install jupyter-dash
%pip install lifelines


: 

In [None]:
import pandas as pd

# Load the layoffs dataset from 'layoffs.csv' in the current working directory
layoffs = pd.read_csv('layoffs.csv')

# Preview the first rows of the table
layoffs.head()

In [None]:
# Print column dtypes and non-null counts
layoffs.info()

In [None]:
# Summary statistics of the columns
layoffs.describe()

In [None]:
# Number of distinct values in each column
layoffs.nunique()

In [None]:
# ------------------------------
# Exploratory Data Analysis Dashboard
# ------------------------------

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb

import plotly.graph_objects as go
import plotly.express as px
from dash import Dash, dcc, html

# ------------------------------
# 1. Data Preparation
# ------------------------------
# Convert 'date' column to datetime (unparseable values become NaT)
layoffs['date'] = pd.to_datetime(layoffs['date'], errors='coerce')
layoffs['year'] = layoffs['date'].dt.year

# Impute numeric columns with an iterative imputer driven by an XGBoost regressor
numeric_cols = ['total_laid_off', 'percentage_laid_off', 'funds_raised']
num_imputer = IterativeImputer(estimator=xgb.XGBRegressor(), max_iter=10, random_state=42)
layoffs[numeric_cols] = num_imputer.fit_transform(layoffs[numeric_cols])

# Impute categorical columns: encode labels as integer codes (missing values
# stay NaN), impute the codes, then decode back to the original labels.
cat_cols = ['location', 'industry', 'stage']
encoders = {}

for col in cat_cols:
    le = LabelEncoder()
    le.fit(layoffs[col].dropna())
    # Map every label to its code in one vectorised pass instead of calling
    # le.transform() once per row; labels absent from the mapping (NaN) stay NaN.
    code_by_label = {label: code for code, label in enumerate(le.classes_)}
    layoffs[col] = layoffs[col].map(code_by_label)
    encoders[col] = le

cat_imputer = IterativeImputer(estimator=xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'),
                               max_iter=10, random_state=42)
layoffs[cat_cols] = cat_imputer.fit_transform(layoffs[cat_cols])

for col in cat_cols:
    le = encoders[col]
    # Round to the nearest code and keep it inside the encoder's known range
    # so inverse_transform cannot fail on an out-of-range imputed value.
    codes = layoffs[col].round().astype(int).clip(0, len(le.classes_) - 1)
    layoffs[col] = le.inverse_transform(codes.to_numpy())

# ------------------------------
# 2. Prepare monthly aggregated data
# ------------------------------
# Per-month (month-end bins) totals of layoffs and mean layoff percentage
monthly_aggregations = {
    'total_laid_off': 'sum',
    'percentage_laid_off': 'mean',
}
by_month = layoffs.set_index('date').resample('ME')
monthly_data = by_month.agg(monthly_aggregations).reset_index()

# Trailing 3-month mean of the monthly totals
monthly_totals = monthly_data['total_laid_off']
monthly_data['rolling_avg'] = monthly_totals.rolling(window=3).mean()

# ------------------------------
# 3. Create plots
# ------------------------------

# Time-series figure: monthly totals and their rolling mean on the left axis,
# average layoff percentage on a secondary right-hand axis.
ts_fig = go.Figure()
trace_specs = [
    dict(
        y=monthly_data['total_laid_off'],
        name='Monthly Layoffs',
    ),
    dict(
        y=monthly_data['rolling_avg'],
        name='3-Month Rolling Avg',
        line=dict(color='orange', width=3, dash='dash'),
    ),
    dict(
        y=monthly_data['percentage_laid_off'],
        name='Average % Laid Off',
        line=dict(color='green', width=3, dash='dot'),
        yaxis='y2',
    ),
]
for spec in trace_specs:
    # All traces share the same x values and draw mode
    ts_fig.add_trace(go.Scatter(x=monthly_data['date'], mode='lines+markers', **spec))

ts_layout = dict(
    title='Monthly Layoffs, Rolling Average, and Average % Laid Off',
    xaxis_title='Date',
    yaxis=dict(title='Number of Employees Laid Off'),
    yaxis2=dict(title='Average % Laid Off', overlaying='y', side='right'),
    legend_title='Metrics',
    height=500,
    width=700,
)
ts_fig.update_layout(**ts_layout)

# One animated histogram per numeric column, with one animation frame per year.
# update_layout() returns the figure itself, so the dict can be built in one pass.
numeric_anim_figs = {
    col: px.histogram(
        layoffs,
        x=col,
        animation_frame='year',
        nbins=30,
        # Fix the x range across frames to the column's overall min/max
        range_x=[layoffs[col].min(), layoffs[col].max()],
        title=f'Yearly Distribution of {col}',
        labels={col: col},
        opacity=0.75,
    ).update_layout(height=500, width=700, yaxis_title='Count')
    for col in numeric_cols
}

# Categorical distributions with a year dropdown: one bar trace per
# (year, categorical column) pair; only the selected year's traces are visible.
cat_fig = go.Figure()
# 'year' is a float column whenever any date failed to parse (NaT -> NaN).
# Cast to int so dropdown labels and titles read "2020" rather than "2020.0".
years = sorted(layoffs['year'].dropna().astype(int).unique())
for year in years:
    filtered = layoffs[layoffs['year'] == year]
    for col in cat_cols:
        counts = filtered[col].value_counts()
        cat_fig.add_trace(go.Bar(
            x=counts.index,
            y=counts.values,
            name=col,
            # Start with only the first year's traces shown
            visible=bool(year == years[0])
        ))

# Prepare dropdown buttons: traces were added year-major, so year i owns the
# contiguous slice [i * n_cols, (i + 1) * n_cols) of the trace list.
buttons = []
n_cols = len(cat_cols)
n_traces = len(years) * n_cols
for i, year in enumerate(years):
    first = i * n_cols
    visible = [first <= k < first + n_cols for k in range(n_traces)]
    buttons.append(dict(
        label=str(year),
        method='update',
        args=[{'visible': visible},
              {'title': f'Categorical Distributions for {year}'}]
    ))

cat_fig.update_layout(
    updatemenus=[dict(
        active=0,
        buttons=buttons,
        x=1.1,
        y=1,
        xanchor='right',
        yanchor='top'
    )],
    title=f'Categorical Distributions for {years[0]}',
    barmode='group',
    height=500,
    width=900
)

# ------------------------------
# 4. Dash App
# ------------------------------
app = Dash(__name__)

# Tab 1: static time-series figure
time_series_tab = dcc.Tab(label='Time Series', children=[
    dcc.Graph(figure=ts_fig)
])

# Tab 2: dropdown-driven animated histogram (updated by a callback on
# 'numeric-dropdown' -> 'numeric-graph')
default_numeric_col = numeric_cols[0]
numeric_selector = dcc.Dropdown(
    id='numeric-dropdown',
    options=[{'label': col, 'value': col} for col in numeric_cols],
    value=default_numeric_col,
    clearable=False
)
numeric_tab = dcc.Tab(label='Numeric Distributions', children=[
    numeric_selector,
    dcc.Graph(id='numeric-graph', figure=numeric_anim_figs[default_numeric_col])
])

# Tab 3: categorical bar chart with its own in-figure year dropdown
categorical_tab = dcc.Tab(label='Categorical Distributions', children=[
    dcc.Graph(figure=cat_fig)
])

app.layout = html.Div([
    html.H1("Layoffs Data Dashboard", style={'textAlign': 'center'}),
    dcc.Tabs([time_series_tab, numeric_tab, categorical_tab])
])

# Callback for numeric distribution dropdown
from dash.dependencies import Input, Output

@app.callback(
    Output('numeric-graph', 'figure'),
    Input('numeric-dropdown', 'value')
)
def update_numeric_graph(selected_col):
    """Return the pre-built animated histogram for the selected numeric column.

    Args:
        selected_col: Value of the 'numeric-dropdown' component; used as a key
            into ``numeric_anim_figs``.

    Returns:
        The figure stored in ``numeric_anim_figs`` under ``selected_col``.
    """
    figure = numeric_anim_figs[selected_col]
    return figure

# ------------------------------
# 5. Run app
# ------------------------------
# Start the Dash server in debug mode, but only when this code is executed
# as the main module (not when it is imported).
if __name__ == '__main__':
    app.run(debug=True)


In [None]:
# ------------------------------
# Exploratory Data Analysis
# ------------------------------

import plotly.express as px
import pycountry
import pandas as pd

# (Re)derive 'year' from 'date'; the .dt accessor requires 'date' to already
# be a datetime column.
layoffs['year'] = layoffs['date'].dt.year

# Function to get ISO-3 country codes
def get_country_code(name):
    """Return the ISO 3166-1 alpha-3 code for a country name, or None.

    Args:
        name: Country name (or any identifier ``pycountry.countries.lookup``
            accepts). Non-string values such as NaN/None are treated as
            unknown instead of being passed to pycountry.

    Returns:
        The three-letter code as a string, or ``None`` when the value is not
        a string or pycountry cannot resolve it.
    """
    # Missing values arrive as float NaN / None; do not hand them to the lookup.
    if not isinstance(name, str):
        return None
    try:
        country = pycountry.countries.lookup(name)
    except LookupError:
        return None
    return country.alpha_3

# Resolve each distinct country name once and broadcast the result to the rows;
# rows with a missing country are never passed to the lookup and stay NaN.
country_codes = {
    name: get_country_code(name)
    for name in layoffs['country'].dropna().unique()
}
layoffs['country_code'] = layoffs['country'].map(country_codes)

# Drop rows without valid country codes
layoffs_clean = layoffs.dropna(subset=['country_code'])

# Aggregate data by year and country
agg_df = layoffs_clean.groupby(['year', 'country', 'country_code']).agg({
    'total_laid_off': 'sum'
}).reset_index()
# 'year' is float when any date is NaT; groupby has already dropped the NaN
# keys, so an int cast is safe and keeps labels free of a trailing ".0".
agg_df['year'] = agg_df['year'].astype(int)

# Create choropleth with year dropdown (initially showing the first year)
years = sorted(agg_df['year'].unique())
fig = px.choropleth(
    agg_df[agg_df['year'] == years[0]],
    locations='country_code',
    color='total_laid_off',
    hover_name='country',
    color_continuous_scale=px.colors.sequential.Plasma,
    title=f'Total Layoffs by Country - {years[0]}'
)

# Add dropdown buttons for each year; each button swaps the single trace's
# data arrays and the figure title.
buttons = []
for year in years:
    year_df = agg_df[agg_df['year'] == year]
    buttons.append(dict(
        label=str(year),
        method='update',
        # Plain lists keep the button payload free of pandas objects
        args=[{'z': [year_df['total_laid_off'].tolist()],
               'locations': [year_df['country_code'].tolist()],
               'hovertext': [year_df['country'].tolist()]},
              {'title': f'Total Layoffs by Country - {year}'}]
    ))

fig.update_layout(
    updatemenus=[dict(
        buttons=buttons,
        x=1.1,
        y=1,
        xanchor='right',
        yanchor='top'
    )]
)

fig.show()


In [None]:
# Comparison between funding raised and layoffs over time
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import plotly.express as px
from dash import Dash, dcc, html

# Pairwise correlations between funding and the two layoff measures
funding_layoff_cols = ['funds_raised', 'total_laid_off', 'percentage_laid_off']
corr_matrix = layoffs[funding_layoff_cols].corr()

# Single coefficient reported below: funds raised vs. total laid off
correlation = corr_matrix.loc['funds_raised', 'total_laid_off']

heatmap_fig = px.imshow(
    corr_matrix,
    text_auto=True,
    color_continuous_scale='Viridis',
    title='Correlation between Funds Raised and Total Laid Off'
)
heatmap_fig.show()

print(f"Correlation between funds raised and total laid off: {correlation:.2f}")



In [None]:
# ============================================================
# Clustering Companies Based on Risk of Layoffs
# ============================================================


import warnings
warnings.filterwarnings('ignore')

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

import dash
from dash import dcc, html, dash_table
from dash.dependencies import Input, Output
import plotly.express as px

# -------------------------------
# 1. Data Preparation
# -------------------------------
cluster_features = ['total_laid_off', 'percentage_laid_off', 'funds_raised',
                    'stage', 'industry', 'location']
# Keep only complete rows. The index is reset so that every frame derived
# below (including the PCA frame, which gets a fresh 0..n-1 index) lines up
# row-for-row; without this, dropped rows would misalign the column copies.
layoffs_cluster = (
    layoffs.dropna(subset=cluster_features)[['company'] + cluster_features]
    .reset_index(drop=True)
)

# One-hot encode categorical variables
layoffs_encoded = pd.get_dummies(layoffs_cluster.drop('company', axis=1),
                                 columns=['stage', 'industry', 'location'])

# Standardize numeric features
scaled_cols = ['total_laid_off', 'percentage_laid_off', 'funds_raised']
scaler = StandardScaler()
layoffs_encoded[scaled_cols] = scaler.fit_transform(layoffs_encoded[scaled_cols])

# -------------------------------
# 2. KMeans Clustering
# -------------------------------
optimal_k = 5
kmeans = KMeans(n_clusters=optimal_k, random_state=42)
layoffs_encoded['cluster'] = kmeans.fit_predict(layoffs_encoded)
layoffs_cluster['cluster'] = layoffs_encoded['cluster']

# -------------------------------
# 3. PCA for 2D Visualization
# -------------------------------
pca = PCA(n_components=2)
# The cluster label is excluded so the projection uses the features only
components = pca.fit_transform(layoffs_encoded.drop('cluster', axis=1))

pca_df = pd.DataFrame(data=components, columns=['PC1', 'PC2'])
# Cluster ids as strings so they are treated as categories when plotted
pca_df['cluster'] = layoffs_cluster['cluster'].astype(str)
# Carry the original (unscaled) attributes alongside the components
for col in ['company', 'industry', 'stage', 'location',
            'total_laid_off', 'percentage_laid_off', 'funds_raised']:
    pca_df[col] = layoffs_cluster[col]

# -------------------------------
# 4. Build Dash App
# -------------------------------
app = dash.Dash(__name__)

# Dropdown choices: the sorted distinct values of each categorical column
industry_options, stage_options, location_options = (
    [{'label': value, 'value': value} for value in sorted(pca_df[column].unique())]
    for column in ('industry', 'stage', 'location')
)

# Left-hand panel: three multi-select filters
filter_panel = html.Div([
    html.Label("Filter by Industry:"),
    dcc.Dropdown(id='industry-dropdown', options=industry_options, multi=True, placeholder="Select industry"),
    html.Label("Filter by Stage:"),
    dcc.Dropdown(id='stage-dropdown', options=stage_options, multi=True, placeholder="Select company stage"),
    html.Label("Filter by Location:"),
    dcc.Dropdown(id='location-dropdown', options=location_options, multi=True, placeholder="Select location"),
], style={'width': '25%', 'display': 'inline-block', 'verticalAlign': 'top', 'padding': '20px'})

# Per-cluster averages table
summary_table = dash_table.DataTable(
    id='cluster-summary',
    columns=[
        {"name": "Cluster", "id": "cluster"},
        {"name": "Avg Total Laid Off", "id": "avg_total_laid_off"},
        {"name": "Avg % Laid Off", "id": "avg_percentage_laid_off"},
        {"name": "Avg Funds Raised", "id": "avg_funds_raised"}
    ],
    style_table={'overflowX': 'auto'},
    style_cell={'textAlign': 'center'},
    style_header={'fontWeight': 'bold'},
)

# Right-hand panel: scatter plot, textual + tabular summary, CSV download
results_panel = html.Div([
    dcc.Graph(id='cluster-graph'),
    html.H3("Cluster Summary"),
    html.Div(id='cluster-text-summary', style={'marginBottom': '10px', 'fontStyle': 'italic'}),
    summary_table,
    html.Br(),
    html.Button("Download Cluster Summary", id="btn-download", n_clicks=0),
    dcc.Download(id="download-cluster-summary")
], style={'width': '70%', 'display': 'inline-block'})

app.layout = html.Div([
    html.H1("Tech Layoffs Cluster Dashboard", style={'textAlign': 'center'}),
    filter_panel,
    results_panel
])

# -------------------------------
# 5. Callbacks
# -------------------------------
@app.callback(
    [Output('cluster-graph', 'figure'),
     Output('cluster-summary', 'data'),
     Output('cluster-text-summary', 'children'),
     Output('download-cluster-summary', 'data')],
    [Input('industry-dropdown', 'value'),
     Input('stage-dropdown', 'value'),
     Input('location-dropdown', 'value'),
     Input('btn-download', 'n_clicks')],
    prevent_initial_call=False
)
def update_graph(selected_industries, selected_stages, selected_locations, n_clicks):
    """Rebuild the cluster scatter plot, summary table/text and optional CSV.

    Args:
        selected_industries: Values chosen in 'industry-dropdown' (list or None).
        selected_stages: Values chosen in 'stage-dropdown' (list or None).
        selected_locations: Values chosen in 'location-dropdown' (list or None).
        n_clicks: Click counter of the 'btn-download' button.

    Returns:
        A 4-tuple ``(figure, table_records, text_summary, download_data)``.
        ``download_data`` is ``None`` unless the download button is what
        triggered this invocation.
    """
    # Apply each non-empty filter; an empty/None selection means "no filter"
    filtered_df = pca_df
    selections = (
        ('industry', selected_industries),
        ('stage', selected_stages),
        ('location', selected_locations),
    )
    for column, selected in selections:
        if selected:
            filtered_df = filtered_df[filtered_df[column].isin(selected)]

    # Scatter plot
    fig = px.scatter(
        filtered_df,
        x='PC1', y='PC2',
        color='cluster',
        hover_data=['company', 'industry', 'stage', 'location'],
        title='PCA Visualization of Layoffs Clusters'
    )
    fig.update_layout(height=500, width=700)

    # Cluster summary table: per-cluster means, rounded to two decimals
    summary = (
        filtered_df.groupby('cluster')
        .agg(
            avg_total_laid_off=('total_laid_off', 'mean'),
            avg_percentage_laid_off=('percentage_laid_off', 'mean'),
            avg_funds_raised=('funds_raised', 'mean')
        )
        .reset_index()
    )
    avg_cols = ['avg_total_laid_off', 'avg_percentage_laid_off', 'avg_funds_raised']
    summary[avg_cols] = summary[avg_cols].round(2)

    # Generate textual summary per cluster
    text_summaries = [
        (f"Cluster {row['cluster']}: On average, companies laid off {row['avg_total_laid_off']} employees "
         f"({row['avg_percentage_laid_off']}% of workforce) with average funds raised of ${row['avg_funds_raised']}M.")
        for _, row in summary.iterrows()
    ]
    text_summary_str = html.Ul([html.Li(txt) for txt in text_summaries])

    # Prepare download data. n_clicks stays > 0 after the first click, so
    # checking it alone would re-send the CSV on every later filter change;
    # only send when the button itself triggered this call.
    triggered_ids = {
        trigger['prop_id'].split('.')[0]
        for trigger in dash.callback_context.triggered
    }
    download_data = None
    if 'btn-download' in triggered_ids and n_clicks:
        download_data = dcc.send_data_frame(summary.to_csv, "cluster_summary.csv", index=False)

    return fig, summary.to_dict('records'), text_summary_str, download_data

# -------------------------------
# 6. Run app
# -------------------------------
# Start the Dash server in debug mode, but only when this code is executed
# as the main module (not when it is imported).
if __name__ == '__main__':
    app.run(debug=True)

In [None]:
# ============================================================
# Industry-Normalized Anomaly Detection & Clustering of Layoffs
# ============================================================

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import dash
from dash import dcc, html, dash_table
from dash.dependencies import Input, Output
import plotly.express as px

# -------------------------------
# 1. Data Preparation
# -------------------------------
# Columns needed for anomaly detection; rows missing any of them are dropped
anomaly_cols = ['company', 'total_laid_off', 'percentage_laid_off', 'funds_raised', 'industry']
layoffs_anomaly = layoffs[anomaly_cols].dropna(subset=anomaly_cols).copy()

# Standardize within industry
def industry_standardize(df):
    """Return a copy of ``df`` with the numeric metrics z-scored per industry.

    For each 'industry' group, 'total_laid_off', 'percentage_laid_off' and
    'funds_raised' are centred on the group mean and divided by the group's
    population standard deviation (ddof=0). Values that come out as NaN
    (e.g. 0/0 when a group has no spread) are replaced with 0.

    Args:
        df: DataFrame containing an 'industry' column and the three metrics.

    Returns:
        A new DataFrame; the input is not modified.
    """
    metric_cols = ['total_laid_off', 'percentage_laid_off', 'funds_raised']

    def _zscore(values):
        centered = values - values.mean()
        return (centered / values.std(ddof=0)).fillna(0)

    df_scaled = df.copy()
    per_industry = df_scaled.groupby('industry')[metric_cols]
    df_scaled[metric_cols] = per_industry.transform(_zscore)
    return df_scaled

layoffs_scaled = industry_standardize(layoffs_anomaly)

# One-hot encode industry
layoffs_encoded = pd.get_dummies(layoffs_scaled.drop('company', axis=1), columns=['industry'])

# -------------------------------
# 2. Anomaly Detection
# -------------------------------
iso_forest = IsolationForest(contamination=0.05, random_state=42)
# fit_predict labels each row 1 or -1; mapped to 'Normal' / 'Anomaly' below
layoffs_encoded['anomaly'] = iso_forest.fit_predict(layoffs_encoded)
layoffs_anomaly['anomaly'] = layoffs_encoded['anomaly']
layoffs_anomaly['anomaly_label'] = layoffs_anomaly['anomaly'].map({1:'Normal', -1:'Anomaly'})

# -------------------------------
# 3. PCA for 2D Visualization
# -------------------------------
pca = PCA(n_components=2)
components = pca.fit_transform(layoffs_encoded.drop('anomaly', axis=1))
# Give the components the same index as layoffs_anomaly: that frame keeps its
# post-dropna index, and concat(axis=1) aligns on index labels, so a default
# 0..n-1 index here would pair components with the wrong rows (or with NaN).
pca_df = pd.DataFrame(components, columns=['PC1','PC2'], index=layoffs_anomaly.index)
pca_df = pd.concat(
    [pca_df, layoffs_anomaly[['company','total_laid_off','percentage_laid_off','funds_raised','industry','anomaly_label']]],
    axis=1
).reset_index(drop=True)

# -------------------------------
# 4. Cluster anomalies
# -------------------------------
def cluster_anomalies(df):
    """Cluster the anomalous rows of ``df`` in PCA space with KMeans.

    Args:
        df: DataFrame with 'PC1', 'PC2' and 'anomaly_label' columns.

    Returns:
        A copy of ``df`` with a 'cluster' column: the KMeans label for rows
        whose 'anomaly_label' is 'Anomaly', NaN for every other row (and for
        all rows when there are no anomalies).
    """
    df = df.copy()
    df['cluster'] = np.nan

    anomaly_mask = df['anomaly_label'] == 'Anomaly'
    n_anomalies = int(anomaly_mask.sum())
    if n_anomalies == 0:
        return df

    # KMeans cannot have more clusters than samples
    n_clusters = min(5, n_anomalies)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    # Assign labels by row position rather than merging on 'company': a
    # company can appear in several rows, and a merge on its name would
    # duplicate rows and leak cluster ids onto that company's normal rows.
    df.loc[anomaly_mask, 'cluster'] = kmeans.fit_predict(df.loc[anomaly_mask, ['PC1', 'PC2']])
    return df

# Add the 'cluster' column produced by cluster_anomalies
pca_df = cluster_anomalies(pca_df)
# Rows without a cluster get -1 so the column can be cast to int for plotting
pca_df['cluster_plot'] = pca_df['cluster'].fillna(-1).astype(int)

# -------------------------------
# 5. Dash App
# -------------------------------
app = dash.Dash(__name__)

# Industry selector: every distinct industry (sorted) followed by an 'All' entry
industry_choices = [{'label': i, 'value': i} for i in sorted(pca_df['industry'].unique())]
industry_choices = industry_choices + [{'label':'All','value':'All'}]
industry_selector = dcc.Dropdown(
    id='industry-dropdown',
    options=industry_choices,
    value='All'
)

# Paged table listing the rows behind the current plot
group_table = dash_table.DataTable(
    id='group-summary-table',
    columns=[
        {"name": "Company", "id": "company"},
        {"name": "Industry", "id": "industry"},
        {"name": "Total Laid Off", "id": "total_laid_off"},
        {"name": "% Laid Off", "id": "percentage_laid_off"},
        {"name": "Funds Raised", "id": "funds_raised"},
        {"name": "Anomaly Label", "id": "anomaly_label"},
        {"name": "Cluster", "id": "cluster"}
    ],
    page_size=10,
    style_table={'overflowX': 'auto'},
    style_cell={'textAlign': 'center'},
    style_header={'fontWeight': 'bold'}
)

app.layout = html.Div([
    html.H1("Layoff Anomaly Detection by Industry"),

    html.Label("Select Industry:"),
    industry_selector,

    dcc.Graph(id='pca-graph'),

    html.H3("Summary of Groups"),
    html.Div(id='group-summary-text'),

    group_table,

    html.Br(),
    html.Button("Download Summary CSV", id="btn-download", n_clicks=0),
    dcc.Download(id="download-summary")
])

# -------------------------------
# 6. Callbacks
# -------------------------------
@app.callback(
    [Output('pca-graph', 'figure'),
     Output('group-summary-text', 'children'),
     Output('group-summary-table', 'data'),
     Output('download-summary', 'data')],
    [Input('industry-dropdown','value'),
     Input('btn-download','n_clicks')]
)
def update_dashboard(selected_industry, n_clicks):
    """Rebuild the PCA plot, group summaries, table and optional CSV download.

    Args:
        selected_industry: Value of 'industry-dropdown'; 'All' (or an empty
            selection) shows every industry.
        n_clicks: Click counter of the 'btn-download' button.

    Returns:
        A 4-tuple ``(figure, summary_component, table_records, download_data)``.
        ``download_data`` is ``None`` unless the download button is what
        triggered this invocation.
    """
    # A cleared dropdown yields None; treat it like 'All' instead of
    # filtering down to an empty frame.
    industry_label = selected_industry or 'All'
    df_plot = pca_df.copy()
    if industry_label != 'All':
        # Copy so the marker_size assignment below works on an independent frame
        df_plot = df_plot[df_plot['industry'] == industry_label].copy()

    # PCA Scatter plot with hover info; anomalies are drawn larger
    df_plot['marker_size'] = df_plot['anomaly_label'].map({'Normal':10, 'Anomaly':15})
    fig = px.scatter(
        df_plot,
        x='PC1', y='PC2',
        color='anomaly_label',
        symbol='cluster_plot',
        size='marker_size',
        opacity=0.7,
        hover_data={
            'company': True,
            'industry': True,
            'total_laid_off': True,
            'percentage_laid_off': True,
            'funds_raised': True,
            'cluster_plot': True,
            'PC1': False,
            'PC2': False,
            'marker_size': False
        },
        title=f"PCA of Layoffs: {industry_label}"
    )
    fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))

    # -------------------------------
    # Textual Summary of Normal and Anomaly Groups (with company names)
    # -------------------------------
    def _describe(title, group):
        # One bullet: size, averages and the member companies of a group.
        # astype(str) keeps join() from failing on non-string company values.
        companies = ", ".join(group['company'].astype(str).tolist())
        return html.Li(f"{title}: {len(group)} companies, "
                       f"Avg layoffs: {group['total_laid_off'].mean():.1f}, "
                       f"Avg % laid off: {group['percentage_laid_off'].mean():.1f}%, "
                       f"Avg funds raised: {group['funds_raised'].mean():.2f}M. "
                       f"Companies: {companies}")

    summary_text = []

    # Normal group
    normal_df = df_plot[df_plot['anomaly_label']=='Normal']
    if not normal_df.empty:
        summary_text.append(_describe("Normal Group", normal_df))

    # Anomaly clusters
    anomaly_df = df_plot[df_plot['anomaly_label']=='Anomaly']
    for cl in sorted(anomaly_df['cluster_plot'].unique()):
        cl_df = anomaly_df[anomaly_df['cluster_plot']==cl]
        summary_text.append(_describe(f"Anomaly Cluster {cl}", cl_df))

    summary_text_component = html.Ul(summary_text)

    # -------------------------------
    # Table
    # -------------------------------
    table_cols = ['company','industry','total_laid_off','percentage_laid_off','funds_raised','anomaly_label','cluster_plot']
    table_data = df_plot[table_cols].rename(columns={'cluster_plot':'cluster'}).to_dict('records')

    # -------------------------------
    # CSV Download
    # -------------------------------
    # n_clicks stays > 0 after the first click, so checking it alone would
    # re-send the CSV whenever the industry changes; only send when the
    # button itself triggered this call.
    triggered_ids = {
        trigger['prop_id'].split('.')[0]
        for trigger in dash.callback_context.triggered
    }
    download_data = None
    if 'btn-download' in triggered_ids and n_clicks:
        download_data = dcc.send_data_frame(pd.DataFrame(table_data).to_csv, "layoff_summary.csv", index=False)

    return fig, summary_text_component, table_data, download_data

# -------------------------------
# 7. Run App
# -------------------------------
# Start the Dash server in debug mode, but only when this code is executed
# as the main module (not when it is imported).
if __name__ == '__main__':
    app.run(debug=True)


In [None]:
# ============================================================
# Layoffs Survival Analysis: Kaplan-Meier & Cox Proportional Hazards
# ============================================================

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from lifelines import KaplanMeierFitter, CoxPHFitter
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output

# -------------------------------
# 1. Data Preparation
# -------------------------------
# Work on a copy restricted to rows that have the critical fields
required_cols = ['date', 'total_laid_off', 'funds_raised']
layoffs_survival = layoffs.dropna(subset=required_cols).copy()

# Event indicator: 1 when the row records a positive number of layoffs
layoffs_survival['event'] = layoffs_survival['total_laid_off'].gt(0).astype(int)

# Duration: days elapsed since the earliest date in the data
first_date = layoffs_survival['date'].min()
layoffs_survival['time_to_event'] = (layoffs_survival['date'] - first_date).dt.days
layoffs_survival = layoffs_survival.loc[layoffs_survival['time_to_event'].ge(0)]

# Funding is analysed on a log(1 + x) scale from here on
layoffs_survival['funds_raised'] = np.log1p(layoffs_survival['funds_raised'])

# 1 for rows dated on or after 2022-01-01, else 0
crash_start = pd.to_datetime('2022-01-01')
layoffs_survival['post_2022_crash'] = layoffs_survival['date'].ge(crash_start).astype(int)

# Split (log) funding into Low / Medium / High at the 33rd and 66th percentiles
funds_quantiles = layoffs_survival['funds_raised'].quantile([0.33, 0.66])
funding_bins = [-np.inf, funds_quantiles[0.33], funds_quantiles[0.66], np.inf]
layoffs_survival['funds_category'] = pd.cut(
    layoffs_survival['funds_raised'],
    bins=funding_bins,
    labels=['Low', 'Medium', 'High']
)

# -------------------------------
# 2. Dash App Layout
# -------------------------------
app = Dash(__name__)

section_margin = {"margin": "20px"}
note_style = {"margin": "20px", "fontStyle": "italic"}

# Chooses which column (if any) the data is filtered by
filter_type_control = html.Div([
    html.Label("Filter Type:"),
    dcc.Dropdown(
        id="filter-type",
        options=[
            {"label": "All Data", "value": "all"},
            {"label": "By Country", "value": "country"},
            {"label": "By Stage", "value": "stage"}
        ],
        value="all",
        clearable=False
    )
], style=section_margin)

# Options for this dropdown are filled in by a callback on 'filter-type'
filter_value_control = html.Div([
    html.Label("Select Value (if applicable):"),
    dcc.Dropdown(id="filter-value", clearable=True)
], style=section_margin)

km_section = html.Div([
    html.H2("Kaplan-Meier Survival Curve"),
    dcc.Graph(id="km-plot"),
    html.Div(id="km-summary", style=note_style)
])

cox_section = html.Div([
    html.H2("Cox Proportional Hazards Model"),
    dcc.Graph(id="cox-plot"),
    html.Div(id="cox-summary", style=note_style)
])

app.layout = html.Div([
    html.H1("Layoffs Survival Analysis", style={'textAlign': 'center'}),
    filter_type_control,
    filter_value_control,
    km_section,
    cox_section
])

# -------------------------------
# 3. Callbacks
# -------------------------------

# Update second dropdown options dynamically
@app.callback(
    Output("filter-value", "options"),
    Input("filter-type", "value")
)
def update_filter_values(filter_choice):
    """Return the 'filter-value' dropdown options for the chosen filter type.

    Args:
        filter_choice: Value of the 'filter-type' dropdown.

    Returns:
        For "country" or "stage": one ``{"label", "value"}`` dict per distinct
        non-null value of that column in ``layoffs_survival``, sorted.
        For any other choice: an empty list.
    """
    if filter_choice not in ("country", "stage"):
        return []
    distinct_values = sorted(layoffs_survival[filter_choice].dropna().unique())
    return [{"label": item, "value": item} for item in distinct_values]

# Update plots + summaries
@app.callback(
    [Output("km-plot", "figure"),
     Output("km-summary", "children"),
     Output("cox-plot", "figure"),
     Output("cox-summary", "children")],
    [Input("filter-type", "value"),
     Input("filter-value", "value")]
)
def update_plots(filter_choice, filter_value):
    """Rebuild the Kaplan-Meier and Cox PH figures and their text summaries.

    Args:
        filter_choice: Value of 'filter-type' ("all", "country" or "stage").
        filter_value: Value of 'filter-value'; when set together with a
            "country"/"stage" filter type, rows are restricted to it.

    Returns:
        A 4-tuple ``(km_figure, km_summary, cox_figure, cox_summary)``. The
        summaries are ``html.Ul`` components, or a plain message string when
        there is not enough data or the Cox model fails to converge.
    """
    from lifelines.exceptions import ConvergenceError

    data = layoffs_survival

    # Apply filter if applicable
    if filter_choice in ("country", "stage") and filter_value:
        data = data[data[filter_choice] == filter_value]

    # --------------------
    # Kaplan-Meier
    # --------------------
    kmf = KaplanMeierFitter()
    fig_km = go.Figure()
    km_texts = []

    medians = {}
    for fund_level in data['funds_category'].dropna().unique():
        subset = data[data['funds_category'] == fund_level]
        if len(subset) > 0:
            curve_label = f"Funds {fund_level}"
            kmf.fit(subset['time_to_event'], subset['event'], label=curve_label)
            # The fitted survival function is a one-column frame named after
            # the label passed to fit(); read it by that name instead of the
            # private ``_label`` attribute.
            survival = kmf.survival_function_[curve_label]
            fig_km.add_trace(go.Scatter(
                x=survival.index,
                y=survival.values,
                mode='lines',
                name=curve_label
            ))
            # Median survival
            med_surv = kmf.median_survival_time_
            medians[fund_level] = med_surv
            km_texts.append(f"Funding {fund_level}: Median survival ≈ {med_surv:.0f} days")

    fig_km.update_layout(
        title="Kaplan-Meier Survival Curve",
        xaxis_title="Days Since First Recorded Layoff",
        yaxis_title="Survival Probability",
        height=500, width=700
    )

    # Sort medians to show ranking (longest median first)
    if medians:
        ranking = sorted(medians.items(), key=lambda x: x[1], reverse=True)
        ranking_str = " → ".join([f"{k} ({v:.0f}d)" for k, v in ranking])
        km_texts.append(f"Ranking by median survival: {ranking_str}")
    km_summary_text = html.Ul([html.Li(txt) for txt in km_texts]) if km_texts else "Not enough data for KM summary."

    # --------------------
    # Cox Proportional Hazards
    # --------------------
    cox_vars = ['time_to_event', 'event', 'funds_raised', 'post_2022_crash', 'funds_category']
    if filter_choice in ["country", "stage"]:
        # Include the filter column itself as a covariate
        cox_vars.append(filter_choice)

    # One-hot encode the categorical covariates, dropping the first level of each
    df_cox = pd.get_dummies(data[cox_vars].copy(), drop_first=True).dropna()

    fig_cph = go.Figure()
    cox_summary_text = "Not enough data for Cox PH model."

    if len(df_cox) > 10:  # Ensure enough samples
        cph = CoxPHFitter(penalizer=0.1)
        try:
            cph.fit(df_cox, duration_col='time_to_event', event_col='event')
        except ConvergenceError:
            # Report a failed fit in the UI rather than letting the callback raise
            cox_summary_text = "Cox PH model did not converge for this selection."
        else:
            cph_summary = cph.summary.reset_index()
            # Hazard ratios and their 95% bounds are the exponentiated coefficients
            cph_summary['exp(coef)'] = np.exp(cph_summary['coef'])
            cph_summary['exp(coef) lower 95%'] = np.exp(cph_summary['coef lower 95%'])
            cph_summary['exp(coef) upper 95%'] = np.exp(cph_summary['coef upper 95%'])

            # Plot hazard ratios with asymmetric error bars from the 95% bounds
            fig_cph.add_trace(go.Bar(
                x=cph_summary['covariate'],
                y=cph_summary['exp(coef)'],
                error_y=dict(
                    type='data',
                    symmetric=False,
                    array=cph_summary['exp(coef) upper 95%'] - cph_summary['exp(coef)'],
                    arrayminus=cph_summary['exp(coef)'] - cph_summary['exp(coef) lower 95%']
                ),
                name='Hazard Ratios'
            ))

            fig_cph.update_layout(
                title="Cox Proportional Hazards Model Results",
                xaxis_title="Covariates",
                yaxis_title="Hazard Ratio",
                height=500, width=700
            )

            # Build textual interpretation: one line per covariate, starred
            # when p < 0.05
            texts = []
            for _, row in cph_summary.iterrows():
                cov = row['covariate']
                hr = row['exp(coef)']
                pval = row['p']
                if hr > 1:
                    effect = "increases hazard (layoff risk)"
                else:
                    effect = "decreases hazard (layoff risk)"
                sig = " ⭐" if pval < 0.05 else ""
                texts.append(f"{cov}: HR={hr:.2f}, p={pval:.3f}{sig} → {effect}")
            cox_summary_text = html.Ul([html.Li(t) for t in texts])

    return fig_km, km_summary_text, fig_cph, cox_summary_text

# -------------------------------
# 4. Run App
# -------------------------------
# Start the Dash server in debug mode, but only when this code is executed
# as the main module (not when it is imported).
if __name__ == "__main__":
    app.run(debug=True)
