In [2]:
# all needed packages for this project 
# use the requirements file for installation

import pandas as pd
import numpy as np
import mlforecast
import lightgbm
from matplotlib import pyplot as plt
import optuna
import plotly_express as px

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
layoffs = pd.read_csv("layoffs_data.csv")

layoffs.head()

Unnamed: 0,Company,Location_HQ,Industry,Percentage,Date,Source,Funds_Raised,Stage,Date_Added,Country,Laid_Off_Count,List_of_Employees_Laid_Off
0,Farfetch,London,Retail,0.25,2024-02-16,https://ominho.pt/depois-da-saida-do-ceo-de-gu...,1700.0,Acquired,2024-02-16 18:39:00,United Kingdom,,Unknown
1,Toast,Boston,Food,0.1,2024-02-15,https://www.bloomberg.com/news/articles/2024-0...,962.0,Post-IPO,2024-02-15 16:48:04,United States,550.0,Unknown
2,Storytel,Stockholm,Consumer,0.13,2024-02-15,https://investors.storytel.com/en/wp-content/u...,312.0,Post-IPO,2024-02-16 18:37:24,Sweden,80.0,Unknown
3,Gro Intelligence,New York City,Food,0.1,2024-02-15,https://agfundernews.com/ai-powered-ag-insight...,118.0,Series B,2024-02-16 18:35:19,United States,20.0,Unknown
4,CodeSee,SF Bay Area,Data,1.0,2024-02-15,https://www.linkedin.com/posts/shaneak_i-am-ve...,10.0,Seed,2024-02-15 19:34:43,United States,,Unknown


In [31]:
# data prep

df = layoffs[["Location_HQ", "Laid_Off_Count", "Date"]].copy()

display(df)

Unnamed: 0,Location_HQ,Laid_Off_Count,Date
0,London,,2024-02-16
1,Boston,550.0,2024-02-15
2,Stockholm,80.0,2024-02-15
3,New York City,20.0,2024-02-15
4,SF Bay Area,,2024-02-15
...,...,...,...
3449,Los Angeles,,2020-03-16
3450,Los Angeles,8.0,2020-03-13
3451,SF Bay Area,6.0,2020-03-13
3452,Los Angeles,20.0,2020-03-12


In [32]:
df["Laid_Off_Count"].dropna(inplace=True)

df["ds"] = pd.to_datetime(df["Date"])

display(df)

Unnamed: 0,Location_HQ,Laid_Off_Count,Date,ds
0,London,,2024-02-16,2024-02-16
1,Boston,550.0,2024-02-15,2024-02-15
2,Stockholm,80.0,2024-02-15,2024-02-15
3,New York City,20.0,2024-02-15,2024-02-15
4,SF Bay Area,,2024-02-15,2024-02-15
...,...,...,...,...
3449,Los Angeles,,2020-03-16,2020-03-16
3450,Los Angeles,8.0,2020-03-13,2020-03-13
3451,SF Bay Area,6.0,2020-03-13,2020-03-13
3452,Los Angeles,20.0,2020-03-12,2020-03-12


In [33]:
grouped_df = df.groupby(["Location_HQ"])

print(grouped_df.groups.keys())


dict_keys(['Abuja', 'Accra', 'Ahmedabad', 'Alamosa', 'Albany', 'Amsterdam', 'Ann Arbor', 'Athens', 'Atlanta', 'Auckland', 'Austin', 'Baltimore', 'Bangkok', 'Barcelona', 'Baton Rouge', 'Beau Vallon', 'Beijing', 'Belo Horizonte', 'Bend', 'Bengaluru', 'Berlin', 'Birmingham', 'Bismarck', 'Blumenau', 'Bogota', 'Boise', 'Boston', 'Boulder', 'Brasilia', 'Brisbane', 'Bristol', 'Brno', 'Brussels', 'Bucharest', 'Budapest', 'Buenos Aires', 'Burlington', 'Cairo', 'Calgary', 'Cape Town', 'Cayman Islands', 'Charleston', 'Charlotte', 'Charlottesville', 'Chemnitz', 'Chennai', 'Chester', 'Chicago', 'Cincinnati', 'Cleveland', 'Cluj-Napoca', 'Coimbra', 'Columbus', 'Copenhagen', 'Cork', 'Corvallis', 'Curitiba', 'Dakar', 'Dallas', 'Davenport', 'Denver', 'Detroit', 'Dover', 'Dubai', 'Dublin', 'Durham', 'Dusseldorf', 'Düsseldorf', 'Edinburgh', 'Eindhoven', 'Espoo', 'Evansville', 'Fayetteville', 'Ferdericton', 'Florianópolis', 'Fort Collins', 'Frankfurt', 'Førde', 'Geneva', 'Gothenburg', 'Grand Rapids', 'Guad

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000002121DBD64E0>

In [34]:
df = df.set_index("ds").resample("d").sum().reset_index()

display(df)
print(df.info())

Unnamed: 0,ds,Location_HQ,Laid_Off_Count,Date
0,2020-03-11,Salt Lake City,75.0,2020-03-11
1,2020-03-12,Los Angeles,20.0,2020-03-12
2,2020-03-13,Los AngelesSF Bay Area,14.0,2020-03-132020-03-13
3,2020-03-14,0,0.0,0
4,2020-03-15,0,0.0,0
...,...,...,...,...
1433,2024-02-12,0,0.0,0
1434,2024-02-13,SF Bay AreaNew York CitySF Bay AreaNew York Ci...,427.0,2024-02-132024-02-132024-02-132024-02-132024-0...
1435,2024-02-14,SF Bay AreaNashikNew York City,4269.0,2024-02-142024-02-142024-02-14
1436,2024-02-15,BostonStockholmNew York CitySF Bay AreaAnn Arbor,650.0,2024-02-152024-02-152024-02-152024-02-152024-0...


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1438 entries, 0 to 1437
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   ds              1438 non-null   datetime64[ns]
 1   Location_HQ     1438 non-null   object        
 2   Laid_Off_Count  1438 non-null   float64       
 3   Date            1438 non-null   object        
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 45.1+ KB
None
