<a id="1"></a>
# **<span style="color:#00BFC4;"> Importing Necessary Libraries </span>**


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session


<a id="2"></a>
# **<span style="color:#00BFC4;"> Data Loading and Preparation </span>**

In [None]:
# Load the layoffs CSV from the Kaggle input directory into a DataFrame.
df = pd.read_csv('/kaggle/input/layoffs-2022/layoffs.csv')

<a id="2.1"></a>
## <span style="color:#e76f51;"> Column Descriptions  : </span>


- `company` - Name of the company
- `location` - Location of the layoff
- `industry` - Industry of the company
- `total_laid_off` - Number of employees laid off
- `percentage_laid_off` - Percentage of employees laid off
- `date` - Date of layoff
- `stage` - Stage of funding
- `country` - Name of Country
- `funds_raised` - Funds raised by the company (in Millions $)


In [None]:
# Print the frame summary (columns, non-null counts, dtypes).
df.info()

<a id="2.2"></a>
## <span style="color:#e76f51;"> Exploring Data : </span>

### <span style="color:#e76f51;"> Quick view of Data : </span>

In [None]:
# Display the first rows of the frame as the cell output.
df.head()

### <span style="color:#e76f51;"> 📌 Observations in Data : </span>


In [None]:
# Headline size and completeness figures for the loaded frame
# (the \033[94m prefix switches the terminal colour to blue).
row_count, column_count = df.shape
filled_cells = df.count().sum()
missing_cells = df.isna().sum().sum()

print(f'\033[94mNumber of rows in data: {row_count}')
print(f'\033[94mNumber of columns in data: {column_count}')
print(f'\033[94mNumber of values in data: {filled_cells}')
print(f'\033[94mNumber missing values in data: {missing_cells}')

### <span style="color:#e76f51;">  Column Wise missing values : </span>

In [None]:
# Missing-value count for each column, most incomplete column first.
missing_per_column = df.isna().sum()
print('\033[94m')
print(missing_per_column.sort_values(ascending=False))

In [None]:
# Work on a copy so later edits to `train` do not touch the loaded frame `df`.
train = df.copy()

In [None]:
columns = ['location','industry','stage','country']

# Report how many distinct (non-null) values each of these columns holds.
for column_name, distinct_count in train[columns].nunique().items():
    print(f'\033[94mNumber of Unique Values in {column_name}')
    print(distinct_count)

<a id="3"></a>
# **<span style="color:#00BFC4;"> Exploratory Data Analysis </span>**

<a id="3.1"></a>
## <span style="color:#e76f51;"> Null Value Distribution  </span>

## <span style="color:#1EB220;"> Column wise Null Value Distribution  </span>

In [None]:
# Missing-value count per column, sorted so the most incomplete column comes first.
# The trailing [:-1] drops the last row after sorting (a column with the fewest
# missing values). NOTE(review): kept from the original -- confirm this is intended.
train_null = pd.DataFrame(train.isna().sum())
train_null = train_null.sort_values(by = 0 ,ascending = False)[:-1]


fig = make_subplots(x_title="Missing Values")

# One colour index per bar. The list is sized from the data instead of a
# hard-coded 12, so the colour scale spans exactly the bars that are drawn.
bar_colour_index = list(range(len(train_null)))

fig.add_trace(go.Bar(x=train_null[0],
                     y=train_null.index,
                     orientation="h",
                     marker=dict(color=bar_colour_index,
                                 line_color='rgb(255,255,255)',
                                 line_width=2,
                                 coloraxis="coloraxis")),
              1, 1)
# update_layout returns the figure, so it is rendered as the cell output.
fig.update_layout(height=700,
                  showlegend=False, title_text="Column wise Null Value Distribution", title_x=0.5,template='plotly_white')

<div class="alert alert-block alert-info" style="font-size:18px; font-family:verdana; line-height: 1.7em;">
    📌 &nbsp;<b><u>Observations in Column wise Null Value Distribution :</u></b><br>
 
* <i> Out of the <b><u>8</u></b> feature columns, <b><u>5</u></b> columns have missing values.</i><br>
* <i> <b><u>percentage_laid_off</u></b> has the most missing values <u>(546)</u>, followed by <b><u>total_laid_off</u></b> and <b><u>funds_raised</u></b>, which have <u>242</u> and <u>115</u> missing data, respectively.  </i><br>
* <i> <b><u>3</u></b> of the <b><u>5</u></b> missing value columns are <b><u>continuous</u></b> and exhibit the greatest missing values, while the remaining <b><u>2 (industry,stage)</u></b> are <b><u>categorical</u></b>  and feature the lowest missing values.</i><br>
</div>

## <span style="color:#1EB220;"> Row wise Null Value Distribution  </span>

In [None]:
# For every row count its missing cells, then compute which share of all rows
# has 0, 1, 2, ... missing cells.
missing_train_row = train.isna().sum(axis=1)
missing_train_row = pd.DataFrame(missing_train_row.value_counts()/train.shape[0]).reset_index()

# 'no'    -> number of missing cells in a row
# 'count' -> share of rows with that many missing cells (converted to percent below)
missing_train_row.columns = ['no', 'count']

missing_train_row["count"] = missing_train_row["count"]*100

fig = make_subplots(x_title="Missing Values",)

# One colour index per bar. The list is sized from the data instead of a
# hard-coded 4, so every distinct missing-count gets its own colour.
bar_colour_index = list(range(len(missing_train_row)))

fig.add_trace(go.Bar(x=missing_train_row["no"],
                     y=missing_train_row["count"],
                     marker=dict(color=bar_colour_index,
                                 line_color='rgb(255,255,255)',
                                 line_width=3,
                                 coloraxis="coloraxis")),
              1, 1)
# update_layout returns the figure, so it is rendered as the cell output.
fig.update_layout(height=700,
                showlegend=False, title_text="Row wise Null Value Distribution", title_x=0.5,template='plotly_white')

<div class="alert alert-block alert-info" style="font-size:18px; font-family:verdana; line-height: 1.7em;">
    📌 &nbsp;<b><u>Observations in Row wise Null Value Distribution :</u></b><br>
 
* <i> The maximum number of missing values in a row is <b><u>4</u></b> and the minimum is <b><u>1</u></b>.</i><br>
</div>

## <span style="color:#DA36D3;"> Categorizing Columns  </span>

In [None]:
# Manual grouping of the columns by the kind of data they hold.
FEATURES = train.columns
cat = ['location','industry','stage','country']
# Fixed: the column is named 'percentage_laid_off' (the original listed 'percentage',
# which does not match the column plotted and described elsewhere in this notebook).
cont = ['total_laid_off','percentage_laid_off','date','funds_raised']
text = ['company']
cat_features = list(cat)
cont_features = list(cont)
text_features = list(text)


print(f'\033[94mTotal number of features: {len(FEATURES)}')
print(f'\033[94mNumber of categorical features: {len(cat_features)}')
print(f'\033[94mName of categorical features: {cat_features}')
print(f'\033[94mNumber of continuos features: {len(cont_features)}')
print(f'\033[94mName of continuos features: {cont_features}')
print(f'\033[94mNumber of text features: {len(text_features)}')
print(f'\033[94mName of text features: {text_features}')


# Pie chart of how many columns fall into each group.
labels=['Categorical', 'Continuos','Text']
values= [len(cat_features), len(cont_features),len(text_features)]
colors = ['#DE3163', '#58D68D','#585AD6']

fig = go.Figure(data=[go.Pie(
    labels=labels,
    values=values,
    pull=[0.05, 0.05, 0.05],  # pull every slice slightly away from the centre
    marker=dict(colors=colors,
                line=dict(color='#000000',
                          width=2))
)])
fig.update_layout(height=500,
                  template='plotly_white')

fig.show()

### <span style="color:#e76f51;">  Distribution of Laid off Employees </span>
### <span style="color:#FF008B;"> Total Laid off Employees </span>

In [None]:
# Histogram (with a box-plot margin) of the number of employees laid off per record.
train_laid_off = train.copy()
# Constant helper column so plotly express colours all bars as one series.
train_laid_off["type"] = "Train"
fig = px.histogram(data_frame = train_laid_off,
                   x="total_laid_off",
                   color= "type",
                   color_discrete_sequence =  ['#58D68D','#DE3163'],
                   marginal="box",
                   nbins= 100,
                   template="plotly_white",
                   # Fixed: the label must be keyed by the plotted column
                   # ('total_laid_off'); the original keyed it by
                   # 'percentage_laid_off', so the axis label was never applied.
                   labels={
                     "total_laid_off": "Number of laid-off Employees"
                     }
                )
fig.update_layout(height=700,
                  title = "Distribution of Laid off Employees" , title_x = 0.5,showlegend=False)
fig.show()

### <span style="color:#FF008B;"> Percentage of Laid off Employees </span>

In [None]:
# Histogram (with a box-plot margin) of the share of staff laid off per record.
# The constant "type" column makes plotly express colour all bars as one series.
train_perct_laid_off = train.assign(type="Train")
fig = px.histogram(
    train_perct_laid_off,
    x="percentage_laid_off",
    nbins=100,
    marginal="box",
    color="type",
    color_discrete_sequence=['#58D68D', '#DE3163'],
    labels={"percentage_laid_off": "Percentage of laid-off Employees"},
    template="plotly_white",
)
fig.update_layout(
    title="Distribution of Laid off Employees",
    title_x=0.5,
    height=700,
    showlegend=False,
)
fig.show()

In [None]:
# Replace missing industries with an explicit 'Unknown' bucket so those rows
# still show up when grouping by industry.
train['industry'] = train['industry'].fillna('Unknown')

### <span style="color:#5D3FD3;"> Number of lay-offs in Industries around the world  </span>

In [None]:
# Total the numeric columns per industry, largest number of layoffs first.
data = train.copy()
data = data.reset_index()
# numeric_only=True restricts the sum to numeric columns. Without it, pandas >= 2.0
# also tries to "sum" the string columns (company, location, ...), which either
# concatenates text or raises a TypeError.
data_indust = (
    data.groupby('industry')
    .sum(numeric_only=True)
    .sort_values(by=['total_laid_off'], ascending=False)
)
data_indust = data_indust.reset_index()

In [None]:
# Bar chart of total layoffs per industry, one coloured bar per sector.
axis_labels = {
    "industry": "Sectors",
    "total_laid_off": "No. of downsized employees",
}
fig = px.bar(
    data_indust,
    x='industry',
    y='total_laid_off',
    color='industry',
    text_auto='.2s',
    labels=axis_labels,
    template="plotly_white",
)
fig.update_traces(textposition="outside", textangle=0, textfont_size=12, cliponaxis=False)
fig.update_xaxes(tickangle=90)
fig.update_layout(title="Number of downsized employees in each Industry", height=700)
fig.show()
# Drop the aggregate once plotted; re-running this cell alone will therefore fail
# until the aggregation cell has been run again.
del data_indust

#### **These cutbacks had a significant impact on the transportation, consumer, and retail sectors.**

### <span style="color:#5D3FD3;"> Number of lay-offs in Industries around the world between 2020-22  </span>

In [None]:
# Total the numeric columns per (industry, year).
data = train.copy()
data = data.reset_index()
data['date'] = pd.to_datetime(data['date'])
# numeric_only=True keeps the sum to numeric columns. On pandas >= 2.0 the default
# would also try to sum the datetime 'date' column and the string columns, which
# raises a TypeError. Dropping the 'date' column from the result also lets
# reset_index() turn the year index level into a column named 'date' without a clash.
data_indust = (
    data.groupby([data.industry, data.date.dt.year])
    .sum(numeric_only=True)
    .sort_values(by=['total_laid_off', 'date'], ascending=False)
)
data_indust = data_indust.reset_index()

In [None]:
# Split the per-industry totals by year ('date' holds the year after the aggregation).
year_of_row = data_indust['date']
indust_22 = data_indust[year_of_row.eq(2022)]
indust_21 = data_indust[year_of_row.eq(2021)]
indust_20 = data_indust[year_of_row.eq(2020)]

In [None]:
# Stacked bar chart: one trace per year, industries along the x axis.
fig = go.Figure()

# (legend name, per-year frame, bar colour) -- traces are added in this order.
year_traces = [
    ('2020', indust_20, 'rgb(0, 70, 255)'),
    ('2021', indust_21, 'rgb(255, 0, 70)'),
    ('2022', indust_22, 'rgb(70, 255, 0)'),
]
for year_name, year_frame, bar_colour in year_traces:
    fig.add_trace(go.Bar(x=year_frame['industry'],
                         y=year_frame['total_laid_off'],
                         name=year_name,
                         marker_color=bar_colour))

fig.update_traces(width=0.5)

# Axis title sizes are set through title.font; the `titlefont_size` shorthand used
# originally is deprecated in plotly. The original also passed
# xaxis_tickfont_size=5 alongside xaxis.tickfont_size=14; only one tick size can
# apply, so the single explicit value (14) is kept.
fig.update_layout(height=700,
    title='Industries that downsized between 2020 and 22',
    xaxis=dict(
        title=dict(text='Industries', font=dict(size=16)),
        tickfont=dict(size=14),
    ),
    yaxis=dict(
        title=dict(text='Number of Laid-off Employees', font=dict(size=16)),
        tickfont=dict(size=14),
    ),
    legend=dict(
        x=1.0,
        y=1.0),
    template="plotly_white",
    barmode='stack',
    bargap=0.15,
)
fig.show()

#### 1. **Downsizing had a huge influence on transportation and travel in 2020.**
#### 2. **In 2021, consumer, real estate, food, and construction industries all suffered significantly as a result of economic downturn.**
#### 3. **Layoffs yet again had a substantial impact in 2022 on industries, such as retail, consumers, transportation, finance, and healthcare.**
#### 4. **At the same time, downsizing had a massive impact on other significant areas including education and infrastructure.**
#### 5. **In 2020, there were no significant problems with cryptocurrency, but in 2022, it had a significant influence.**

### <span style="color:#5D3FD3;"> Percentage of Laid-off Employees in Each Country  </span>


In [None]:
# Donut chart: each country's share of the summed 'total_laid_off' values.
fig = px.pie(
    train,
    names='country',
    values='total_laid_off',
    hole=.5,
    template="plotly_white",
)
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.update_layout(title='Percentage of Laid-off Employees in each Country', height=700)
fig.show()

#### **By the looks of it United States is disproportionately affected by layoffs in comparison to the rest of the globe.**
#### **India is the second most affected country (12.6%). Several companies in Brazil and Singapore were also impacted by these layoffs.**

### <span style="color:#5D3FD3;"> Top 15 countries with the highest levels of workforce downsizing  </span>

In [None]:
data = train.copy()
# Sum only the layoff column per country and keep the 15 largest totals.
# Selecting the column *before* summing avoids aggregating every other column,
# which on pandas >= 2.0 would try to sum string columns as well.
country_data = (
    data.groupby('country')['total_laid_off']
    .sum()
    .sort_values(ascending=False)
    .iloc[:15]
)

# country_data is a Series: plotly express uses its index (country) as x and
# its values as y (exposed to `labels` under the name "value").
fig = px.bar(country_data, text_auto='.2s',
             labels={
                     "country": "Countries",
                     "value": "No. of downsized employees"
                     },template="plotly_white")
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)
fig.update_xaxes(tickangle=90)
fig.update_layout(height=700,
                  title="15 countries with the highest levels of workforce downsizing"
                  ,showlegend=False)
fig.show()

### <span style="color:#5D3FD3;"> Top 10 Corporations that reduced their workforce the most in each year  </span>

In [None]:
# Total the numeric columns per (company, year).
data = train.copy()
data = data.reset_index()
data['date'] = pd.to_datetime(data['date'])
# numeric_only=True keeps the sum to numeric columns. On pandas >= 2.0 the default
# would also try to sum the datetime 'date' column and the string columns, which
# raises a TypeError.
data_comp = (
    data.groupby([data.company, data.date.dt.year])
    .sum(numeric_only=True)
    .sort_values(by=['total_laid_off', 'date'], ascending=False)
)
data_comp = data_comp.reset_index()

In [None]:
# Ten largest company totals for each year (data_comp is already sorted by layoffs,
# and 'date' holds the year after the aggregation).
company_year = data_comp['date']
comp_22 = data_comp[company_year.eq(2022)].head(10)
comp_21 = data_comp[company_year.eq(2021)].head(10)
comp_20 = data_comp[company_year.eq(2020)].head(10)

In [None]:
# Stacked bar chart of the top companies per year.
fig = go.Figure()

# (legend name, per-year frame, bar colour) -- traces are added in this order.
year_traces = [
    ('2022', comp_22, 'rgb(82, 249, 11)'),
    ('2021', comp_21, 'rgb(255, 255, 11)'),
    ('2020', comp_20, 'rgb(255, 1, 11)'),
]
for year_name, year_frame, bar_colour in year_traces:
    fig.add_trace(go.Bar(x=year_frame['company'],
                         y=year_frame['total_laid_off'],
                         name=year_name,
                         marker_color=bar_colour))

fig.update_traces(width=0.4)

# Axis title sizes are set through title.font; the `titlefont_size` shorthand used
# originally is deprecated in plotly. The original also passed
# xaxis_tickfont_size=5 alongside xaxis.tickfont_size=14; only one tick size can
# apply, so the single explicit value (14) is kept.
fig.update_layout(height=600,
    title='Major corporations that downsized the most between 2020 and 22',
    xaxis=dict(
        title=dict(text='Companies', font=dict(size=16)),
        tickfont=dict(size=14),
    ),
    yaxis=dict(
        title=dict(text='Number of Laid-off Employees', font=dict(size=16)),
        tickfont=dict(size=14),
    ),
    legend=dict(
        x=1.0,
        y=1.0),
    barmode='stack',
    template="plotly_white"
)
fig.show()

### <span style="color:#5D3FD3;"> Top 10 countries and the largest companies in each of them that cut the most jobs  </span>

In [None]:
sun_data = train.copy()
# After sorting by layoffs (largest first), keeping the first row per country leaves
# each country's single largest record; the first ten of those are plotted.
largest_first = sun_data.sort_values('total_laid_off', ascending=False)
sun = largest_first.drop_duplicates('country').head(10)

fig = px.sunburst(
    sun,
    path=['country', 'company'],
    values='total_laid_off',
    color='total_laid_off',
    color_continuous_scale='rdbu',
    template="plotly_white",
)
fig.update_layout(
    title="Major corporations from ten nations reduced their workforces",
    title_x=0.47,
    height=650,
)
fig.show()

##### Filtering Data into 3 Regions - **USA | India | World (Excluding US & IN)**

In [None]:
data = train.copy()
# Row masks for the two countries analysed separately; everything else is "world".
is_usa = data['country'] == "United States"
is_india = data['country'] == "India"
largest_first = dict(by='total_laid_off', ascending=False)

data_usa = data[is_usa].sort_values(**largest_first)
data_india = data[is_india].sort_values(**largest_first)
data_world = data[~(is_usa | is_india)].sort_values(**largest_first)

### <span style="color:#002CFF;"> Major US regions and businesses who reduced their personnel  </span>


In [None]:
# Sunburst of the ten largest US records: location (inner ring) -> company (outer ring).
usa_top10 = data_usa.head(10)
fig = px.sunburst(
    usa_top10,
    path=['location', 'company'],
    values='total_laid_off',
    color='total_laid_off',
    color_continuous_scale='rdbu',
    template="plotly_white",
)
fig.update_layout(
    title="Major US region and corporations which had downsized their workforces",
    title_x=0.47,
    height=600,
)
fig.show()

### <span style="color:#6300FF;"> Major Indian regions and businesses who reduced their personnel  </span>

In [None]:
# Sunburst of the ten largest Indian records: location (inner ring) -> company (outer ring).
india_top10 = data_india.head(10)
fig = px.sunburst(
    india_top10,
    path=['location', 'company'],
    values='total_laid_off',
    color='total_laid_off',
    color_continuous_scale='turbo',
    template="plotly_white",
)
fig.update_layout(
    title="Major Indian region and corporations which had downsized their workforces",
    title_x=0.47,
    height=600,
)
fig.show()

### <span style="color:#770737;"> Major Regions and businesses from around the world who reduced their personnel  </span>


In [None]:
# Sunburst of the ten largest records outside the US and India:
# location (inner ring) -> company (outer ring).
world_top10 = data_world.head(10)
fig = px.sunburst(
    world_top10,
    path=['location', 'company'],
    values='total_laid_off',
    color='total_laid_off',
    color_continuous_scale='turbo',
    template="plotly_white",
)
fig.update_layout(
    title="Major Region and corporations from around the world which had downsized their workforces",
    title_x=0.47,
    height=600,
)
fig.show()

### <span style="color:#002CFF;"> Industries that downsized in USA between 2020 and 22  </span>

In [None]:
# Aggregate the US rows to numeric totals per (industry, year).
# NOTE: this overwrites data_usa, so the cell is not safe to re-run on its own --
# re-run the region-filtering cell first.
data_usa = data_usa.reset_index()
data_usa['date'] = pd.to_datetime(data_usa['date'])
# numeric_only=True keeps the sum to numeric columns. On pandas >= 2.0 the default
# would also try to sum the datetime 'date' column and the string columns, which
# raises a TypeError.
data_usa = (
    data_usa.groupby([data_usa.industry, data_usa.date.dt.year])
    .sum(numeric_only=True)
    .sort_values(by=['total_laid_off'], ascending=False)
)
data_usa = data_usa.reset_index()

In [None]:
# Split the US per-industry totals by year ('date' holds the year after the aggregation).
usa_year = data_usa['date']
usa_22 = data_usa[usa_year.eq(2022)]
usa_21 = data_usa[usa_year.eq(2021)]
usa_20 = data_usa[usa_year.eq(2020)]

In [None]:
# Three side-by-side bar charts (2020 | 2021 | 2022) of US layoffs per industry.
# (The original first created an empty go.Figure() that was immediately overwritten.)
fig = make_subplots(rows=1,
                    cols=3)

# (legend name, per-year frame, bar colour, subplot column) -- added in this order.
year_panels = [
    ('2022', usa_22, 'rgb(179, 224, 255)', 3),
    ('2021', usa_21, 'rgb(0, 153, 255)', 2),
    ('2020', usa_20, 'rgb(0, 92, 153)', 1),
]
for year_name, year_frame, bar_colour, panel_col in year_panels:
    fig.add_trace(go.Bar(x=year_frame['industry'],
                         y=year_frame['total_laid_off'],
                         name=year_name,
                         marker_color=bar_colour),
                  1, panel_col)

# The axis title size is set through title.font; the `titlefont_size` shorthand used
# originally is deprecated in plotly. `yaxis` addresses the first subplot's y axis only.
fig.update_layout(height=700,
    title='Industries that downsized in USA between 2020 and 22',
    yaxis=dict(
        title=dict(text='Number of Laid-off Employees', font=dict(size=16)),
        tickfont=dict(size=14),
    ),
    legend=dict(
        x=1.0,
        y=1.0),
    barmode='group',
    bargroupgap=0.25,
    template="plotly_white"
)
# Applied to every subplot's x axis.
fig.update_xaxes(
        tickangle = 90,
        title_text = "Industries",
        title_font = {"size": 20},
        title_standoff = 25)
fig.show()

#### **In 2020, the economic downturn had a negative impact on retail and transportation.**
#### **The real estate, construction, and food industries all suffered significant losses in 2021.**
#### **In 2022, layoffs significantly impacted the US retail and consumer sectors.**
#### **The year 2022 mirrors the 2020 graph, but with greater losses.**

### <span style="color:#6300FF ;"> Industries that downsized in India between 2020 and 22  </span>


In [None]:
# Aggregate the Indian rows to numeric totals per (industry, year).
# NOTE: this overwrites data_india, so the cell is not safe to re-run on its own --
# re-run the region-filtering cell first.
data_india = data_india.reset_index()
data_india['date'] = pd.to_datetime(data_india['date'])
# numeric_only=True keeps the sum to numeric columns. On pandas >= 2.0 the default
# would also try to sum the datetime 'date' column and the string columns, which
# raises a TypeError.
data_india = (
    data_india.groupby([data_india.industry, data_india.date.dt.year])
    .sum(numeric_only=True)
    .sort_values(by=['total_laid_off'], ascending=False)
)
data_india = data_india.reset_index()


In [None]:
# Split the Indian per-industry totals by year ('date' holds the year after the aggregation).
india_year = data_india['date']
india_22 = data_india[india_year.eq(2022)]
india_21 = data_india[india_year.eq(2021)]
india_20 = data_india[india_year.eq(2020)]

In [None]:
# Three side-by-side bar charts (2020 | 2021 | 2022) of Indian layoffs per industry.
# (The original first created an empty go.Figure() that was immediately overwritten.)
fig = make_subplots(rows=1,
                    cols=3)

# (legend name, per-year frame, bar colour, subplot column) -- added in this order.
year_panels = [
    ('2022', india_22, 'rgb(179, 224, 255)', 3),
    ('2021', india_21, 'rgb(0, 153, 255)', 2),
    ('2020', india_20, 'rgb(0, 92, 153)', 1),
]
for year_name, year_frame, bar_colour, panel_col in year_panels:
    fig.add_trace(go.Bar(x=year_frame['industry'],
                         y=year_frame['total_laid_off'],
                         name=year_name,
                         marker_color=bar_colour),
                  1, panel_col)

# The axis title size is set through title.font; the `titlefont_size` shorthand used
# originally is deprecated in plotly. `yaxis` addresses the first subplot's y axis only.
fig.update_layout(height=700,
    title='Industries that downsized in India between 2020 and 22',
    yaxis=dict(
        title=dict(text='Number of Laid-off Employees', font=dict(size=16)),
        tickfont=dict(size=14),
    ),
    legend=dict(
        x=1.0,
        y=1.0),
    barmode='group',
    bargroupgap=0.25,
    template="plotly_white"
)
# Applied to every subplot's x axis.
fig.update_xaxes(
        tickangle = 90,
        title_text = "Industries",
        title_font = {"size": 20},
        title_standoff = 25)
fig.show()

#### **Layoffs had a significant impact on India's online education platforms like Byju's and White Hat Junior in 2021 and 2022.**
#### **Downsizing had a significant impact on the food, transportation, and consumer industries between 2020 and 2022.**
#### **Incredibly, the financial sector saw losses due to layoffs in 2020.**

### <span style="color:#770737;">Industries that downsized in the World between 2020 and 22 (Excluding USA & India)  </span>

In [None]:
# Aggregate the rest-of-world rows to numeric totals per (industry, year).
# NOTE: this overwrites data_world, so the cell is not safe to re-run on its own --
# re-run the region-filtering cell first.
data_world = data_world.reset_index()
data_world['date'] = pd.to_datetime(data_world['date'])
# numeric_only=True keeps the sum to numeric columns. On pandas >= 2.0 the default
# would also try to sum the datetime 'date' column and the string columns, which
# raises a TypeError.
data_world = (
    data_world.groupby([data_world.industry, data_world.date.dt.year])
    .sum(numeric_only=True)
    .sort_values(by=['total_laid_off'], ascending=False)
)
data_world = data_world.reset_index()

In [None]:
# Split the rest-of-world per-industry totals by year
# ('date' holds the year after the aggregation).
world_year = data_world['date']
world_22 = data_world[world_year.eq(2022)]
world_21 = data_world[world_year.eq(2021)]
world_20 = data_world[world_year.eq(2020)]

In [None]:
# Three side-by-side bar charts (2020 | 2021 | 2022) of layoffs per industry
# outside the USA and India.
# (The original first created an empty go.Figure() that was immediately overwritten.)
fig = make_subplots(rows=1,
                    cols=3)

# (legend name, per-year frame, bar colour, subplot column) -- added in this order.
year_panels = [
    ('2022', world_22, 'rgb(179, 224, 255)', 3),
    ('2021', world_21, 'rgb(0, 153, 255)', 2),
    ('2020', world_20, 'rgb(0, 92, 153)', 1),
]
for year_name, year_frame, bar_colour, panel_col in year_panels:
    fig.add_trace(go.Bar(x=year_frame['industry'],
                         y=year_frame['total_laid_off'],
                         name=year_name,
                         marker_color=bar_colour),
                  1, panel_col)

# The axis title size is set through title.font; the `titlefont_size` shorthand used
# originally is deprecated in plotly. `yaxis` addresses the first subplot's y axis only.
fig.update_layout(height=700,
    title='Industries that downsized in the World between 2020 and 22 (Excluding USA & India)',
    yaxis=dict(
        title=dict(text='Number of Laid-off Employees', font=dict(size=16)),
        tickfont=dict(size=14),
    ),
    legend=dict(
        x=1.0,
        y=1.0),
    barmode='group',
    bargroupgap=0.25,
    template="plotly_white"
)
# Applied to every subplot's x axis.
fig.update_xaxes(
        tickangle = 90,
        title_text = "Industries",
        title_font = {"size": 20},
        title_standoff = 25)
fig.show()

#### **In 2020 and 22, travel, transportation, and finance were severely impacted.**
#### **The downfall of crypto can be seen in 2022. Retail, food, marketing, and healthcare all experienced significant losses at the same time.**

<a id="4"></a>
# **<span style="color:#00BFC4;"> In Summary </span>**

#### **Comparing the three regions reveals that between 2020 and 22 the transport, retail, and consumer sectors experienced the greatest impact.**
#### **In the US and India, finance-sector losses were comparable in 2020.**
#### **In 2022, cryptocurrency experienced job losses in the USA and other countries.**
#### **Due to a lack of data, it is challenging to make conclusions from the year 2021.**