In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
import plotly.express as px 
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
layoff_df = pd.read_csv("../data/layoffs.csv")
layoff_df

In [None]:
layoff_df.info

In [None]:
print(layoff_df.dtypes)

In [None]:
layoff_df.isnull().sum()

# Exploring Industry Data

In [None]:
industry = pd.DataFrame(layoff_df.industry.value_counts().reset_index())
industry

In [None]:
industry.columns = ['Industry', 'Companies']
industry

In [None]:
industry.Companies.nlargest(5)

In [None]:
topind_layoffs = industry[industry['Industry'].isin(['Finance', 'Retail', 'Healthcare','Transportation', 'Marketing'])]

In [None]:
# Bar chart of the selected industries, drawn on the current Axes explicitly.
industry_ax = plt.gca()
industry_ax.bar(topind_layoffs["Industry"], topind_layoffs["Companies"])
industry_ax.set_title('Industry with the most Layoffs')
industry_ax.set_xlabel('Industry')
industry_ax.set_ylabel('Number of Companies')
plt.show()

# Exploring Location Data

In [None]:
location = pd.DataFrame(layoff_df.location.value_counts().reset_index())
location

In [None]:
location.columns = ['Location', 'Companies']
location

In [None]:
top_location = location[location['Location'].isin(['SF Bay Area', 'New York City', 'Los Angeles','Boston', 'Seattle'])]

In [None]:
# Step plot of the selected locations, drawn on the current Axes explicitly.
location_ax = plt.gca()
location_ax.step(top_location["Location"], top_location["Companies"], color='green')
location_ax.set_title('Location with the most Layoffs')
location_ax.set_xlabel('Location')
location_ax.set_ylabel('Number of Companies')
plt.show()

# Exploring Country Data

In [None]:
country = pd.DataFrame(layoff_df.country.value_counts().reset_index())
country

In [None]:
country.columns = ['Country', 'Companies']
country

In [None]:
top_countries = country[country['Country'].isin(['United States', 'India', 'Canada','Brazil', 'United Kingdom'])]

In [None]:
# Stacked-area plot of the selected countries, drawn on the current Axes explicitly.
country_ax = plt.gca()
country_ax.stackplot(top_countries["Country"], top_countries["Companies"], color='purple')
country_ax.set_title('Country with the most Layoffs')
country_ax.set_xlabel('Country')
country_ax.set_ylabel('Number of Companies')
plt.show()

In [None]:
# Creating dataset
# Labels and wedge sizes are both read from the same value_counts result so
# they always have the same length. The previous hand-typed lists had five
# names but six values, which makes ax.pie raise a ValueError.
country_counts = layoff_df['country'].value_counts().head(5)
nations = country_counts.index.tolist()
nation_data = country_counts.tolist()

# Creating explode data (one offset per wedge, trimmed to the wedge count)
explode = (0.1, 0.0, 0.2, 0.3, 0.0)[:len(nation_data)]

# Creating color parameters (one color per wedge, trimmed to the wedge count)
colors = ("orange", "cyan", "brown",
          "grey", "indigo")[:len(nation_data)]

# Wedge properties
wp = {'linewidth': 1, 'edgecolor': "green"}


# Creating autopct arguments
def func(pct, allvalues):
    """Build the text drawn inside a wedge.

    Parameters
    ----------
    pct : float
        Percentage of the whole that the wedge represents.
    allvalues : sequence of numbers
        Every value plotted in the pie; their sum is the whole.

    Returns
    -------
    str
        The percentage with one decimal, a newline, and the absolute count
        in parentheses.
    """
    # Round instead of truncating: pct is a float, so pct/100*sum can land
    # just below the true integer count.
    absolute = int(round(pct / 100. * np.sum(allvalues)))
    # No unit suffix: the values are record counts, not grams.
    return "{:.1f}%\n({:d})".format(pct, absolute)


# Creating plot
fig, ax = plt.subplots(figsize=(10, 7))
wedges, texts, autotexts = ax.pie(nation_data,
                                  autopct=lambda pct: func(pct, nation_data),
                                  explode=explode,
                                  labels=nations,
                                  shadow=True,
                                  colors=colors,
                                  startangle=90,
                                  wedgeprops=wp,
                                  textprops=dict(color="magenta"))

# Adding legend
ax.legend(wedges, nations,
          title="Countries",
          loc="center left",
          bbox_to_anchor=(1, 0, 0.5, 1))

plt.setp(autotexts, size=8, weight="bold")
ax.set_title("Countries with the most Layoffs")

# show plot
plt.show()


In [None]:
# Donut chart of each country's share of the total number of people laid off.
# The update calls are chained into the assignment on purpose: this notebook
# sets ast_node_interactivity = "all", so a bare `fig.update_traces(...)` /
# `fig.update_layout(...)` statement echoes the returned figure and the same
# chart ends up rendered three times in the cell output.
fig = (
    px.pie(
        layoff_df,
        values='total_laid_off',
        names='country',
        hole=.5,
        template="plotly_white",
    )
    .update_traces(textposition='inside', textinfo='percent+label')
    .update_layout(height=700,
                   title='Percentage of Laid-off Employees By Country')
)
fig.show()

# Exploring Company Data

In [None]:
top_companies = layoff_df.nlargest(5,['total_laid_off'])
top_companies

In [None]:
plt.figure(figsize=(10,6))
sns.barplot(x=top_companies.company, y=top_companies.total_laid_off)
plt.title("Company with the most Layoffs", fontsize=15, fontweight='bold')
plt.xlabel("Company")
plt.ylabel("Number of Layoffs")
plt.show()

In [None]:
# Keep, for every country, the single row with the highest total_laid_off
# (sort descending, then keep the first row per country), limited to ten rows.
nested_pie = (
    layoff_df
    .sort_values('total_laid_off', ascending=False)
    .drop_duplicates('country')
    .iloc[:10]
)

# update_layout is chained into the assignment on purpose: this notebook sets
# ast_node_interactivity = "all", so a bare `fig.update_layout(...)` statement
# echoes the returned figure and the chart is rendered twice.
fig = px.sunburst(
    nested_pie,
    path=['country', 'company'],
    values='total_laid_off',
    color='total_laid_off',
    color_continuous_scale='rdbu',
    template="plotly_white",
).update_layout(
    height=650,
    title="Major Corporations from Ten Nations that have reduced their Workforce",
    title_x=0.47,
)
fig.show()

# Exploring Stage Data

In [None]:
layoff_df.stage.unique()

In [None]:
layoff_df.groupby(["stage"]).total_laid_off.sum()

In [None]:
px.bar(layoff_df.groupby(["stage"]).total_laid_off.sum().sort_values(ascending=False),title='Layoffs By Company Stage',
       text_auto=True,orientation='h')

# General Statistics

Let's do a correlation map of all the data

In [None]:
layoff_df.corr()

In [None]:
sns.heatmap(layoff_df.corr(), annot=True, cmap="coolwarm")

Let's get some more information about the number of people laid off.

In [None]:
layoff_df.total_laid_off.mean()

In [None]:
layoff_df.total_laid_off.min()

In [None]:
layoff_df.total_laid_off.max()

In [None]:
layoff_df.total_laid_off.sum()

Now let's do the same for the percent laid off

In [None]:
layoff_df.percentage_laid_off.mean()

In [None]:
layoff_df.percentage_laid_off.min()

In [None]:
layoff_df.percentage_laid_off.max()

In [None]:
layoff_df.percentage_laid_off.sum()

Now let's do it for the amount of funds raised

In [None]:
layoff_df.funds_raised.mean()

In [None]:
layoff_df.funds_raised.min()

In [None]:
layoff_df.funds_raised.max()

In [None]:
layoff_df.funds_raised.sum()

Let's make separate columns for the day, month, and year.

In [None]:
layoff2 = layoff_df.copy()

layoff2['year']=layoff_df['date'].str.split('-').str[0]
layoff2['month']=layoff_df['date'].str.split('-').str[1]
layoff2['day']=layoff_df['date'].str.split('-').str[2]

layoff2['year']=layoff2['year'].astype(int)
layoff2['month']=layoff2['month'].astype(int)
layoff2['day']=layoff2['day'].astype(int)

# Alternative formula year is df["Year"] = df["date"].map(lambda x : x[:4])

In [None]:
layoff2.head(3)

# Yearly Data Analysis

In [None]:
layoff2.groupby(["year"],sort=False)["total_laid_off"].max()

In [None]:
g = sns.catplot(x='year', y='total_laid_off', data=layoff2)
g.fig.set_figwidth(10)
g.fig.set_figheight(6) 

In [None]:
# Largest total_laid_off value for every (year, company) pair, in a column
# named 'max'. The aggregation is given by name: passing the builtin `max`
# to agg is deprecated in recent pandas and emits a FutureWarning.
top_3_year_wise = layoff2.groupby(['year', 'company']).total_laid_off.agg(['max'])
top_3_year_wise

In [None]:
g = top_3_year_wise["max"].groupby(['year'],group_keys=False)
top_3_year_wise2 = g.apply(lambda x : x.sort_values(ascending=False).head(3))
top_3_year_wise2

In [None]:
g.nlargest(3)

In [None]:
top_3_year_wise2.index
top_3_year_wise2.values

In [None]:
top_3_year_wise3 = pd.DataFrame()
top_3_year_wise3["total_laid_off"] = top_3_year_wise2.values
top_3_year_wise3

In [None]:
top_3_year_wise3.columns = ['Number of Layoffs']

In [None]:
y = []
c = []
for i,j in top_3_year_wise2.index:
    y.append(i)
    c.append(j)
top_3_year_wise3["Year"] = y
top_3_year_wise3["Company"] = c
top_3_year_wise3

In [None]:
px.bar(top_3_year_wise3,x='Year',y='Number of Layoffs',color='Company', title='Top 3 Companies with the most Layoffs Year-wise',text_auto=True)

In [None]:
# Largest total_laid_off value for every (year, location) pair, in a column
# named 'max'. The aggregation is given by name: passing the builtin `max`
# to agg is deprecated in recent pandas and emits a FutureWarning.
top_3_loction_year_wise = layoff2.groupby(["year", "location"]).total_laid_off.agg(['max'])
top_3_loction_year_wise

In [None]:
def _three_largest_rows(frame):
    """Return the first three rows of `frame` after sorting by 'max' descending."""
    return frame.sort_values(["max"], ascending=False).head(3)


# Group the per-location maxima by year and keep three rows for each year.
gloc = top_3_loction_year_wise.groupby(["year"], group_keys=False)
top_3_loction_year_wise2 = gloc.apply(_three_largest_rows)
top_3_loction_year_wise2

In [None]:
top_3_loction_year_wise2.values.reshape(9,).tolist()

In [None]:
top_3_loction_year_wise3 = pd.DataFrame()
top_3_loction_year_wise3["total_laid_off"] = top_3_loction_year_wise2.values.reshape(9,).tolist()
top_3_loction_year_wise3

In [None]:
top_3_loction_year_wise3.columns = ['Number of Layoffs']

In [None]:
y = []
l = []
for i,j in top_3_loction_year_wise2.index:
    y.append(i)
    l.append(j)
top_3_loction_year_wise3["Year"] = y
top_3_loction_year_wise3["Location"] = l
top_3_loction_year_wise3

In [None]:
px.bar(top_3_loction_year_wise3,x='Year',y='Number of Layoffs',color='Location', title='Top 3 Locations Year-wise with the most Layoffs',text_auto=True)

In [None]:
total_laid_year_country_wise  = layoff2.groupby(["year","country"]).total_laid_off.sum()
total_laid_year_country_wise 

In [None]:
len(layoff2.country.unique())

In [None]:
total_laid_year_country_wise_year = []
total_laid_year_country_wise_country = []
for i,j in total_laid_year_country_wise.index:
    total_laid_year_country_wise_year.append(i)
    total_laid_year_country_wise_country.append(j)

In [None]:
total_laid_year_country_wise2 = pd.DataFrame({
    "Year": total_laid_year_country_wise_year,
    "Country": total_laid_year_country_wise_country,
    "total_laid_off": total_laid_year_country_wise.values 
})

In [None]:
total_laid_year_country_wise2

In [None]:
total_laid_year_country_wise2.columns = ['Year', 'Country', 'Number of Layoffs']

In [None]:
total_laid_year_country_wise2.sort_values(["Year","Number of Layoffs"],ascending=False,inplace=True)

In [None]:
px.bar(total_laid_year_country_wise2,x='Year',y='Number of Layoffs',color='Country',text='Country',
      title='Layoffs By Country Year-wise'
      )

# United States Data

In [None]:
usa_df = layoff2[layoff2['country']=="United States"]

In [None]:
plt.figure(figsize=(7,7))
sns.countplot(x=usa_df['industry'], data=usa_df)
plt.title('Most Affected Industries in the USA')
plt.xticks(rotation=90)
plt.show()

In [None]:
# Distribution of US records over locations, resized to 10 x 6 inches.
# NOTE(review): `location` looks like a categorical text column, so the KDE
# curve requested here may not be meaningful - confirm it is wanted.
loc = sns.displot(data=usa_df, x='location', kde=True)
plt.title('Most Affected Locations in the USA')
loc.fig.set_size_inches(10, 6)

In [None]:
# Per-company totals for the US subset, largest total_laid_off first.
# numeric_only=True restricts the sum to numeric columns; without it recent
# pandas "sums" the text columns by concatenating their strings, which is
# slow, meaningless, and changes which column comes first in the result.
usa_companies = (
    usa_df.groupby(['company'])
    .sum(numeric_only=True)
    .sort_values(['total_laid_off'], ascending=False)
)
usa_companies

In [None]:
# Plot the ten largest totals. The column is selected by name rather than by
# position (`.iloc[:10, :1]`), because which column comes first after the
# groupby-sum depends on the frame's column order and the pandas version.
px.bar(usa_companies[['total_laid_off']].head(10), text_auto=True,title='Top 10 companies in the USA with the most Layoffs ')

Let's do an autovisualization of all the data

In [None]:
#from autoviz.AutoViz_Class import AutoViz_Class

#AV = AutoViz_Class()

#viz = AV.AutoViz("../data/layoffs.csv", sep=",")