In [165]:
import pandas as pd
import numpy as np
from pathlib import Path

In [166]:
# Read the raw layoffs CSV from the local Resources folder and preview it.
file_path = "./Resources/layoffs.csv"
layoff_df = pd.read_csv(
    filepath_or_buffer=file_path,
    index_col=False,  # do not promote the first CSV column to the index
)
layoff_df.head(n=5)

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised
0,Pear Therapeutics,Boston,Healthcare,170.0,0.92,2023-04-07,Post-IPO,United States,409.0
1,ZestMoney,Bengaluru,Finance,100.0,0.2,2023-04-07,Series C,India,120.0
2,Absolute Software,Vancouver,Security,40.0,0.05,2023-04-06,Post-IPO,Canada,
3,Avocargo,Berlin,Transportation,16.0,1.0,2023-04-06,Seed,Germany,
4,Dunzo,Bengaluru,Food,300.0,0.3,2023-04-05,Unknown,India,382.0


In [167]:
# Remove the funding and stage columns; the later cells never reference them.
columns_to_drop = ['funds_raised', 'stage']
lay = layoff_df.drop(columns_to_drop, axis=1)
lay.head(n=5)

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,country
0,Pear Therapeutics,Boston,Healthcare,170.0,0.92,2023-04-07,United States
1,ZestMoney,Bengaluru,Finance,100.0,0.2,2023-04-07,India
2,Absolute Software,Vancouver,Security,40.0,0.05,2023-04-06,Canada
3,Avocargo,Berlin,Transportation,16.0,1.0,2023-04-06,Germany
4,Dunzo,Bengaluru,Food,300.0,0.3,2023-04-05,India


In [168]:
# Show the dtype pandas inferred for every remaining column, to see which
# columns need converting before they can be filtered or compared.
lay.dtypes

company                 object
location                object
industry                object
total_laid_off         float64
percentage_laid_off    float64
date                    object
country                 object
dtype: object

In [169]:
# Parse the `date` column into pandas datetimes, store it back on the frame,
# and print the dtypes to confirm the conversion took effect.
date_as_datetime = pd.to_datetime(lay['date'])
lay['date'] = date_as_datetime
print(lay.dtypes)

company                        object
location                       object
industry                       object
total_laid_off                float64
percentage_laid_off           float64
date                   datetime64[ns]
country                        object
dtype: object


In [170]:
# Keep only the rows whose date falls in the 2nd quarter of 2022.
# The window is half-open: [2022-04-01, 2022-07-01). An inclusive lower bound
# on April 1 (instead of "> 2022-03-31") keeps any timestamp that carries a
# time-of-day on March 31 out of the quarter; for date-only values the
# selected rows are exactly the same as before.
Q2_START = "2022-04-01"  # inclusive
Q2_END = "2022-07-01"    # exclusive
in_q2 = (lay['date'] >= Q2_START) & (lay['date'] < Q2_END)
lay = lay[in_q2]
lay.head()

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,country
1457,Enjoy,SF Bay Area,Retail,400.0,0.18,2022-06-30,United States
1458,Crejo.Fun,Bengaluru,Education,170.0,1.0,2022-06-30,India
1459,Stash Financial,New York City,Finance,40.0,0.08,2022-06-30,United States
1460,Nate,New York City,Retail,30.0,0.2,2022-06-30,United States
1461,Snyk,Boston,Security,30.0,,2022-06-30,United States


In [171]:
# Build a second frame with every row containing a missing value removed.
# `lay` itself is not modified, so both versions stay available.
lay_drop = lay.dropna(axis=0, how="any")
lay_drop.head(n=5)

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,country
1457,Enjoy,SF Bay Area,Retail,400.0,0.18,2022-06-30,United States
1458,Crejo.Fun,Bengaluru,Education,170.0,1.0,2022-06-30,India
1459,Stash Financial,New York City,Finance,40.0,0.08,2022-06-30,United States
1460,Nate,New York City,Retail,30.0,0.2,2022-06-30,United States
1462,Stream,Boulder,Product,20.0,0.12,2022-06-30,United States


In [172]:
# Per-column number of non-null entries in the frame that still holds NaN values.
lay.notna().sum()

company                303
location               303
industry               303
total_laid_off         229
percentage_laid_off    217
date                   303
country                303
dtype: int64

In [173]:
# Per-column number of non-null entries once the rows with NaN were dropped.
lay_drop.notna().sum()

company                176
location               176
industry               176
total_laid_off         176
percentage_laid_off    176
date                   176
country                176
dtype: int64

In [177]:
# Write the filtered frame that still contains NaN values to CSV, leaving out
# the index column.
lay.to_csv(
    "quarter2_nan.csv",
    index=False,
    encoding="utf-8",
)

In [178]:
# Write the NaN-free frame to CSV, leaving out the index column.
lay_drop.to_csv(
    "quarter2.csv",
    index=False,
    encoding="utf-8",
)