In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the raw layoffs dataset from the current working directory.
df = pd.read_csv('layoffs.csv')
print("Initial shape:", df.shape)

# Drop rows that lack the fields every later step relies on.
df = df.dropna(subset=['company', 'industry'])

# Unparseable dates become NaT instead of raising (errors='coerce').
df['date'] = pd.to_datetime(df['date'], errors='coerce')

# Country is taken as the text after the last comma in `location`.
# The vectorized .str accessor is used instead of apply(lambda ...) so that
# rows with a missing location yield NaN rather than raising AttributeError
# (only 'company' and 'industry' are guaranteed non-null at this point).
# NOTE(review): assumes location looks like "City, Country" - confirm
# against the dataset.
df['country'] = df['location'].str.split(',').str[-1].str.strip()

# Visualize which cells are still missing after cleaning.
sns.heatmap(df.isnull(), cbar=False)
plt.title("Missing Values Heatmap")
plt.show()

# Top companies by number of layoff records: value_counts() counts rows per
# company, not the number of employees affected.
top_companies = df['company'].value_counts().head(10)
top_companies.plot(kind='barh')
plt.title("Top 10 Companies by Layoffs")
plt.xlabel("Count")
# barh draws the first entry at the bottom; invert so the largest is on top.
plt.gca().invert_yaxis()
plt.show()

# Save cleaned data.
# NOTE: to_csv does not create missing directories, so '../data' must
# already exist relative to the working directory.
df.to_csv('../data/cleaned_layoffs.csv', index=False)
print("Cleaned data saved.")