**Recommended Analysis**
* Which companies offer the most paid parental leave weeks?
* Is maternity leave typically longer than paternity leave?
* What is the distribution of parental leave weeks offered?
* Are there noticeable differences between industries?


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Location of the parental-leave CSV inside the Kaggle input directory.
csv_path = "/kaggle/input/things-we-do-for-family-some-bald-guy/parental_leave.csv"

# Read the file using the 'unicode_escape' codec instead of pandas' default
# UTF-8 decoding.
df = pd.read_csv(csv_path, encoding='unicode_escape')

# Preview the first five rows.
df.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); by default
# describe() reports on the numeric columns of a mixed-dtype frame.
df.describe()

In [None]:
# Column names, non-null counts and dtypes - a quick way to spot which
# columns contain missing values.
df.info()

# 1. Which companies offer the most paid parental leave weeks?

In [None]:
# Total paid leave per company: paid maternity weeks plus paid paternity weeks,
# with a missing value in either column counted as 0 weeks.
total_col = 'Total Paid Parental Leave Weeks'
maternity_paid = df['Paid Maternity Leave'].fillna(0)
paternity_paid = df['Paid Paternity Leave'].fillna(0)
df[total_col] = maternity_paid + paternity_paid

# Order the companies from the largest total to the smallest.
df_sorted = df.sort_values(by=total_col, ascending=False)

# Keep only the company name and its total for the ten highest-ranked rows.
top_companies = df_sorted[['Company', total_col]].head(10)
print(top_companies)


In [None]:
# Ten highest-ranked rows of the sorted frame, reduced to name and total.
top_companies = df_sorted[['Company', 'Total Paid Parental Leave Weeks']].head(10)

# Horizontal bar chart with one bar per company.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(
    top_companies['Company'],
    top_companies['Total Paid Parental Leave Weeks'],
    color='skyblue',
)
ax.set_xlabel('Total Paid Parental Leave Weeks')
ax.set_ylabel('Company')
ax.set_title('Top Companies Offering the Most Paid Parental Leave Weeks')
ax.invert_yaxis()  # flip the axis so the first (highest) row is drawn at the top
plt.show()


# 2. Is maternity leave typically longer than paternity leave?

In [None]:
# Mean number of paid weeks for each leave type. Series.mean() skips missing
# values, so rows with no value in a column do not contribute to that
# column's average.
maternity_weeks = df['Paid Maternity Leave']
paternity_weeks = df['Paid Paternity Leave']
avg_paid_maternity_leave = maternity_weeks.mean()
avg_paid_paternity_leave = paternity_weeks.mean()

# Report both averages side by side.
print("Average duration of paid maternity leave:", avg_paid_maternity_leave)
print("Average duration of paid paternity leave:", avg_paid_paternity_leave)


In [None]:
# Mean paid weeks per leave type (missing values are skipped by Series.mean()).
avg_paid_maternity_leave = df['Paid Maternity Leave'].mean()
avg_paid_paternity_leave = df['Paid Paternity Leave'].mean()

# Bar label -> bar height, in the order the bars are drawn.
leave_means = {
    'Paid Maternity Leave': avg_paid_maternity_leave,
    'Paid Paternity Leave': avg_paid_paternity_leave,
}
labels = list(leave_means.keys())
averages = list(leave_means.values())

# Two-bar comparison of the averages.
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(labels, averages, color=['blue', 'orange'])
ax.set_title('Comparison of Average Duration of Paid Maternity and Paternity Leave')
ax.set_ylabel('Average Duration (weeks)')
plt.show()


# 3. What is the distribution of parental leave weeks offered?

In [None]:
# Pull out each of the four leave-duration columns
# (paid/unpaid maternity, paid/unpaid paternity).
paid_maternity_leave = df['Paid Maternity Leave']
unpaid_maternity_leave = df['Unpaid Maternity Leave']
paid_paternity_leave = df['Paid Paternity Leave']
unpaid_paternity_leave = df['Unpaid Paternity Leave']

# Stack the four columns end to end into one long Series of leave durations.
# ignore_index=True gives the result a fresh 0..n-1 index; without it every
# original row label would appear four times in the combined Series.
parental_leave_weeks = pd.concat(
    [
        paid_maternity_leave,
        unpaid_maternity_leave,
        paid_paternity_leave,
        unpaid_paternity_leave,
    ],
    ignore_index=True,
)
parental_leave_weeks



In [None]:
# Missing entries carry no duration, so leave them out of the histogram.
weeks_offered = parental_leave_weeks.dropna()

# Histogram of all leave durations, split into 20 equal-width bins.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(weeks_offered, bins=20, color='skyblue', edgecolor='black')
ax.set_title('Distribution of Parental Leave Weeks Offered')
ax.set_xlabel('Number of Weeks')
ax.set_ylabel('Frequency')
ax.grid(True)
plt.show()


# 4. Are there noticeable differences between industries?

In [None]:
# Keep every numeric column of df. include='number' selects integer as well as
# float columns; restricting the selection to float64 would silently drop any
# column that pandas parsed as int64 (whole numbers with no missing values).
numeric_columns = df.select_dtypes(include='number')

# Group the rows by their industry label (matched to df['Industry'] by row
# index) and take the mean of each numeric column within every industry.
industry_avg_leave = numeric_columns.groupby(df['Industry']).mean()

# Show the per-industry averages.
print("Average Maternity and Paternity Leave Policies Across Industries:")
print(industry_avg_leave)
