## Reading csv data 

In [None]:
import pandas as p 

data = p.read_csv("Salary.csv")

In [3]:
data

Unnamed: 0,Age,Gender,Education Level,Job Title,Years of Experience,Salary
0,32.0,Male,Bachelor's,Software Engineer,5.0,90000.0
1,28.0,Female,Master's,Data Analyst,3.0,65000.0
2,45.0,Male,PhD,Senior Manager,15.0,150000.0
3,36.0,Female,Bachelor's,Sales Associate,7.0,60000.0
4,52.0,Male,Master's,Director,20.0,200000.0
...,...,...,...,...,...,...
370,35.0,Female,Bachelor's,Senior Marketing Analyst,8.0,85000.0
371,43.0,Male,Master's,Director of Operations,19.0,170000.0
372,29.0,Female,Bachelor's,Junior Project Manager,2.0,40000.0
373,34.0,Male,Bachelor's,Senior Operations Coordinator,7.0,90000.0


In [13]:
education_stats = data.groupby('Education Level')['Salary'].agg([
    ('Average Salary', 'mean'),
    ('Minimum Salary', 'min'),
    ('Maximum Salary', 'max'),
    ('Count', 'count')
]).round(2)

print("Basic Statistics by Education Level:")
print(education_stats)

Basic Statistics by Education Level:
                 Average Salary  Minimum Salary  Maximum Salary  Count
Education Level                                                       
Bachelor's             74756.03           350.0        250000.0    224
Master's              129795.92         40000.0        200000.0     98
PhD                   157843.14        110000.0        250000.0     51


In [16]:
highest_paid = data.loc[data.groupby('Education Level')['Salary'].idxmax()]
print("\nHighest Paid Person in Each Education Level:")
print(highest_paid[['Education Level', 'Job Title', 'Salary', 'Years of Experience']])


Highest Paid Person in Each Education Level:
   Education Level                 Job Title    Salary  Years of Experience
30      Bachelor's                       CEO  250000.0                 25.0
4         Master's                  Director  200000.0                 20.0
83             PhD  Chief Technology Officer  250000.0                 24.0


In [17]:
median_salary = data.groupby('Education Level')['Salary'].median()
print("\nMedian Salary by Education Level:")
print(median_salary)


Median Salary by Education Level:
Education Level
Bachelor's     65000.0
Master's      120000.0
PhD           155000.0
Name: Salary, dtype: float64


In [22]:
# Group by both Education Level and Gender
education_gender_stats = data.groupby(['Education Level', 'Gender'])['Salary'].agg([
    ('Average Salary', 'mean'),
    ('Minimum Salary', 'min'),
    ('Maximum Salary', 'max'),
    ('Count', 'count')
]).round(2)

print("Basic Statistics by Education Level and Gender:")
print(education_gender_stats)

'''
# If you want to see the gender pay gap within each education level
gender_gap = data.groupby('Education Level').apply(
    lambda x: p.Series({
        'Male_Avg_Salary': x[x['Gender'] == 'Male']['Salary'].mean(),
        'Female_Avg_Salary': x[x['Gender'] == 'Female']['Salary'].mean(),
        'Gender_Gap': x[x['Gender'] == 'Male']['Salary'].mean() - x[x['Gender'] == 'Female']['Salary'].mean()
    })
).round(2)

print("\nGender Pay Gap Analysis by Education Level:")
print(gender_gap)
'''

Basic Statistics by Education Level and Gender:
                        Average Salary  Minimum Salary  Maximum Salary  Count
Education Level Gender                                                       
Bachelor's      Female        71019.42         35000.0        160000.0    103
                Male          77936.78           350.0        250000.0    121
Master's        Female       120200.00         40000.0        190000.0     50
                Male         139791.67         75000.0        200000.0     48
PhD             Female       155384.62        140000.0        185000.0     26
                Male         160400.00        110000.0        250000.0     25


'\n# If you want to see the gender pay gap within each education level\ngender_gap = data.groupby(\'Education Level\').apply(\n    lambda x: p.Series({\n        \'Male_Avg_Salary\': x[x[\'Gender\'] == \'Male\'][\'Salary\'].mean(),\n        \'Female_Avg_Salary\': x[x[\'Gender\'] == \'Female\'][\'Salary\'].mean(),\n        \'Gender_Gap\': x[x[\'Gender\'] == \'Male\'][\'Salary\'].mean() - x[x[\'Gender\'] == \'Female\'][\'Salary\'].mean()\n    })\n).round(2)\n\nprint("\nGender Pay Gap Analysis by Education Level:")\nprint(gender_gap)\n'

In [21]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# 1. Interactive Box Plot
fig1 = px.box(data, x='Education Level', y='Salary', 
              title='Salary Distribution by Education Level',
              color='Education Level')
fig1.show()

# 2. Interactive Violin Plot
fig2 = px.violin(data, x='Education Level', y='Salary',
                 box=True, points="all",
                 title='Salary Distribution with Individual Points',
                 color='Education Level')
fig2.show()

# 3. Scatter Plot: Salary vs Experience colored by Education
fig3 = px.scatter(data, x='Years of Experience', y='Salary',
                  color='Education Level',
                  size='Salary',
                  title='Salary vs Experience by Education Level',
                  hover_data=['Job Title'])
fig3.show()

# 4. Combined Bar Chart
avg_salary = data.groupby('Education Level')['Salary'].mean()
max_salary = data.groupby('Education Level')['Salary'].max()

fig4 = go.Figure(data=[
    go.Bar(name='Average Salary', x=avg_salary.index, y=avg_salary.values),
    go.Bar(name='Maximum Salary', x=max_salary.index, y=max_salary.values)
])
fig4.update_layout(title='Average vs Maximum Salary by Education Level',
                  barmode='group')
fig4.show()

# 5. Sunburst Chart for Job Titles and Education
fig5 = px.sunburst(data, path=['Education Level', 'Job Title'], values='Salary',
                   title='Hierarchy of Salaries by Education and Job Title')
fig5.show()

ModuleNotFoundError: No module named 'plotly'