# Data Visualization

This notebook demonstrates data visualization using matplotlib and seaborn.

## 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set style for better-looking plots
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (10, 6)

# For inline plotting in Jupyter
%matplotlib inline

## 2. Load Sample Data

In [None]:
# Load employee data
employees_df = pd.read_csv('../../sample_data/employees.csv')

# Fail fast if a column used by the plotting cells below is absent, rather
# than reporting success and hitting a KeyError halfway through the notebook.
required_columns = {'Department', 'Salary', 'Years_Experience', 'Age'}
missing_columns = required_columns - set(employees_df.columns)
if missing_columns:
    raise KeyError(
        f"employees.csv is missing required columns: {sorted(missing_columns)}"
    )

print("Data loaded successfully")

# Last expression of the cell: rich preview of the first rows
employees_df.head()

## 3. Bar Charts

In [None]:
# Headcount per department, drawn as a vertical bar chart.
# dept_counts is a Series indexed by department name.
dept_counts = employees_df['Department'].value_counts()

fig, ax = plt.subplots(figsize=(10, 6))
dept_counts.plot.bar(ax=ax, color='skyblue', edgecolor='black')

ax.set_title('Employee Count by Department', fontsize=16, fontweight='bold')
ax.set_xlabel('Department', fontsize=12)
ax.set_ylabel('Number of Employees', fontsize=12)

# Tilt the department names so longer labels do not collide
plt.setp(ax.get_xticklabels(), rotation=45)

fig.tight_layout()
plt.show()

In [None]:
# Mean salary per department, ordered from highest to lowest,
# drawn as a horizontal bar chart.
avg_salary = (
    employees_df
    .groupby('Department')['Salary']
    .mean()
    .sort_values(ascending=False)
)

fig, ax = plt.subplots(figsize=(10, 6))
avg_salary.plot.barh(ax=ax, color='lightgreen', edgecolor='black')

ax.set_title('Average Salary by Department', fontsize=16, fontweight='bold')
ax.set_xlabel('Average Salary ($)', fontsize=12)
ax.set_ylabel('Department', fontsize=12)

fig.tight_layout()
plt.show()

## 4. Scatter Plots

In [None]:
# Scatter of salary (y) against years of experience (x), one marker per employee
fig, ax = plt.subplots(figsize=(10, 6))

ax.scatter(
    employees_df['Years_Experience'],
    employees_df['Salary'],
    c='coral',
    s=100,
    alpha=0.6,            # semi-transparent so overlapping points stay visible
    edgecolors='black',
)

ax.set_title('Salary vs Years of Experience', fontsize=16, fontweight='bold')
ax.set_xlabel('Years of Experience', fontsize=12)
ax.set_ylabel('Salary ($)', fontsize=12)
ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

In [None]:
# Salary vs experience, colored by department (one scatter series per department)
plt.figure(figsize=(12, 6))

# Skip missing department labels: NaN never compares equal to itself, so the
# == filter in the loop below could not select those rows anyway.
departments = employees_df['Department'].dropna().unique()

# Keep the three hand-picked colours for the first three departments and top
# up from a seaborn palette when the data has more, so that zip() below never
# silently drops a department from the plot.
colors = ['red', 'blue', 'green']
n_extra = len(departments) - len(colors)
if n_extra > 0:
    colors = colors + list(sns.color_palette('husl', n_extra).as_hex())

for dept, color in zip(departments, colors):
    dept_data = employees_df[employees_df['Department'] == dept]
    plt.scatter(dept_data['Years_Experience'], dept_data['Salary'],
                label=dept, c=color, s=100, alpha=0.6, edgecolors='black')

plt.title('Salary vs Experience by Department', fontsize=16, fontweight='bold')
plt.xlabel('Years of Experience', fontsize=12)
plt.ylabel('Salary ($)', fontsize=12)
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 5. Histograms

In [None]:
# Histogram of employee salaries, split into 8 bins
fig, ax = plt.subplots(figsize=(10, 6))

ax.hist(
    employees_df['Salary'],
    bins=8,
    color='purple',
    alpha=0.7,
    edgecolor='black',
)

ax.set_title('Distribution of Salaries', fontsize=16, fontweight='bold')
ax.set_xlabel('Salary ($)', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)

# Horizontal guide lines only, to make bar heights easier to read
ax.grid(True, alpha=0.3, axis='y')

fig.tight_layout()
plt.show()

In [None]:
# Histogram of employee ages, split into 10 bins
fig, ax = plt.subplots(figsize=(10, 6))

ax.hist(
    employees_df['Age'],
    bins=10,
    color='orange',
    alpha=0.7,
    edgecolor='black',
)

ax.set_title('Distribution of Employee Ages', fontsize=16, fontweight='bold')
ax.set_xlabel('Age', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)

# Horizontal guide lines only, to make bar heights easier to read
ax.grid(True, alpha=0.3, axis='y')

fig.tight_layout()
plt.show()

## 6. Box Plots

In [None]:
# Box plot of salary per department, drawn with seaborn on an explicit Axes.
# NOTE(review): recent seaborn releases may warn when `palette` is passed
# without `hue`; confirm against the installed version.
fig, ax = plt.subplots(figsize=(10, 6))

sns.boxplot(
    data=employees_df,
    x='Department',
    y='Salary',
    palette='Set2',
    ax=ax,
)

ax.set_title('Salary Distribution by Department', fontsize=16, fontweight='bold')
ax.set_xlabel('Department', fontsize=12)
ax.set_ylabel('Salary ($)', fontsize=12)

# Tilt the department names so longer labels do not collide
plt.setp(ax.get_xticklabels(), rotation=45)

fig.tight_layout()
plt.show()

## 7. Pie Charts

In [None]:
# Department distribution as a pie chart (share of employees per department)
dept_counts = employees_df['Department'].value_counts()

# Offset only the first wedge. The tuple is sized from the data because
# plt.pie requires `explode` to have exactly one entry per wedge; a fixed
# three-element tuple fails as soon as the department count is not three.
explode = tuple(0.05 if i == 0 else 0 for i in range(len(dept_counts)))

plt.figure(figsize=(10, 8))

# pie() cycles through this list if there are more wedges than colours
colors = ['#ff9999', '#66b3ff', '#99ff99']

plt.pie(dept_counts.values, labels=dept_counts.index, autopct='%1.1f%%',
        colors=colors, startangle=90, explode=explode)
plt.title('Employee Distribution by Department', fontsize=16, fontweight='bold')
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.tight_layout()
plt.show()

## 8. Subplots - Multiple Charts

In [None]:
# Dashboard: four charts on a 2x2 grid sharing one figure.
# Relies on `dept_counts` having been computed by an earlier cell.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
(ax_bar, ax_scatter), (ax_hist, ax_box) = axes

# Top-left: employee count per department
dept_counts.plot.bar(ax=ax_bar, color='skyblue', edgecolor='black')
ax_bar.set_title('Employee Count by Department', fontweight='bold')
ax_bar.set_xlabel('Department')
ax_bar.set_ylabel('Count')
ax_bar.tick_params(axis='x', rotation=45)

# Top-right: salary against years of experience
ax_scatter.scatter(
    employees_df['Years_Experience'],
    employees_df['Salary'],
    c='coral',
    s=100,
    alpha=0.6,
    edgecolors='black',
)
ax_scatter.set_title('Salary vs Experience', fontweight='bold')
ax_scatter.set_xlabel('Years of Experience')
ax_scatter.set_ylabel('Salary ($)')
ax_scatter.grid(True, alpha=0.3)

# Bottom-left: salary histogram
ax_hist.hist(
    employees_df['Salary'],
    bins=8,
    color='purple',
    alpha=0.7,
    edgecolor='black',
)
ax_hist.set_title('Salary Distribution', fontweight='bold')
ax_hist.set_xlabel('Salary ($)')
ax_hist.set_ylabel('Frequency')
ax_hist.grid(True, alpha=0.3, axis='y')

# Bottom-right: salary box plot per department (seaborn)
sns.boxplot(
    data=employees_df,
    x='Department',
    y='Salary',
    palette='Set2',
    ax=ax_box,
)
ax_box.set_title('Salary by Department', fontweight='bold')
ax_box.set_xlabel('Department')
ax_box.set_ylabel('Salary ($)')
ax_box.tick_params(axis='x', rotation=45)

fig.tight_layout()
plt.show()

## 9. Practice Exercise

Try creating these visualizations:

1. A line plot showing the relationship between age and years of experience (sort the data by age first so the line reads left to right)
2. A violin plot of salary by department (use seaborn)
3. A heatmap showing correlations between numerical columns
4. A custom visualization of your own design!

In [None]:
# Your practice code here
