# Data Analysis with Pandas

This notebook demonstrates basic data analysis using pandas with CSV and JSON data.

## 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import json

# Display settings: no cap on the number of columns shown, no fixed display width
pd.options.display.max_columns = None
pd.options.display.width = None

## 2. Loading CSV Data

In [None]:
# Read the employee records from the sample CSV file
employees_csv_path = '../../sample_data/employees.csv'
employees_df = pd.read_csv(employees_csv_path)

# Preview the top of the table (head() returns the first 5 rows by default)
print("First 5 rows of employee data:")
employees_df.head()

In [None]:
# Per-column overview printed by DataFrame.info()
print("Dataset Info:")
employees_df.info()

# Row and column totals, unpacked from the (rows, columns) shape tuple
n_rows, n_cols = employees_df.shape
print("\nDataset Shape:")
print(f"Rows: {n_rows}, Columns: {n_cols}")

## 3. Basic Data Analysis

In [None]:
# Summary statistics as computed by DataFrame.describe()
print("Statistical Summary:")
summary_stats = employees_df.describe()
summary_stats

In [None]:
# Mean of the 'Salary' column within each 'Department' group
print("Average Salary by Department:")
salary_by_department = employees_df.groupby('Department')['Salary']
dept_salary = salary_by_department.mean()
print(dept_salary)

In [None]:
# Number of rows for each distinct value in the 'Department' column
print("Employee Count by Department:")
department_column = employees_df['Department']
dept_count = department_column.value_counts()
print(dept_count)

## 4. Data Filtering and Selection

In [None]:
# Keep only the rows whose 'Salary' is strictly greater than 70000
earns_over_70k = employees_df['Salary'].gt(70000)
high_earners = employees_df.loc[earns_over_70k]
print("Employees with salary > $70,000:")
high_earners

In [None]:
# Keep only the rows whose 'Department' equals 'Engineering'
in_engineering = employees_df['Department'].eq('Engineering')
engineering = employees_df.loc[in_engineering]
print("Engineering Department Employees:")
engineering

## 5. Loading JSON Data

In [None]:
# Load product data from JSON.
# The encoding is given explicitly: without it, open() falls back to the
# platform's locale encoding, which can mis-decode non-ASCII text in the file.
with open('../../sample_data/products.json', 'r', encoding='utf-8') as f:
    products_data = json.load(f)

# Build a DataFrame from the value stored under the top-level 'products' key
products_df = pd.DataFrame(products_data['products'])
print("Product Data:")
products_df

In [None]:
# Average each product's 'ratings' entry and store it in a new 'avg_rating' column
# (assumes every 'ratings' entry is a collection of numbers — TODO confirm against the data)
ratings_per_product = products_df['ratings']
products_df['avg_rating'] = ratings_per_product.apply(lambda scores: np.mean(scores))

print("Products with Average Ratings:")
rating_columns = ['name', 'price', 'avg_rating']
products_df[rating_columns]

In [None]:
# Per-product inventory value: 'price' multiplied by 'stock', row by row
inventory_value = products_df['price'].mul(products_df['stock'])
products_df['inventory_value'] = inventory_value

# Grand total across all products
total_value = inventory_value.sum()
print(f"Total Inventory Value: ${total_value:,.2f}")

## 6. Data Manipulation

In [None]:
# Add a new column to employees
employees_df['Salary_Category'] = employees_df['Salary'].apply(
    lambda x: 'High' if x >= 80000 else ('Medium' if x >= 60000 else 'Low')
)
print("Employees with Salary Categories:")
employees_df

In [None]:
# Order the employees by 'Years_Experience', largest value first
sorted_by_exp = employees_df.sort_values(by='Years_Experience', ascending=False)
print("Employees sorted by experience:")
experience_columns = ['Name', 'Department', 'Years_Experience', 'Salary']
sorted_by_exp[experience_columns]

## 7. Practice Exercises

Try these exercises:
1. Find the employee with the highest salary
2. Calculate the average years of experience by department
3. Find products with average rating above 4.5
4. Calculate the total stock across all products

In [None]:
# Your practice code here
