# Pandas Tutorial: Comprehensive Guide
## Topics Covered:
- Series and DataFrame creation
- Loading JSON files
- Slicing and indexing techniques
- Passing Series/DataFrames to functions
- Exporting DataFrames to JSON

In [None]:
import pandas as pd
import numpy as np
import json

## 1. Creating Series from Different Collections

In [None]:
# Build a Series straight from a plain Python list (no explicit index given)
series_from_list = pd.Series(data=[10, 20, 30, 40, 50])
# One print call: heading, the Series, then a trailing blank line
print("Series from list:", series_from_list, "", sep="\n")

In [None]:
# Build a Series from a mapping: the keys become the index labels
series_from_dict = pd.Series(dict(a=100, b=200, c=300, d=400))
print("Series from dictionary:")
print(series_from_dict, end="\n\n")

In [None]:
# Build a Series from a NumPy array and attach custom string labels
np_array = np.arange(5, 50, 10)  # 5, 15, 25, 35, 45
series_from_numpy = pd.Series(
    data=np_array,
    index=['first', 'second', 'third', 'fourth', 'fifth'],
)
print("Series from NumPy array:", series_from_numpy, "", sep="\n")

## 2. Creating DataFrames from Different Collections

In [None]:
# Column-oriented input: each key is a column name, each list holds that column's values
data_dict = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    Age=[25, 30, 35, 28, 32],
    City=['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'],
    Salary=[70000, 80000, 75000, 82000, 78000],
)
df_from_dict = pd.DataFrame(data=data_dict)
print("DataFrame from dictionary:", df_from_dict, "", sep="\n")

In [None]:
# Row-oriented input: one dict per row, with the keys acting as column names
data_list = [
    {'Product': product, 'Price': price, 'Stock': stock}
    for product, price, stock in (
        ('Laptop', 1200, 15),
        ('Mouse', 25, 100),
        ('Keyboard', 75, 50),
        ('Monitor', 300, 30),
    )
]
df_from_list = pd.DataFrame(data=data_list)
print("DataFrame from list of dictionaries:")
print(df_from_list, end="\n\n")

In [None]:
# 2-D array input: rows map to DataFrame rows, column names are supplied explicitly
np_data = np.arange(1, 13).reshape(4, 3)  # 1..12 laid out as 4 rows x 3 columns
df_from_numpy = pd.DataFrame(
    data=np_data,
    columns=['Column_A', 'Column_B', 'Column_C'],
)
print("DataFrame from NumPy array:", df_from_numpy, "", sep="\n")

## 3. Loading Data from JSON File

In [None]:
# Build a small employee dataset and write it to disk as a sample JSON file.
# The field tuple fixes the key order of every record.
employee_fields = ('id', 'name', 'department', 'salary', 'years_experience')
employee_rows = [
    (1, 'John Smith', 'Engineering', 95000, 5),
    (2, 'Emma Wilson', 'Marketing', 72000, 3),
    (3, 'Michael Brown', 'Engineering', 105000, 8),
    (4, 'Sarah Davis', 'HR', 68000, 4),
    (5, 'James Johnson', 'Sales', 85000, 6),
]
sample_data = {
    'employees': [dict(zip(employee_fields, row)) for row in employee_rows]
}

# Serialise to text first, then write it out in a single call
with open('employees.json', 'w') as f:
    f.write(json.dumps(sample_data, indent=2))

print("JSON file 'employees.json' created successfully!")

In [None]:
# Load DataFrame from JSON file
# NOTE: the result is used below as a frame with a single 'employees' column
# whose cells are per-employee records, not one column per field.
df_from_json = pd.read_json('employees.json')
print("DataFrame loaded from JSON:")
print(df_from_json)
print()
print("DataFrame info:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would add a stray "None" line after the report.
df_from_json.info()

## 4. Slicing and Indexing Techniques
### 4.1 Series Indexing

In [None]:
# Create a sample series labelled by day of the week
temperatures = pd.Series([22, 25, 28, 24, 26, 23, 27],
                         index=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])
print("Temperature series:")
print(temperatures)
print()

# Single element access
# By label: plain [] looks the key up in the index.
print(f"Temperature on Monday: {temperatures['Mon']}")
# By position: use .iloc. Passing an integer to plain [] on a Series with a
# non-integer index relies on a deprecated positional fallback.
print(f"Temperature at position 0: {temperatures.iloc[0]}")
print()

# Slicing
# Label-based slices include BOTH endpoints, so this returns Mon through Fri.
print("Weekday temperatures (Mon-Fri):")
print(temperatures['Mon':'Fri'])
print()

# Boolean indexing
# The comparison yields a boolean Series that selects the matching entries.
print("Days with temperature > 25:")
print(temperatures[temperatures > 25])
print()

### 4.2 DataFrame Indexing with loc and iloc

In [None]:
# Using the employees DataFrame
df = df_from_json.copy()

# Access single column
# Each cell of the 'employees' column is one employee record, so the names have
# to be pulled out of every cell; indexing [0]['name'] would show only the
# first employee's name rather than a column of names.
print("Names column:")
print(df['employees'].apply(lambda employee: employee['name']).rename('name'))
print()

# Let's normalize the JSON for better access
# json_normalize flattens the list of records into one column per field.
df_employees = pd.json_normalize(sample_data['employees'])
print("Normalized DataFrame:")
print(df_employees)
print()

In [None]:
# loc selects by index label (and column name), not by position
print("Using loc to access row 0:", df_employees.loc[0], "", sep="\n")

# Label slices are inclusive of the end label, so 0:2 selects rows 0, 1 and 2
print("Using loc to access specific rows and columns:")
print(df_employees.loc[0:2, ['name', 'salary']], end="\n\n")

In [None]:
# iloc selects purely by integer position; slice stops are exclusive
print("Using iloc to access first 3 rows and first 3 columns:")
print(df_employees.iloc[:3, :3], end="\n\n")

# Lists of positions pick out individual rows and columns
print(
    "Using iloc to access specific positions:",
    df_employees.iloc[[0, 2, 4], [1, 3]],
    "",
    sep="\n",
)

### 4.3 Boolean Indexing

In [None]:
# Keep only the rows whose salary exceeds 80000
high_earners = df_employees.loc[df_employees['salary'].gt(80000)]
print("Employees with salary > $80,000:", high_earners, "", sep="\n")

# Combine two row masks with & (both conditions must hold)
experienced_engineers = df_employees.loc[
    df_employees['department'].eq('Engineering')
    & df_employees['years_experience'].gt(5)
]
print("Experienced engineers (>5 years):")
print(experienced_engineers, end="\n\n")

### 4.4 Advanced Slicing

In [None]:
# Pick a subset of columns by passing a list of column names
print("Selected columns:")
print(df_employees.loc[:, ['name', 'department', 'salary']], end="\n\n")

# query() filters rows using a boolean expression written as a string
print(
    "Using query method to filter:",
    df_employees.query('salary >= 85000 and years_experience >= 5'),
    "",
    sep="\n",
)

## 5. Passing Series and DataFrames to Functions

In [None]:
# Function that takes a Series
def calculate_statistics(series):
    """Return the mean, median, std, min and max of ``series`` as a dict.

    The keys are the statistic names, in that order; each value is the result
    of calling the Series method of the same name with no arguments.
    """
    stat_names = ('mean', 'median', 'std', 'min', 'max')
    return {stat: getattr(series, stat)() for stat in stat_names}

# Pass the salary column (a Series) to the helper and report each statistic
salary_stats = calculate_statistics(series=df_employees['salary'])
print("Salary statistics:")
for key, value in salary_stats.items():
    # Format as currency: thousands separators, two decimal places
    print(f"{key}: ${value:,.2f}")
print("")

In [None]:
# Function that takes a DataFrame
def add_bonus_column(df, bonus_percentage=0.10):
    """Return a copy of ``df`` with 'bonus' and 'total_compensation' columns.

    'bonus' is ``salary * bonus_percentage`` and 'total_compensation' is
    ``salary + bonus``. The input frame is left untouched.
    """
    # assign() works on a copy; the second callable can see the 'bonus'
    # column created by the first because keywords are applied in order.
    return df.assign(
        bonus=lambda frame: frame['salary'] * bonus_percentage,
        total_compensation=lambda frame: frame['salary'] + frame['bonus'],
    )

# Apply a 15% bonus; the helper returns a new frame, so df_employees is unchanged
df_with_bonus = add_bonus_column(df_employees, bonus_percentage=0.15)
print(
    "DataFrame with bonus calculations:",
    df_with_bonus[['name', 'salary', 'bonus', 'total_compensation']],
    "",
    sep="\n",
)

In [None]:
# Using apply() with custom functions
def categorize_experience(years):
    """Map a number of years of experience to a seniority label.

    Under 3 years is 'Junior', 3 up to (not including) 6 is 'Mid-level',
    and anything else is 'Senior'.
    """
    # Guard clauses, checked from the lowest band upwards
    if years < 3:
        return 'Junior'
    if years < 6:
        return 'Mid-level'
    return 'Senior'

# Series.apply calls the function once per value; store the labels as a new column
df_employees['experience_level'] = (
    df_employees['years_experience'].apply(categorize_experience)
)
print("DataFrame with experience levels:")
print(df_employees[['name', 'years_experience', 'experience_level']], end="\n\n")

In [None]:
# Function that operates on DataFrame rows
def calculate_salary_per_year(row):
    """Return salary divided by years of experience for one row.

    ``row`` is indexed by the keys 'salary' and 'years_experience'. When
    the years value is not positive the result is 0, which avoids dividing
    by zero.
    """
    years = row['years_experience']
    return row['salary'] / years if years > 0 else 0

# axis=1 makes DataFrame.apply pass each row (rather than each column) to the function
df_employees['salary_per_year'] = df_employees.apply(
    calculate_salary_per_year,
    axis=1,
)
print(
    "Salary per year of experience:",
    df_employees[['name', 'salary', 'years_experience', 'salary_per_year']],
    "",
    sep="\n",
)

## 6. Exporting DataFrames to JSON

In [None]:
# Write the frame to JSON using the 'columns' orientation (passed explicitly here)
df_employees.to_json(
    'employees_output.json',
    indent=2,
    orient='columns',
)
print("Exported to 'employees_output.json' with orient='columns'", end="\n\n")

In [None]:
# Write the frame to JSON using the 'records' orientation
df_employees.to_json(
    'employees_records.json',
    indent=2,
    orient='records',
)
print("Exported to 'employees_records.json' with orient='records'", end="\n\n")

# Read the file back and show what was written
with open('employees_records.json', 'r') as f:
    print("Content of employees_records.json:")
    records_text = f.read()
    print(records_text)

In [None]:
# Write the frame to JSON using the 'index' orientation
df_employees.to_json(
    'employees_index.json',
    indent=2,
    orient='index',
)
print("Exported to 'employees_index.json' with orient='index'", end="\n\n")

In [None]:
# Select three columns first, then export just that subset in 'records' orientation
df_employees[['name', 'department', 'salary']].to_json(
    'employees_summary.json', indent=2, orient='records'
)
print("Exported selected columns to 'employees_summary.json'", end="\n\n")

# Read the file back to confirm what was written
with open('employees_summary.json', 'r') as f:
    print("Content of employees_summary.json:")
    summary_text = f.read()
    print(summary_text)

## Summary

This tutorial covered:
1. **Creating Series and DataFrames** from lists, dictionaries, and NumPy arrays
2. **Loading JSON files** into DataFrames
3. **Slicing and indexing** using loc, iloc, boolean indexing, and query
4. **Passing Series and DataFrames to functions** and using apply()
5. **Exporting DataFrames to JSON** with different orientations

### Key Takeaways:
- Use `loc` for label-based indexing and `iloc` for position-based indexing
- Boolean indexing is powerful for filtering data
- Functions can operate on entire Series/DataFrames or row-by-row with `apply()`
- JSON export orientation affects the output structure (records, columns, index, etc.)