# Practical Exercises: Working with CSV and Excel Files

This notebook contains the exercises and their solutions for working with CSV and Excel files in Python.

## Exercise 1: Create and Read a CSV File

Create a CSV file with sample data (e.g., student records) and write a Python script to read and display its contents.

In [None]:
import csv

# Sample student records; the first row is the CSV header.
students = [
    ['ID', 'Name', 'Age', 'Grade'],
    [1, 'Alice', 20, 'A'],
    [2, 'Bob', 22, 'B'],
    [3, 'Charlie', 21, 'A-'],
    [4, 'David', 23, 'B+'],
]

# Write the records to disk. newline='' hands line-ending control to the csv
# module (otherwise blank lines appear between rows on Windows), and an
# explicit encoding keeps the file identical across platforms.
with open('students.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerows(students)

# Read the file back and display its contents. The csv documentation asks for
# newline='' on the reading side too, so that newlines embedded inside quoted
# fields are parsed correctly.
with open('students.csv', 'r', newline='', encoding='utf-8') as file:
    reader = csv.reader(file)
    for row in reader:
        # csv.reader yields every field as a string, so join() is safe here.
        print(', '.join(row))

## Exercise 2: Create a Simple Excel Spreadsheet

Write a program that takes user input to create a simple spreadsheet (e.g., a budget tracker) and saves it as an Excel file.

In [None]:
from openpyxl import Workbook
from openpyxl.styles import Font

# Create a new workbook and use its default sheet for the budget data
wb = Workbook()
sheet = wb.active
sheet.title = "Budget Tracker"

# Write the header row (row 1) in bold
headers = ["Date", "Description", "Amount", "Category"]
for col, header in enumerate(headers, start=1):
    cell = sheet.cell(row=1, column=col, value=header)
    cell.font = Font(bold=True)

# Collect budget items until the user enters 'q' at the date prompt.
# sheet.append() always writes to the next free row, so no manual row
# counter is needed.
while True:
    date = input("Enter date (or 'q' to quit): ")
    if date.strip().lower() == 'q':
        break
    description = input("Enter description: ")

    # Re-prompt on a non-numeric amount instead of crashing with ValueError
    # and losing every item entered so far (the workbook is only saved at
    # the very end).
    while True:
        try:
            amount = float(input("Enter amount: "))
        except ValueError:
            print("Invalid amount; please enter a number.")
        else:
            break

    category = input("Enter category: ")

    sheet.append([date, description, amount, category])

# Save the workbook
wb.save("budget_tracker.xlsx")
print("Budget tracker saved as 'budget_tracker.xlsx'")

## Exercise 3: Analyze Sales Data

Read a CSV file containing sales data, calculate total sales and average sale value, then write the results to a new CSV file.

In [None]:
import csv
from statistics import mean

# Create a sample sales data CSV file; the first row is the header.
sales_data = [
    ['Date', 'Product', 'Quantity', 'Price'],
    ['2023-01-01', 'Widget A', 5, 10.99],
    ['2023-01-02', 'Widget B', 3, 15.99],
    ['2023-01-03', 'Widget A', 2, 10.99],
    ['2023-01-04', 'Widget C', 7, 5.99],
    ['2023-01-05', 'Widget B', 4, 15.99],
]

with open('sales_data.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(sales_data)

# Read the CSV back and compute the value of each sale (quantity * price).
# newline='' is what the csv documentation asks for when reading as well, so
# that quoted fields containing newlines are parsed correctly.
with open('sales_data.csv', 'r', newline='') as file:
    reader = csv.DictReader(file)
    # DictReader yields strings, so convert before doing arithmetic.
    sale_values = [int(row['Quantity']) * float(row['Price']) for row in reader]

total_sales = sum(sale_values)
# statistics.mean() raises StatisticsError on an empty sequence; report 0.0
# for a file that contains only the header row instead of crashing.
average_sale = mean(sale_values) if sale_values else 0.0

# Write the results to a new CSV file
with open('sales_analysis.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Metric', 'Value'])
    writer.writerow(['Total Sales', f'${total_sales:.2f}'])
    writer.writerow(['Average Sale Value', f'${average_sale:.2f}'])

print(f"Total Sales: ${total_sales:.2f}")
print(f"Average Sale Value: ${average_sale:.2f}")
print("Analysis saved to 'sales_analysis.csv'")

## Exercise 4: Multi-sheet Excel File Reader

Create an Excel file with multiple sheets, each containing different types of data (e.g., employee information, sales data). Write a script to read this file and print a summary of each sheet.

In [None]:
from openpyxl import Workbook, load_workbook

# Build the demo workbook: one sheet of employees and one sheet of sales
wb = Workbook()

# The default sheet becomes the employee sheet
emp_sheet = wb.active
emp_sheet.title = "Employees"
for record in (
    ["ID", "Name", "Department", "Salary"],
    [1, "John Doe", "Sales", 50000],
    [2, "Jane Smith", "Marketing", 55000],
    [3, "Bob Johnson", "IT", 60000],
):
    emp_sheet.append(record)

# A second sheet holds the sales figures
sales_sheet = wb.create_sheet(title="Sales")
for record in (
    ["Date", "Product", "Quantity", "Revenue"],
    ["2023-01-01", "Widget A", 100, 1000],
    ["2023-01-02", "Widget B", 150, 2250],
    ["2023-01-03", "Widget C", 75, 1125],
):
    sales_sheet.append(record)

wb.save("company_data.xlsx")

# Load the file back from disk and print a summary of every sheet
wb = load_workbook("company_data.xlsx")

for sheet_name in wb.sheetnames:
    sheet = wb[sheet_name]
    print("\n".join([
        f"\nSummary of '{sheet_name}' sheet:",
        f"Number of rows: {sheet.max_row}",
        f"Number of columns: {sheet.max_column}",
        "Headers:",
    ]))
    # Row 1 holds the column headers
    header_values = next(sheet.iter_rows(min_row=1, max_row=1, values_only=True))
    for header in header_values:
        print(f"  - {header}")

    # Both sheets keep their numeric column in D (column 4); skip the header
    # row and add up the remaining values.
    if sheet_name == "Employees":
        total_salary = sum(
            value
            for (value,) in sheet.iter_rows(
                min_row=2, min_col=4, max_col=4, values_only=True
            )
        )
        print(f"Total salary: ${total_salary}")
    elif sheet_name == "Sales":
        total_revenue = sum(
            value
            for (value,) in sheet.iter_rows(
                min_row=2, min_col=4, max_col=4, values_only=True
            )
        )
        print(f"Total revenue: ${total_revenue}")

## Exercise 5: Data Analysis with Pandas

Use `pandas` to read a large CSV file (you can generate sample data), perform some basic analysis (e.g., calculate the mean, median, and mode of a column), and export the results to both CSV and Excel formats.

In [None]:
import pandas as pd
import numpy as np

# Build a reproducible 10,000-row sample dataset (fixed seed).
# The dict literal is evaluated top to bottom, so the random draws happen in
# the order Age -> Salary -> Years_Experience.
np.random.seed(0)
data = {
    'ID': range(1, 10001),
    'Age': np.random.randint(18, 65, 10000),
    'Salary': np.random.normal(50000, 15000, 10000),
    'Years_Experience': np.random.randint(0, 30, 10000),
}

# Round-trip the data through a CSV file so the analysis below works on
# what pandas reads from disk.
pd.DataFrame(data).to_csv('large_dataset.csv', index=False)
df = pd.read_csv('large_dataset.csv')

# Mean, median and mode for each analysed column. Series.mode() can return
# several values; the first one is used.
analysis = {
    column: {
        'Mean': df[column].mean(),
        'Median': df[column].median(),
        'Mode': df[column].mode().iloc[0],
    }
    for column in ('Age', 'Salary', 'Years_Experience')
}

# One column per analysed field, one row per statistic
analysis_df = pd.DataFrame(analysis)

# Export the results to both CSV and Excel
analysis_df.to_csv('analysis_results.csv')
analysis_df.to_excel('analysis_results.xlsx')

print("Analysis results:")
print(analysis_df)
print("\nResults exported to 'analysis_results.csv' and 'analysis_results.xlsx'")