# Data Analysis and CSV Generation
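This notebook builds a synthetic daily sales dataset, summarizes and plots it, and then writes the daily data, a monthly summary, and the top-performing days to CSV files.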

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Build a reproducible synthetic dataset: one row per calendar day.
# The legacy global RNG is seeded first, and the draw order (normal, then
# poisson) is what fixes the generated values.
np.random.seed(42)

n_days = 100
day_index = np.arange(n_days)

dates = pd.date_range('2024-01-01', periods=n_days, freq='D')

# Sales = normally distributed baseline + a slow sinusoidal swing
baseline = np.random.normal(1000, 200, n_days)
seasonal_swing = np.sin(day_index * 0.1) * 100
sales = baseline + seasonal_swing

customers = np.random.poisson(50, n_days)

# Assemble the frame; the order value is derived from the unrounded sales
columns = {
    'Date': dates,
    'Sales': np.round(sales, 2),
    'Customers': customers,
    'Avg_Order_Value': np.round(sales / customers, 2),
}
df = pd.DataFrame(columns)

print("Sample data created:")
print(df.head(10))
n_rows_cols = df.shape
print(f"\nDataset shape: {n_rows_cols}")

In [None]:
# Headline metrics over the whole period
total_sales = df['Sales'].sum()
mean_daily_sales = df['Sales'].mean()
total_customers = df['Customers'].sum()
mean_daily_customers = df['Customers'].mean()
# Unweighted mean of the per-day order values (not total sales / total customers)
mean_order_value = df['Avg_Order_Value'].mean()

print("Data Analysis Summary:")
print("=" * 30)
print(f"Total Sales: ${total_sales:,.2f}")
print(f"Average Daily Sales: ${mean_daily_sales:,.2f}")
print(f"Total Customers: {total_customers:,}")
print(f"Average Daily Customers: {mean_daily_customers:.1f}")
print(f"Overall Average Order Value: ${mean_order_value:.2f}")

# Two stacked panels: sales on top, customer count below
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))

# (axes, column, line format, title, y-axis label) for each panel
panels = [
    (ax1, 'Sales', 'b-', 'Daily Sales Trend', 'Sales ($)'),
    (ax2, 'Customers', 'g-', 'Daily Customer Count', 'Number of Customers'),
]
for panel_ax, column, line_fmt, title, y_label in panels:
    panel_ax.plot(df['Date'], df[column], line_fmt, linewidth=2, alpha=0.7)
    panel_ax.set_title(title, fontsize=14, fontweight='bold')
    panel_ax.set_ylabel(y_label, fontsize=12)
    panel_ax.grid(True, alpha=0.3)
    panel_ax.tick_params(axis='x', rotation=45)

# Only the bottom panel carries the x-axis label
ax2.set_xlabel('Date', fontsize=12)

plt.tight_layout()
plt.show()

print("\nVisualization completed!")

In [None]:
# Generate summary statistics and save to CSV
#
# The month-end bins are requested with an offset object instead of the 'M'
# string alias: 'M' is deprecated since pandas 2.2 (in favour of 'ME'), while
# 'ME' is not understood by older pandas. pd.offsets.MonthEnd() works on both.
monthly_summary = (
    df.set_index('Date')
    .resample(pd.offsets.MonthEnd())
    .agg({
        'Sales': ['sum', 'mean', 'std'],
        'Customers': ['sum', 'mean'],
        'Avg_Order_Value': 'mean'
    })
    .round(2)
)

# Flatten the (column, statistic) MultiIndex into names like 'Sales_sum'
monthly_summary.columns = ['_'.join(col).strip() for col in monthly_summary.columns]
monthly_summary = monthly_summary.reset_index()

print("Monthly Summary:")
print(monthly_summary)

# The ten days with the highest sales
top_days = df.nlargest(10, 'Sales')[['Date', 'Sales', 'Customers', 'Avg_Order_Value']]

# Output file name -> frame written to it (insertion order drives the report order)
csv_outputs = {
    'daily_sales_data.csv': df,
    'monthly_summary.csv': monthly_summary,
    'top_performing_days.csv': top_days,
}

# Save to CSV files
print("\nSaving data to CSV files...")
for csv_name, frame in csv_outputs.items():
    frame.to_csv(csv_name, index=False)
    print(f"✅ Saved: {csv_name}")

print("\n📊 All CSV files have been generated successfully!")
print("Files created:")
# Row counts are read from the frames that were written rather than hard-coded
for csv_name, frame in csv_outputs.items():
    print(f"- {csv_name} ({len(frame)} rows)")

In [None]:
# Verify the CSV files were created and show their contents
#
# Only the files this notebook writes are checked. Scanning the working
# directory for every *.csv would also preview unrelated files, in whatever
# order os.listdir happens to return them.
EXPECTED_CSV_FILES = [
    'daily_sales_data.csv',
    'monthly_summary.csv',
    'top_performing_days.csv',
]


def preview_csv(csv_path, n_rows=3):
    """Print the size, shape, columns and first rows of a CSV file.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to inspect.
    n_rows : int, default 3
        Number of leading rows to print.

    Returns
    -------
    pandas.DataFrame
        The frame loaded from ``csv_path``.
    """
    file_size = os.path.getsize(csv_path)
    print(f"📄 {csv_path}: {file_size} bytes")

    loaded = pd.read_csv(csv_path)
    print(f"   Shape: {loaded.shape}")
    print(f"   Columns: {list(loaded.columns)}")
    print(f"   First {n_rows} rows:")
    print(loaded.head(n_rows).to_string(index=False))
    print()
    return loaded


print("Verifying generated files:")
print("=" * 40)

# Split the expected files into those present on disk and those missing
csv_files = [name for name in EXPECTED_CSV_FILES if os.path.exists(name)]
missing_files = [name for name in EXPECTED_CSV_FILES if name not in csv_files]

for csv_file in csv_files:
    preview_df = preview_csv(csv_file)

for missing_name in missing_files:
    print(f"⚠️ Missing: {missing_name}")

# Report success only when every expected file was actually found
if missing_files:
    print(f"❌ Verification failed: {len(missing_files)} expected file(s) missing.")
else:
    print("🎉 CSV generation and verification completed!")