In [1]:
# ChainEats Analytics - Day 3: Database Setup
import pandas as pd
import sqlite3
import os

# Banner for the database setup step.
print("ChainEats SQL Database Setup", "=" * 50, sep="\n")

# Create database connection.
# Any database file left over from an earlier run is deleted first so that
# every run of this cell starts from a clean slate.
db_name = 'chaineats.db'
if os.path.exists(db_name):
    os.remove(db_name)

conn = sqlite3.connect(db_name)
print("Created database: {}".format(db_name))

# Read the cleaned CSV exports into DataFrames (paths are relative to the
# notebook's working directory).
print("\nLoading cleaned datasets...")

# Entity and fact tables
locations_df = pd.read_csv("locations_cleaned.csv")
menu_df = pd.read_csv("menu_items_cleaned.csv")
sales_df = pd.read_csv("sales_data_cleaned.csv")
weather_df = pd.read_csv("weather_data_cleaned.csv")

# Pre-aggregated summaries
daily_summary_df = pd.read_csv("daily_sales_summary.csv")
monthly_summary_df = pd.read_csv("monthly_sales_summary.csv")

# The frames that carry a 'date' column get it converted back to datetime
# after the CSV round trip.
for dated_frame in (sales_df, weather_df, daily_summary_df):
    dated_frame["date"] = pd.to_datetime(dated_frame["date"])

print("All datasets loaded")

# Write every DataFrame into its own SQLite table.
print("\nLoading data into SQL tables...")

# Table name -> source frame, in creation order: the four main tables first,
# then the two summary tables kept for faster queries.
table_sources = {
    'locations': locations_df,
    'menu_items': menu_df,
    'sales': sales_df,
    'weather': weather_df,
    'daily_summary': daily_summary_df,
    'monthly_summary': monthly_summary_df,
}

for table_name, source_frame in table_sources.items():
    # index=False keeps the pandas row index out of the table.
    source_frame.to_sql(table_name, conn, if_exists='replace', index=False)
    print(f" {table_name} table created")

# Verify tables: list what sqlite_master reports and count the rows in each.
cursor = conn.cursor()
tables = cursor.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()

print(f"\nDatabase created with {len(tables)} tables:")
for (table_name,) in tables:
    # Table names come straight from sqlite_master, not from user input.
    row_count = cursor.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0]
    print(f"  {table_name}: {row_count:,} records")

# NOTE(review): conn is not closed in this cell; the analysis cell below opens
# its own connection. Confirm whether anything else relies on this one.
print("\nDatabase setup complete!")

ChainEats SQL Database Setup
Created database: chaineats.db

Loading cleaned datasets...
All datasets loaded

Loading data into SQL tables...
 locations table created
 menu_items table created
 sales table created
 weather table created
 daily_summary table created
 monthly_summary table created

Database created with 6 tables:
  locations: 50 records
  menu_items: 20 records
  sales: 13,605,203 records
  weather: 3,650 records
  daily_summary: 36,500 records
  monthly_summary: 1,200 records

Database setup complete!


In [3]:
# ChainEats Analytics - Day 3: Core Business SQL Queries
import pandas as pd
import sqlite3

print("ChainEats Business Analysis - SQL Queries")
print("=" * 50)

# Connect to database
conn = sqlite3.connect('chaineats.db')

# QUERY 1: Location Performance Ranking
# Aggregates monthly_summary per location and keeps the 10 locations with the
# highest total net profit.
#   - months_operating counts only matched monthly_summary rows. Because of the
#     LEFT JOIN, COUNT(*) would report 1 for a location that has no summary
#     rows at all; COUNT(ms.location_id) reports 0 for it.
#   - rent_efficiency_ratio is the total net profit over all summarised months
#     divided by a single month's rent.
print("\nQUERY 1: Location Performance Ranking")
print("-" * 40)

query1 = """
SELECT 
    l.location_id,
    l.city,
    l.location_type,
    l.monthly_rent,
    SUM(ms.monthly_revenue) as total_revenue,
    SUM(ms.monthly_profit) as total_profit,
    SUM(ms.net_profit) as total_net_profit,
    COUNT(ms.location_id) as months_operating,
    ROUND(AVG(ms.monthly_revenue), 2) as avg_monthly_revenue,
    ROUND(SUM(ms.net_profit) * 1.0 / l.monthly_rent, 2) as rent_efficiency_ratio
FROM locations l
LEFT JOIN monthly_summary ms ON l.location_id = ms.location_id
GROUP BY l.location_id, l.city, l.location_type, l.monthly_rent
ORDER BY total_net_profit DESC
LIMIT 10;
"""

top_locations = pd.read_sql_query(query1, conn)
print("Top 10 Locations by Net Profit:")
print(top_locations[['location_id', 'city', 'location_type', 'total_net_profit', 'rent_efficiency_ratio']])

# QUERY 2: Underperforming Locations
# Same per-location aggregation as the ranking query, restricted to locations
# whose summed net profit is negative, worst first (at most 10 rows).
print("\nQUERY 2: Underperforming Locations", "-" * 40, sep="\n")

query2 = """
SELECT 
    l.location_id,
    l.city,
    l.location_type,
    l.monthly_rent,
    SUM(ms.net_profit) as total_net_profit,
    COUNT(*) as months_operating,
    ROUND(AVG(ms.monthly_revenue), 2) as avg_monthly_revenue
FROM locations l
LEFT JOIN monthly_summary ms ON l.location_id = ms.location_id
GROUP BY l.location_id, l.city, l.location_type, l.monthly_rent
HAVING total_net_profit < 0
ORDER BY total_net_profit ASC
LIMIT 10;
"""

underperformers = pd.read_sql_query(query2, conn)
print("Underperforming Locations (Negative Net Profit):")
if underperformers.empty:
    print("No locations with negative net profit!")
else:
    underperformer_columns = ['location_id', 'city', 'location_type', 'total_net_profit']
    print(underperformers[underperformer_columns])

# QUERY 3: Menu Item Performance
# Joins every menu item to its sales rows and keeps the 10 items with the
# highest summed profit. COUNT(s.item_id) counts matched sales rows only.
print("\n QUERY 3: Menu Item Performance Analysis", "-" * 40, sep="\n")

query3 = """
SELECT 
    m.item_name,
    m.category,
    m.price,
    m.profit_margin,
    COUNT(s.item_id) as times_ordered,
    SUM(s.revenue) as total_revenue,
    SUM(s.profit) as total_profit,
    ROUND(AVG(s.profit), 2) as avg_profit_per_order
FROM menu_items m
LEFT JOIN sales s ON m.item_id = s.item_id
GROUP BY m.item_id, m.item_name, m.category, m.price, m.profit_margin
ORDER BY total_profit DESC
LIMIT 10;
"""

top_menu_items = pd.read_sql_query(query3, conn)
print("Top 10 Menu Items by Total Profit:")
menu_report_columns = ['item_name', 'category', 'times_ordered', 'total_profit']
print(top_menu_items[menu_report_columns])

# QUERY 4: Seasonal Performance Analysis
# Groups the sales table by season and orders the seasons by summed profit,
# best first.
# NOTE(review): avg_daily_revenue is AVG(revenue) over individual sales rows,
# the same rows this query counts as total_transactions, so it looks like a
# per-transaction average rather than a per-day one. The alias is left as is
# because it becomes a column of the exported CSV; confirm before renaming.
print("\nQUERY 4: Seasonal Performance Analysis", "-" * 40, sep="\n")

query4 = """
SELECT 
    season,
    COUNT(DISTINCT location_id) as locations,
    SUM(revenue) as total_revenue,
    SUM(profit) as total_profit,
    ROUND(AVG(revenue), 2) as avg_daily_revenue,
    COUNT(*) as total_transactions
FROM sales
GROUP BY season
ORDER BY total_profit DESC;
"""

seasonal_performance = pd.read_sql_query(sql=query4, con=conn)
print("Performance by Season:")
print(seasonal_performance)

# QUERY 5: City Performance Comparison
# Rolls daily_summary up to city level. Cities are ordered by summed daily
# profit divided by the number of distinct locations in the city, best first.
print("\nQUERY 5: City Performance Comparison", "-" * 40, sep="\n")

query5 = """
SELECT 
    city,
    COUNT(DISTINCT location_id) as number_of_locations,
    SUM(daily_revenue) as total_revenue,
    SUM(daily_profit) as total_profit,
    ROUND(AVG(daily_revenue), 2) as avg_daily_revenue_per_location,
    ROUND(SUM(daily_profit) * 1.0 / COUNT(DISTINCT location_id), 2) as avg_profit_per_location
FROM daily_summary
GROUP BY city
ORDER BY avg_profit_per_location DESC;
"""

city_performance = pd.read_sql_query(sql=query5, con=conn)
print("City Performance Comparison:")
print(city_performance)

# QUERY 6: Weekend vs Weekday Performance
# Splits the sales table on the is_weekend flag; rows with is_weekend = 1 are
# labelled 'Weekend' and everything else 'Weekday'.
print("\nQUERY 6: Weekend vs Weekday Analysis", "-" * 40, sep="\n")

query6 = """
SELECT 
    is_weekend,
    CASE WHEN is_weekend = 1 THEN 'Weekend' ELSE 'Weekday' END as day_type,
    COUNT(*) as total_transactions,
    SUM(revenue) as total_revenue,
    SUM(profit) as total_profit,
    ROUND(AVG(revenue), 2) as avg_transaction_value
FROM sales
GROUP BY is_weekend
ORDER BY is_weekend;
"""

weekend_analysis = pd.read_sql_query(sql=query6, con=conn)
print("Weekend vs Weekday Performance:")
day_type_columns = ['day_type', 'total_transactions', 'total_revenue', 'avg_transaction_value']
print(weekend_analysis[day_type_columns])

# QUERY 7: Location Type Performance
# Rolls daily_summary up by location_type, ordered by average daily profit,
# best first.
print("\nQUERY 7: Location Type Analysis", "-" * 40, sep="\n")

query7 = """
SELECT 
    location_type,
    COUNT(DISTINCT location_id) as number_of_locations,
    SUM(daily_revenue) as total_revenue,
    SUM(daily_profit) as total_profit,
    ROUND(AVG(daily_revenue), 2) as avg_daily_revenue,
    ROUND(AVG(daily_profit), 2) as avg_daily_profit
FROM daily_summary
GROUP BY location_type
ORDER BY avg_daily_profit DESC;
"""

location_type_performance = pd.read_sql_query(sql=query7, con=conn)
print("Performance by Location Type:")
print(location_type_performance)

# Save query results for visualization: one CSV per result frame, written
# without the pandas index.
# NOTE(review): the underperformers frame is not exported here; confirm that
# is intentional.
print("\nSaving Query Results...")

result_exports = {
    'top_locations.csv': top_locations,
    'top_menu_items.csv': top_menu_items,
    'seasonal_performance.csv': seasonal_performance,
    'city_performance.csv': city_performance,
    'weekend_analysis.csv': weekend_analysis,
    'location_type_performance.csv': location_type_performance,
}
for csv_name, result_frame in result_exports.items():
    result_frame.to_csv(csv_name, index=False)

# Business Insights Summary
print("\nKEY BUSINESS INSIGHTS")
print("=" * 50)

# Each result frame was sorted best-first by its query's ORDER BY ... DESC,
# so the first row holds the top performer for that dimension.
best_city = city_performance.iloc[0]['city']
best_location_type = location_type_performance.iloc[0]['location_type']
best_season = seasonal_performance.iloc[0]['season']
best_menu_item = top_menu_items.iloc[0]['item_name']

print(f"Best performing city: {best_city}")
print(f"Best location type: {best_location_type}")
print(f"Best season: {best_season}")
print(f"Top menu item: {best_menu_item}")


def _first_avg_transaction_value(day_type):
    """Return avg_transaction_value of the first row labelled day_type, or None if absent."""
    matches = weekend_analysis.loc[
        weekend_analysis['day_type'] == day_type, 'avg_transaction_value'
    ]
    return None if matches.empty else matches.iloc[0]


# Weekend vs Weekday insight: relative difference in average transaction value.
# If either day type is missing, or the weekday baseline is zero or NaN, the
# comparison is reported as unavailable instead of raising IndexError or
# printing inf/nan.
weekend_avg = _first_avg_transaction_value('Weekend')
weekday_avg = _first_avg_transaction_value('Weekday')

if (
    weekend_avg is None
    or weekday_avg is None
    or pd.isna(weekend_avg)
    or pd.isna(weekday_avg)
    or weekday_avg == 0
):
    weekend_boost = float('nan')
    print("Weekend sales boost: not available (missing weekend or weekday data)")
else:
    weekend_boost = (weekend_avg / weekday_avg - 1) * 100
    # State the direction explicitly so a negative boost does not read as
    # "-x% higher".
    direction = "higher" if weekend_boost >= 0 else "lower"
    print(f"Weekend sales boost: {abs(weekend_boost):.1f}% {direction} than weekdays")

conn.close()
print("\nSQL analysis done, ready for advanced queries.")

ChainEats Business Analysis - SQL Queries

QUERY 1: Location Performance Ranking
----------------------------------------
Top 10 Locations by Net Profit:
  location_id         city location_type  total_net_profit  \
0     LOC_024     New York       Airport      7.307664e+06   
1     LOC_048      Houston       Airport      7.038987e+06   
2     LOC_044      Chicago       Airport      5.659855e+06   
3     LOC_039  Los Angeles       Airport      5.399054e+06   
4     LOC_019     New York       Airport      5.347064e+06   
5     LOC_016      Chicago    Food Court      5.012856e+06   
6     LOC_034  Los Angeles    Food Court      4.343021e+06   
7     LOC_040      Houston    Food Court      4.050974e+06   
8     LOC_015  Los Angeles       Airport      3.857488e+06   
9     LOC_020     New York    Food Court      3.571336e+06   

   rent_efficiency_ratio  
0                 572.16  
1                 554.25  
2                 469.66  
3                 459.81  
4                 417.90  
5