In [1]:
# Sales Data CRUD and Dash Visualization
# Jupyter Notebook Skeleton for Local Execution

# --- Cell 1: Install Dependencies (run once) ---
!pip install pymongo pandas dash plotly



In [2]:
import os
from getpass import getpass

import certifi
from pymongo import MongoClient

# The connection URI embeds a username and password, so it must not live in
# the notebook source. It is read from the MONGODB_URI environment variable;
# when that is unset or empty, the user is prompted without echoing the input.
# NOTE(review): a password was previously committed in this cell — rotate it.
MONGODB_URI = os.environ.get("MONGODB_URI") or getpass("MongoDB connection URI: ")

# TLS is verified against certifi's CA bundle.
client = MongoClient(
    MONGODB_URI,
    tls=True,
    tlsCAFile=certifi.where(),
)


In [3]:
# Handles for the `sales_db` database and its `sales` collection.
db = client["sales_db"]
sales = db["sales"]


In [4]:
from pymongo.errors import ConnectionFailure, OperationFailure

# Connectivity check: `ping` is a round trip to the server behind `client`.
# The status messages do not say "local": the client is configured by a
# connection URI, which need not point at this machine.
# OperationFailure is caught as well so that a rejected login is reported
# here instead of escaping as an unhandled traceback.
try:
    client.admin.command("ping")
    print("✅ Connected to MongoDB!")
except (ConnectionFailure, OperationFailure) as e:
    print("❌ Could not connect to MongoDB:", e)


✅ Connected to local MongoDB!


In [5]:
# Load the sales CSV into a DataFrame.
# This cell only prepares `df`; nothing is written to MongoDB here.
import pandas as pd
import datetime

# Read the raw file and report how many rows it holds
df = pd.read_csv("sales_data.csv")
print(f"Read {len(df)} rows from CSV")

# Switch to the short column names used by the CRUD code, and parse the
# date strings into real datetimes so they can be compared in queries.
df = (
    df.rename(columns=dict(
        store_location="store",
        sales_amount="sales",
        product_id="product",
    ))
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)

print(df.columns)   # you should now see ['date','product','sales','store']


Read 120 rows from CSV
Index(['date', 'product', 'sales', 'store'], dtype='object')


In [6]:
# Sanity check: number of documents in the collection, plus one example document.
print("Total docs in sales: {}".format(sales.count_documents({})))
print("Sample record: {}".format(sales.find_one()))


Total docs in sales: 122
Sample record: {'_id': ObjectId('68502738f65a2a44f443032d'), 'date': '2023-09-01', 'product_id': 'P002', 'sales_amount': 253, 'store_location': 'New York'}


In [7]:
# CRUD walkthrough against the `sales` collection: insert one document, read by
# store, scale `sales` for one product, then delete by date.
#
# NOTE(review): the sample record printed earlier uses the raw CSV keys
# (`store_location`, `product_id`, `sales_amount`) and stores `date` as a string,
# while every filter below uses the renamed keys (`store`, `product`, `sales`)
# and a datetime cutoff. The recorded output (no "Store A" rows, 0 deletions) is
# consistent with these filters only matching documents inserted by this cell —
# confirm which schema the collection is meant to hold.
#
# NOTE(review): this cell is not idempotent. Every run inserts another
# "Product X" document and multiplies `sales` on all of them by 1.10 again.

# CREATE: add one more record
import datetime

new_sale = {
    "store":   "New Store",
    "date":    datetime.datetime(2025, 6, 16),
    "product": "Product X",
    "sales":   250.00
}
cre = sales.insert_one(new_sale)
print("Created record _id:", cre.inserted_id)

# READ: show all sales at Store A
print("\nSales at Store A:")
for doc in sales.find({"store": "Store A"}):
    print(doc)

# UPDATE: bump Product X sales +10%
# `$mul` multiplies the stored `sales` value in place on every matching document,
# including the one inserted above.
upd = sales.update_many(
    {"product": "Product X"},
    {"$mul": {"sales": 1.10}}
)
print(f"\nUpdated {upd.modified_count} Product X records (+10%)")

# DELETE: remove any records before 2025-01-01
# NOTE(review): the cutoff is a datetime; presumably it does not match documents
# whose `date` is stored as a string — verify before relying on this cleanup.
cutoff = datetime.datetime(2025, 1, 1)
del_res = sales.delete_many({"date": {"$lt": cutoff}})
print(f"\nDeleted {del_res.deleted_count} outdated records")


Created record _id: 68502d29c63ae0794a98a7c1

Sales at Store A:

Updated 3 Product X records (+10%)

Deleted 0 outdated records


In [8]:
# Re-build the aggregation on the real CSV field names:
# total `sales_amount` per `store_location`, largest total first.
pipeline = [
    # Drop any docs where store_location is missing, null, or empty
    {"$match": {"store_location": {"$exists": True, "$nin": [None, ""]}}},
    {"$group": {"_id": "$store_location", "totalSales": {"$sum": "$sales_amount"}}},
    {"$sort":  {"totalSales": -1}}
]

agg = list(sales.aggregate(pipeline))

import pandas as pd
# Naming the columns up front keeps the frame's shape stable: if the pipeline
# returns no groups, `agg_df` is an empty frame that still has `store` and
# `totalSales`, so code that selects those columns does not fail on a
# missing column.
agg_df = (
    pd.DataFrame(agg, columns=["_id", "totalSales"])
      .rename(columns={"_id": "store"})
)
print(agg_df)


         store  totalSales
0      Chicago       15784
1  Los Angeles       15473
2      Houston       15209
3     New York       14916


In [9]:
# Dash in Jupyter
!pip install jupyter-dash   



In [10]:
# ─── Cell: Inline Dash Embed ───
import socket
import threading
import time

from dash import Dash, html, dcc
import plotly.express as px
from IPython.display import IFrame, display

# Address the notebook uses to reach the embedded dashboard.
DASH_HOST = "127.0.0.1"
DASH_PORT = 8050

# 1) Build a new Dash app so it picks up the fresh agg_df
app = Dash(__name__)
fig = px.bar(
    agg_df,
    x="store",
    y="totalSales",
    title="Total Sales by Store (after CRUD ops)"
)
app.layout = html.Div([
    html.H1("Sales Dashboard"),
    dcc.Graph(figure=fig)
])

# 2) Run the server in a background thread (no reloader)
def run():
    """Serve `app` on DASH_PORT; blocks the calling thread while serving."""
    app.run(port=DASH_PORT, debug=False, use_reloader=False)


def _wait_until_listening(host, port, timeout=10.0, interval=0.1):
    """Poll until a TCP connection to host:port succeeds.

    Returns True as soon as a connection is accepted, or False if `timeout`
    seconds pass without one.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=interval):
                return True
        except OSError:
            time.sleep(interval)
    return False


# Daemon thread: it does not keep the kernel process alive on shutdown.
threading.Thread(target=run, daemon=True).start()

# The server starts asynchronously. Rendering the IFrame straight away can
# load the page before anything is listening, so wait for the port first.
# NOTE(review): if an earlier run of this cell is still serving on DASH_PORT,
# this check passes immediately and the frame presumably shows that older
# app — restart the kernel to be sure the figure is current.
if not _wait_until_listening(DASH_HOST, DASH_PORT):
    print(
        f"Dash server is not listening on {DASH_HOST}:{DASH_PORT} yet; "
        "the frame below may be empty."
    )

# 3) Embed via IFrame right here in the notebook
display(IFrame(src=f"http://{DASH_HOST}:{DASH_PORT}", width="100%", height="500px"))
