In [2]:
import requests
import pandas as pd

def fetch_orders(url="http://127.0.0.1:8000/orders", timeout=10):
    """Fetch orders from the API and return them as a DataFrame.

    The endpoint may answer with either a bare JSON list of records or a
    JSON object that wraps the records under an ``'orders'`` key.

    Args:
        url: Endpoint to query. Defaults to the local orders endpoint.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout the request can block indefinitely on a stalled server.

    Returns:
        A ``pandas.DataFrame`` built from the records, or ``None`` when the
        request fails, the body is not valid JSON, or the payload has an
        unexpected shape. A message is printed in each failure case.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        orders = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures, which (depending on the
        # installed requests version) are not always a RequestException.
        print(f"Error fetching orders data: {e}")
        return None

    # A bare list of records can be turned into a DataFrame directly
    if isinstance(orders, list):
        return pd.DataFrame(orders)
    # Otherwise the records are expected under the 'orders' key
    if isinstance(orders, dict) and 'orders' in orders:
        return pd.DataFrame(orders['orders'])

    print("Unexpected response format")
    return None

def fetch_reservations(url="http://127.0.0.1:8000/reservations", timeout=10):
    """Fetch reservations from the API and return them as a DataFrame.

    The endpoint may answer with either a bare JSON list of records or a
    JSON object that wraps the records under a ``'reservations'`` key.

    Args:
        url: Endpoint to query. Defaults to the local reservations endpoint.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout the request can block indefinitely on a stalled server.

    Returns:
        A ``pandas.DataFrame`` built from the records, or ``None`` when the
        request fails, the body is not valid JSON, or the payload has an
        unexpected shape. A message is printed in each failure case.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        reservations = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures, which (depending on the
        # installed requests version) are not always a RequestException.
        print(f"Error fetching reservations data: {e}")
        return None

    # A bare list of records can be turned into a DataFrame directly
    if isinstance(reservations, list):
        return pd.DataFrame(reservations)
    # Otherwise the records are expected under the 'reservations' key
    if isinstance(reservations, dict) and 'reservations' in reservations:
        return pd.DataFrame(reservations['reservations'])

    print("Unexpected response format")
    return None

# Pull both datasets from the API; each fetcher hands back None on failure
orders_df = fetch_orders()
reservations_df = fetch_reservations()

# Report row/column counts for whichever frames were actually retrieved
if orders_df is not None:
    print(f"Orders DataFrame fetched: {len(orders_df.index)} rows, {len(orders_df.columns)} columns")

if reservations_df is not None:
    print(f"Reservations DataFrame fetched: {len(reservations_df.index)} rows, {len(reservations_df.columns)} columns")

Orders DataFrame fetched: 100 rows, 7 columns
Reservations DataFrame fetched: 100 rows, 7 columns


In [4]:
# Inspect the fetched data: first rows plus a column/dtype summary per frame.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called bare -- wrapping it in print() would emit a stray "None" line.
if orders_df is not None:
    print("Orders Dataframe:")
    print(orders_df.head())
    print("Info:")
    orders_df.info()

if reservations_df is not None:
    print("Reservations Dataframe:")
    print(reservations_df.head())
    print("Info:")
    reservations_df.info()

Orders Dataframe:
   customer_name                                            items  \
0       John Doe                           Pizza Margherita, Coke   
1   Maria Garcia          Pasta Carbonara, Garlic Bread, Tiramisu   
2   James Wilson        Pepperoni Pizza, Buffalo Wings, Root Beer   
3  Sarah Johnson  Cheeseburger, French Fries, Chocolate Milkshake   
4  Michael Smith          Chicken Alfredo, Caesar Salad, Iced Tea   

   total_amount     status  id                  created_at  \
0         15.99  completed   1  2024-11-22T06:52:58.644309   
1         32.99    pending   2  2024-11-22T06:56:54.244209   
2         28.50    pending   3  2024-11-22T06:57:10.385902   
3         19.99    pending   4  2024-11-22T06:57:22.173180   
4         26.99  confirmed   5  2024-11-22T06:59:34.063606   

                   updated_at  
0  2024-11-22T06:55:35.890294  
1  2024-11-22T06:56:54.244209  
2  2024-11-22T06:57:10.385902  
3  2024-11-22T06:57:22.173180  
4  2024-11-22T06:59:34.063606  
In

In [6]:
# Summary statistics over every column (include="all" covers non-numeric
# columns too), skipped for any frame that failed to load
if orders_df is not None:
    print("Orders Dataset Description:")
    print(orders_df.describe(include="all"))

if reservations_df is not None:
    print("Reservations Dataset Description:")
    print(reservations_df.describe(include="all"))

Orders Dataset Description:
       customer_name                                    items  total_amount  \
count            100                                      100    100.000000   
unique           100                                       99           NaN   
top         John Doe  Chicken Alfredo, Caesar Salad, Iced Tea           NaN   
freq               1                                        2           NaN   
mean             NaN                                      NaN     51.021400   
std              NaN                                      NaN     26.205011   
min              NaN                                      NaN     11.740000   
25%              NaN                                      NaN     26.807500   
50%              NaN                                      NaN     50.725000   
75%              NaN                                      NaN     68.637500   
max              NaN                                      NaN     99.280000   

         status        

In [10]:
# Count missing values per column BEFORE any cleaning is applied. This cell
# runs ahead of the fill step, so its output is labelled "Before Cleaning".
# Frames that failed to load (None) are skipped, as in the earlier cells.
if orders_df is not None:
    print("Missing Values Before Cleaning in Orders DataFrame:")
    print(orders_df.isnull().sum())

if reservations_df is not None:
    print("Missing Values Before Cleaning in Reservations DataFrame:")
    print(reservations_df.isnull().sum())

Missing Values After Cleaning in Orders DataFrame:
customer_name    0
items            0
total_amount     0
status           0
id               0
created_at       0
updated_at       0
dtype: int64
Missing Values After Cleaning in Reservations DataFrame:
customer_name       0
party_size          0
reservation_time    0
contact_number      0
id                  0
is_confirmed        0
created_at          0
dtype: int64


In [15]:
# Orders: fill gaps per column, mutating the frame in place.
#   total_amount           -> mean of the column
#   status                 -> "Unknown"
#   created_at, updated_at -> whatever pd.to_datetime("today") resolves to
orders_df.fillna(
    value={
        "total_amount": orders_df["total_amount"].mean(),
        "status": "Unknown",
        "created_at": pd.to_datetime("today"),
        "updated_at": pd.to_datetime("today"),
    },
    inplace=True,
)

# Reservations: same approach.
#   party_size       -> mean of the column
#   reservation_time -> whatever pd.to_datetime("today") resolves to
#   contact_number   -> "Unknown"
#   is_confirmed     -> False
reservations_df.fillna(
    value={
        "party_size": reservations_df["party_size"].mean(),
        "reservation_time": pd.to_datetime("today"),
        "contact_number": "Unknown",
        "is_confirmed": False,
    },
    inplace=True,
)

In [17]:
# Re-count nulls column by column to confirm the fill step left none behind
print("Missing Values After Cleaning in Orders DataFrame:")
print(orders_df.isnull().sum(axis=0))

print("Missing Values After Cleaning in Reservations DataFrame:")
print(reservations_df.isnull().sum(axis=0))

Missing Values After Cleaning in Orders DataFrame:
customer_name    0
items            0
total_amount     0
status           0
id               0
created_at       0
updated_at       0
dtype: int64
Missing Values After Cleaning in Reservations DataFrame:
customer_name       0
party_size          0
reservation_time    0
contact_number      0
id                  0
is_confirmed        0
created_at          0
dtype: int64


In [19]:
# --- Preprocessing ---
# Drop fully duplicated rows, keeping the first occurrence (in place)
orders_df.drop_duplicates(keep="first", inplace=True)
reservations_df.drop_duplicates(keep="first", inplace=True)

# Parse the timestamp columns into datetimes
orders_df["created_at"] = orders_df["created_at"].pipe(pd.to_datetime)
orders_df["updated_at"] = orders_df["updated_at"].pipe(pd.to_datetime)

reservations_df["reservation_time"] = reservations_df["reservation_time"].pipe(pd.to_datetime)
reservations_df["created_at"] = reservations_df["created_at"].pipe(pd.to_datetime)

# Force 'total_amount' to a numeric dtype; unparseable values become NaN
orders_df["total_amount"] = orders_df["total_amount"].pipe(pd.to_numeric, errors="coerce")

In [21]:
# Orders: derive weekday name, hour of day, and a weekend flag from created_at
orders_df['order_day'] = orders_df['created_at'].dt.day_name()
orders_df['order_hour'] = orders_df['created_at'].dt.hour
orders_df['is_weekend'] = orders_df['order_day'].isin(['Saturday', 'Sunday'])

# Reservations: derive weekday name and hour of day from reservation_time,
# then flag hours 18 through 21 (both ends inclusive) as peak time
reservations_df['reservation_day'] = reservations_df['reservation_time'].dt.day_name()
reservations_df['reservation_hour'] = reservations_df['reservation_time'].dt.hour
reservations_df['is_peak_time'] = (
    (reservations_df['reservation_hour'] >= 18)
    & (reservations_df['reservation_hour'] <= 21)
)

In [23]:
# Preview both frames with the engineered columns included
print("Orders Dataset with New Features:")
print(orders_df.head())

print("Reservations Dataset with New Features:")
print(reservations_df.head())

# Final shapes and column summaries.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called bare -- wrapping it in print() would emit a stray "None" line.
print("Cleaned Orders Dataset Shape:")
print(orders_df.shape)

print("Cleaned Orders Dataset Info:")
orders_df.info()

print("Cleaned Reservations Dataset Shape:")
print(reservations_df.shape)

print("Cleaned Reservations Dataset Info:")
reservations_df.info()

Orders Dataset with New Features:
   customer_name                                            items  \
0       John Doe                           Pizza Margherita, Coke   
1   Maria Garcia          Pasta Carbonara, Garlic Bread, Tiramisu   
2   James Wilson        Pepperoni Pizza, Buffalo Wings, Root Beer   
3  Sarah Johnson  Cheeseburger, French Fries, Chocolate Milkshake   
4  Michael Smith          Chicken Alfredo, Caesar Salad, Iced Tea   

   total_amount     status  id                 created_at  \
0         15.99  completed   1 2024-11-22 06:52:58.644309   
1         32.99    pending   2 2024-11-22 06:56:54.244209   
2         28.50    pending   3 2024-11-22 06:57:10.385902   
3         19.99    pending   4 2024-11-22 06:57:22.173180   
4         26.99  confirmed   5 2024-11-22 06:59:34.063606   

                  updated_at order_day  order_hour  is_weekend  
0 2024-11-22 06:55:35.890294    Friday           6       False  
1 2024-11-22 06:56:54.244209    Friday           6    