In [2]:
import json
import pandas as pd
from dotenv import dotenv_values
from sqlalchemy import create_engine

config = dotenv_values()
engine = create_engine(config["DB_URI"])


def parse_json_string(json_str):
    """Parse JSON string back to Python object."""
    if pd.isna(json_str) or json_str is None:
        return None
    try:
        return json.loads(json_str)
    except (json.JSONDecodeError, TypeError):
        return json_str  # Return as-is if not valid JSON

# Construct SQL query
query = f"SELECT * FROM data"
# Load data from database
df = pd.read_sql(query, con=engine, index_col=None)

print(f"Successfully loaded {len(df)} rows from 'data' table")

# Convert JSON string columns back to Python objects
if 'category' in df.columns:
    df['category'] = df['category'].apply(parse_json_string)
if 'images' in df.columns:
    df['images'] = df['images'].apply(parse_json_string)

df.info()

Successfully loaded 1000000 rows from 'data' table
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Data columns (total 8 columns):
 #   Column                Non-Null Count    Dtype 
---  ------                --------------    ----- 
 0   review_time           1000000 non-null  int64 
 1   rating                1000000 non-null  int64 
 2   uuid                  1000000 non-null  object
 3   has_image             1000000 non-null  bool  
 4   images                1000000 non-null  object
 5   review_cleaned        1000000 non-null  object
 6   business_description  513735 non-null   object
 7   category              999763 non-null   object
dtypes: bool(1), int64(2), object(5)
memory usage: 54.4+ MB


In [3]:
df.head(n=5)

Unnamed: 0,review_time,rating,uuid,has_image,images,review_cleaned,business_description,category
0,1570668707682,3,11dd1bf5-0e05-4f58-b24e-f9e05a6937a3,False,[],,,[Auto painting]
1,1549240941555,4,77a38301-8c7f-491e-adaf-8987f1d9b11f,False,[],,"Fast, casual seafood restaurant chain serving ...","[Seafood restaurant, American restaurant, Fami..."
2,1538599650146,5,8bf3b257-3636-46ec-a420-905ddeeb12ec,False,[],YUM!!!! Definitely beat the twice-as-expensive...,Circa-1950 fast-food chain for counter-serve b...,"[Restaurant, Breakfast restaurant, Chicken res..."
3,1543200396651,1,517cdc49-418b-4bae-8d91-3df5279d380e,False,[],Room was not cleaned very well. Found skittles...,"Casual lodging with an outdoor pool, plus free...","[Hotel, Business center, Indoor lodging]"
4,1472363463561,3,61f29b12-b632-473d-97c4-341b35e3ac4b,False,[],,Simple chain spot offering American diner fare...,"[Breakfast restaurant, American restaurant, Di..."


In [7]:
df.sample(n=10)[['has_image', 'images', 'review_cleaned', 'business_description', 'category']]

Unnamed: 0,has_image,images,review_cleaned,business_description,category
260445,False,[],My favorite store in Cullman.,,[Sporting goods store]
581381,False,[],,,[Mexican restaurant]
958786,False,[],,"Fast-food chain serving fried chicken, big bur...","[Fast food restaurant, Hamburger restaurant]"
876907,False,[],,This museum about the people & events surround...,"[History museum, Museum, Tourist attraction]"
936339,False,[],Super delicious pizza and great fresh salad :t...,Pizzeria chain offering an array of craft beer...,[Pizza restaurant]
931968,False,[],Self check out is great,,"[Department store, Clothing store, Craft store..."
973113,False,[],,"Chain featuring a range of pets, supplies & fo...","[Pet supply store, Pet adoption service, Pet g..."
168682,False,[],Remodle was well worth it but the katchup pack...,"Classic, long-running fast-food chain known fo...","[Fast food restaurant, Breakfast restaurant, C..."
703126,False,[],,,[Honda dealer]
525872,False,[],Best sirloin steak and cesar salad. Will be ba...,Lively chain steakhouse serving American fare ...,"[Steak house, American restaurant, New America..."


In [6]:
df.sample(n=10)[['category', "review_cleaned", "review_cleaned"]]

Unnamed: 0,category,review_cleaned,review_cleaned.1
843462,[Toy store],,
884205,"[Italian restaurant, Caterer, Family restauran...",10 adults & 1 child. The service was quick in ...,10 adults & 1 child. The service was quick in ...
479749,"[Hotel, Business center, Indoor lodging]",Easy acces to highway. Friendly staff. Ample p...,Easy acces to highway. Friendly staff. Ample p...
418202,"[Toyota dealer, Auto parts store, Auto repair ...",Satisfied with the good customer service. My c...,Satisfied with the good customer service. My c...
876167,"[Fabric store, Baking supply store, Bead store...",,
243305,"[Department store, Baby store, Clothing store,...",This store is very well kept and clean. The wh...,This store is very well kept and clean. The wh...
487822,"[Outdoor sports store, Clothing store, Exercis...",,
429553,"[Furniture store, Glassware store, Kitchen sup...",,
667626,[Ophthalmologist],Dr. Vick provided a clear diagnosis and option...,Dr. Vick provided a clear diagnosis and option...
307556,"[American restaurant, Mexican restaurant, Rest...","I enjoyed their variety of menu selections, I ...","I enjoyed their variety of menu selections, I ..."
