In [2]:
import pandas as pd

# Load combined Google data from Week 4
df_google = pd.read_csv("google_all_restaurants_combined.csv")

# Normalize nested fields using dot notation for access
# (Assumes this step was already done in Week 4 using json_normalize with sep='.')
# But in case you're redoing from raw JSON:
# df_google = pd.json_normalize(google_data, sep='.')

# Add a source column
df_google['source'] = 'google'

# Rename fields for consistency
df_google.rename(columns={
    'formatted_address': 'address',
    'rating': 'rating',
    'price': 'price_level',
    'geometry.location.lat': 'latitude',
    'geometry.location.lng': 'longitude'
}, inplace=True)

# Some fields may not exist in all rows — ensure they exist to avoid KeyError
for col in ['address', 'rating', 'price_level', 'latitude', 'longitude']:
    if col not in df_google.columns:
        df_google[col] = None

# Keep only required columns
columns_needed = ['name', 'address', 'rating', 'price_level', 'latitude', 'longitude', 'source']
df_google_clean = df_google[columns_needed]

# Preview
print(df_google_clean.head())

# Optional: Save cleaned Google data
df_google_clean.to_csv("google_restaurants_cleaned.csv", index=False)
print("✅ Saved cleaned Google dataset to google_restaurants_cleaned.csv")


                           name  \
0           La Grande Boucherie   
1                    The Modern   
2                STK Steakhouse   
3              Tony's Di Napoli   
4  La Pecora Bianca Bryant Park   

                                            address  rating  price_level  \
0  145 W 53rd St, New York, NY 10019, United States     4.6          2.0   
1    9 W 53rd St, New York, NY 10019, United States     4.6          4.0   
2   1114 6th Ave, New York, NY 10036, United States     4.8          3.0   
3  147 W 43rd St, New York, NY 10036, United States     4.6          2.0   
4   20 W 40th St, New York, NY 10018, United States     4.7          3.0   

    latitude  longitude  source  
0  40.762634 -73.980836  google  
1  40.761081 -73.976753  google  
2  40.754721 -73.982759  google  
3  40.756462 -73.985397  google  
4  40.752518 -73.983153  google  
✅ Saved cleaned Google dataset to google_restaurants_cleaned.csv


In [6]:
import pandas as pd

# Load Foursquare data from Week 4
df_fsq = pd.read_csv("foursquare_all_restaurants_combined.csv")

# Add source column
df_fsq['source'] = 'foursquare'

# Rename fields for consistency
df_fsq.rename(columns={
    'location.formatted_address': 'address',
    'rating': 'rating',
    'price': 'price_level',
    'geocodes.main.latitude': 'latitude',
    'geocodes.main.longitude': 'longitude'
}, inplace=True)

# Ensure all necessary columns exist
for col in ['address', 'rating', 'price_level', 'latitude', 'longitude']:
    if col not in df_fsq.columns:
        df_fsq[col] = None

# Select only required columns
columns_needed = ['name', 'address', 'rating', 'price_level', 'latitude', 'longitude', 'source']
df_fsq_clean = df_fsq[columns_needed]

# Preview
print(df_fsq_clean.head())

# Optional: save cleaned data
df_fsq_clean.to_csv("foursquare_restaurants_cleaned.csv", index=False)
print("✅ Saved cleaned Foursquare dataset to foursquare_restaurants_cleaned.csv")


                             name  \
0                 Los Tacos No. 1   
1                Black Fox Coffee   
2                       The Odeon   
3       Laughing Man Coffee & Tea   
4  The Dead Rabbit Grocery & Grog   

                                             address rating  price_level  \
0  136 Church St (btw Murray & Warren), New York,...   None          NaN   
1                     70 Pine St, New York, NY 10005   None          NaN   
2  145 W Broadway (at Thomas St), New York, NY 10013   None          NaN   
3  184 Duane St (btwn Greenwich & Hudson St.), Ne...   None          NaN   
4  30 Water St (btwn Broad St & Coenties Slip), N...   None          NaN   

    latitude  longitude      source  
0  40.714290 -74.008730  foursquare  
1  40.706441 -74.007808  foursquare  
2  40.716902 -74.007873  foursquare  
3  40.717216 -74.010199  foursquare  
4  40.703164 -74.010953  foursquare  
✅ Saved cleaned Foursquare dataset to foursquare_restaurants_cleaned.csv


In [8]:
# Load both cleaned datasets
df_google_clean = pd.read_csv("google_restaurants_cleaned.csv")
df_fsq_clean = pd.read_csv("foursquare_restaurants_cleaned.csv")

# Combine
df_combined = pd.concat([df_google_clean, df_fsq_clean], ignore_index=True)

# Save combined data
df_combined.to_csv("combined_restaurants_cleaned.csv", index=False)

# Optional: Save to SQLite
import sqlite3
conn = sqlite3.connect("combined_restaurants.db")
df_combined.to_sql(name='restaurants', con=conn, if_exists='replace', index=False)
conn.close()

print("✅ Combined dataset saved to combined_restaurants_cleaned.csv and combined_restaurants.db")


✅ Combined dataset saved to combined_restaurants_cleaned.csv and combined_restaurants.db


In [10]:
import sqlite3
import pandas as pd

# Connect to the SQLite database
conn = sqlite3.connect("combined_restaurants.db")

# Preview first 10 rows from the 'restaurants' table
query = "SELECT * FROM restaurants LIMIT 10"
df_preview = pd.read_sql(query, conn)

# Display the result
print(df_preview)

# Close the connection
conn.close()


                           name  \
0           La Grande Boucherie   
1                    The Modern   
2                STK Steakhouse   
3              Tony's Di Napoli   
4  La Pecora Bianca Bryant Park   
5                            53   
6       Carmine's - Time Square   
7               Osteria La Baia   
8                     The Smith   
9                  Le Bernardin   

                                            address  rating  price_level  \
0  145 W 53rd St, New York, NY 10019, United States     4.6          2.0   
1    9 W 53rd St, New York, NY 10019, United States     4.6          4.0   
2   1114 6th Ave, New York, NY 10036, United States     4.8          3.0   
3  147 W 43rd St, New York, NY 10036, United States     4.6          2.0   
4   20 W 40th St, New York, NY 10018, United States     4.7          3.0   
5   53 W 53rd St, New York, NY 10019, United States     4.3          3.0   
6  200 W 44th St, New York, NY 10036, United States     4.5          2.0   
7  129