## **Imports**


In [99]:
import os
import pprint
import asyncio
import aiohttp
import requests
import pandas as pd

## **Verify Restaurant Reviews**

#### **Modified Reviews Count**


In [135]:
# Load the modified reviews and aggregate them per restaurant.
reviews_df = pd.read_csv('../data/modified_reviews.csv')

# One row per restaurant_id: how many reviews it has and the sum of their
# ratings; the mean rating is then derived and rounded to one decimal place.
modified_review_counts = (
    reviews_df
    .groupby("restaurant_id")
    .agg(
        review_count=("rating", "size"),
        total_rating=("rating", "sum"),
    )
    .reset_index()
    .assign(
        rating=lambda counts: counts["total_rating"].div(counts["review_count"]).round(1)
    )
)

modified_review_counts

Unnamed: 0,restaurant_id,review_count,total_rating,rating
0,65eda330d745307cb1a92556,24,108.0,4.5
1,65eda331d745307cb1a9255b,14,45.0,3.2
2,65eda332d745307cb1a92560,12,21.0,1.8
3,65eda332d745307cb1a92565,27,79.0,2.9
4,65eda333d745307cb1a9256a,36,170.0,4.7
...,...,...,...,...
4995,65edaec2d745307cb1a986e5,107,415.0,3.9
4996,65edaec3d745307cb1a986ea,64,165.0,2.6
4997,65edaec3d745307cb1a986ef,67,250.0,3.7
4998,65edaec4d745307cb1a986f4,31,86.0,2.8


In [136]:
# Spot-check the aggregated row of one specific restaurant.
target_restaurant_id = '65edadc8d745307cb1a97eb6'
modified_review_counts[modified_review_counts['restaurant_id'].eq(target_restaurant_id)]

Unnamed: 0,restaurant_id,review_count,total_rating,rating
4576,65edadc8d745307cb1a97eb6,33,71.0,2.2


#### **Restaurant Reviews Count**


In [137]:
# Load the reviews export of the restaurants service and aggregate it per
# restaurant.
restaurant_reviews_df_db = pd.read_csv('../compare/restaurants.reviews.csv')

# One row per restaurantId: number of reviews and the sum of their ratings.
# NOTE(review): this frame has an `isDeleted` column (used further down), and
# rows are not filtered on it here — confirm deleted reviews should be counted.
restaurant_review_db_counts = (
    restaurant_reviews_df_db
    .groupby("restaurantId")
    .agg(
        review_count=("rating", "size"),
        total_rating=("rating", "sum"),
    )
    .reset_index()
)

# Mean rating per restaurant, rounded to one decimal place.
restaurant_review_db_counts["rating"] = (
    restaurant_review_db_counts["total_rating"]
    .div(restaurant_review_db_counts["review_count"])
    .round(1)
)

#### **Reviews Count**


In [138]:
# Load the reviews export of the reviews service and aggregate it per
# restaurant.
reviews_df_db = pd.read_csv('../compare/reviews.reviews.csv')

# One row per restaurantId: number of reviews and the sum of their ratings.
review_db_counts = (
    reviews_df_db
    .groupby("restaurantId")
    .agg(
        review_count=("rating", "size"),
        total_rating=("rating", "sum"),
    )
    .reset_index()
)

# Mean rating per restaurant, rounded to one decimal place.
review_db_counts["rating"] = (
    review_db_counts["total_rating"]
    .div(review_db_counts["review_count"])
    .round(1)
)

#### **Comparing Modified Reviews with Reviews**


In [103]:
# Verify that every restaurant aggregated from the modified-reviews CSV has the
# same review count and rounded rating in the reviews export
# (`review_db_counts`).
#
# Every restaurant is checked: a missing id is reported and skipped instead of
# aborting the loop, so the final tally always covers the whole frame.
count = 0
missing_ids = []

# Index the export by restaurant id once, so each lookup below is an index
# lookup rather than a boolean scan over the entire frame.
db_counts_by_id = review_db_counts.set_index('restaurantId')

for restaurant_id, modified_count, modified_rating in zip(
    modified_review_counts['restaurant_id'],
    modified_review_counts['review_count'],
    modified_review_counts['rating'],
):
    if restaurant_id not in db_counts_by_id.index:
        print(f"Restaurant with Id {restaurant_id} not found.")
        missing_ids.append(restaurant_id)
        continue

    db_count = db_counts_by_id.at[restaurant_id, 'review_count']
    db_rating = db_counts_by_id.at[restaurant_id, 'rating']

    if modified_count != db_count or modified_rating != db_rating:
        count += 1
        # Report both fields, labelled by where each value comes from.
        print(
            f"Mismatch for {restaurant_id}. "
            f"Modified CSV: count={modified_count}, rating={modified_rating}; "
            f"DB: count={db_count}, rating={db_rating}"
        )

# Only mention missing restaurants when there are any.
if missing_ids:
    print(f"Missing: {len(missing_ids)}.")

print(f"Count: {count}.")

Count: 0.


#### **Comparing Modified Reviews with Restaurant Reviews**


In [104]:
# Verify that every restaurant aggregated from the modified-reviews CSV has the
# same review count and rounded rating in the restaurants-service reviews
# export (`restaurant_review_db_counts`).
#
# Every restaurant is checked: a missing id is reported and skipped instead of
# aborting the loop, so the final tally always covers the whole frame.
count = 0
missing_ids = []

# Index the export by restaurant id once, so each lookup below is an index
# lookup rather than a boolean scan over the entire frame.
db_counts_by_id = restaurant_review_db_counts.set_index('restaurantId')

for restaurant_id, modified_count, modified_rating in zip(
    modified_review_counts['restaurant_id'],
    modified_review_counts['review_count'],
    modified_review_counts['rating'],
):
    if restaurant_id not in db_counts_by_id.index:
        print(f"Restaurant with Id {restaurant_id} not found.")
        missing_ids.append(restaurant_id)
        continue

    db_count = db_counts_by_id.at[restaurant_id, 'review_count']
    db_rating = db_counts_by_id.at[restaurant_id, 'rating']

    if modified_count != db_count or modified_rating != db_rating:
        count += 1
        # Report both fields, labelled by where each value comes from.
        print(
            f"Mismatch for {restaurant_id}. "
            f"Modified CSV: count={modified_count}, rating={modified_rating}; "
            f"DB: count={db_count}, rating={db_rating}"
        )

# Only mention missing restaurants when there are any.
if missing_ids:
    print(f"Missing: {len(missing_ids)}.")

print(f"Count: {count}.")

Count: 0.


In [105]:
# Difference in raw row counts between the restaurants-service reviews export
# and the modified-reviews CSV.
extra_row_count = len(restaurant_reviews_df_db) - len(reviews_df)
print(f'Extra values: {extra_row_count}')

Extra values: 0


In [139]:
# Keep only the rows whose isDeleted flag equals False ...
not_deleted_mask = restaurant_reviews_df_db['isDeleted'].eq(False)
df1 = restaurant_reviews_df_db[not_deleted_mask]

# ... and of those, the rows whose `_id` does not appear among the ids of the
# modified-reviews CSV.
known_review_ids = reviews_df['id'].tolist()
unmatched_mask = ~df1['_id'].isin(known_review_ids)
df_filtered = df1[unmatched_mask]
df_filtered.shape

(0, 7)

In [140]:
# Collect the `_id` values of the unmatched rows as a plain Python list.
unmatched_id_values = df_filtered['_id'].values
id_array = unmatched_id_values.tolist()
id_array

[]

## **Verify Restaurant Rating**

In [141]:
# Load the restaurant exports of the three services (read as latin-1).
map_restaurants = pd.read_csv('../compare/map.restaurants.csv', encoding='latin-1')
dining_restaurants = pd.read_csv('../compare/dining-plans.restaurants.csv', encoding='latin-1')
restaurants_df = pd.read_csv('../compare/restaurants.restaurants.csv', encoding='latin-1')

# Report how many rows each export contains.
for label, frame in (
    ('Map Restaurants', map_restaurants),
    ('Dining Restaurants', dining_restaurants),
    ('Restaurants', restaurants_df),
):
    print(f'{label}: {len(frame)}')

Map Restaurants: 5000
Dining Restaurants: 5000
Restaurants: 5000


#### **Comparing Map Restaurants with Rating**


In [132]:
# Verify that the `count` and `rating` stored on each map-service restaurant
# match the values aggregated from the modified-reviews CSV
# (`modified_review_counts`).
#
# `result_array` collects the expected (CSV-derived) values for every
# restaurant that disagrees. Every restaurant is checked: an id with no
# aggregated reviews is reported and skipped instead of aborting the loop.
result_array = []
missing_ids = []

# Index the aggregates by restaurant id once, so each lookup below is an index
# lookup rather than a boolean scan over the entire frame.
expected_by_id = modified_review_counts.set_index('restaurant_id')

for restaurant_id, review_count_db, review_rating_db in zip(
    map_restaurants['_id'],
    map_restaurants['count'],
    map_restaurants['rating'],
):
    if restaurant_id not in expected_by_id.index:
        print(f"Restaurant with Id {restaurant_id} not found.")
        missing_ids.append(restaurant_id)
        continue

    expected_count = expected_by_id.at[restaurant_id, 'review_count']
    expected_rating = expected_by_id.at[restaurant_id, 'rating']

    if review_count_db != expected_count or review_rating_db != expected_rating:
        # Either field can trigger this branch, so report both.
        print(
            f"Count/rating is not equal: {restaurant_id}. "
            f"In DB: count={review_count_db}, rating={review_rating_db}; "
            f"found: count={expected_count}, rating={expected_rating}"
        )

        result_array.append({
            'id': restaurant_id,
            'count': expected_count,
            'rating': expected_rating
        })

# Only mention missing restaurants when there are any.
if missing_ids:
    print(f"Missing: {len(missing_ids)}.")

print("Result Array Length:", len(result_array))
print("Result Array:", result_array)

Result Array Length: 0
Result Array: []


#### **Comparing Dining Restaurants with Rating**


In [133]:
# Verify that the `count` and `rating` stored on each dining-plans restaurant
# match the values aggregated from the modified-reviews CSV
# (`modified_review_counts`).
#
# `result_array` collects the expected (CSV-derived) values for every
# restaurant that disagrees. Every restaurant is checked: an id with no
# aggregated reviews is reported and skipped instead of aborting the loop.
result_array = []
missing_ids = []

# Index the aggregates by restaurant id once, so each lookup below is an index
# lookup rather than a boolean scan over the entire frame.
expected_by_id = modified_review_counts.set_index('restaurant_id')

for restaurant_id, review_count_db, review_rating_db in zip(
    dining_restaurants['_id'],
    dining_restaurants['count'],
    dining_restaurants['rating'],
):
    if restaurant_id not in expected_by_id.index:
        print(f"Restaurant with Id {restaurant_id} not found.")
        missing_ids.append(restaurant_id)
        continue

    expected_count = expected_by_id.at[restaurant_id, 'review_count']
    expected_rating = expected_by_id.at[restaurant_id, 'rating']

    if review_count_db != expected_count or review_rating_db != expected_rating:
        # Either field can trigger this branch, so report both.
        print(
            f"Count/rating is not equal: {restaurant_id}. "
            f"In DB: count={review_count_db}, rating={review_rating_db}; "
            f"found: count={expected_count}, rating={expected_rating}"
        )

        result_array.append({
            'id': restaurant_id,
            'count': expected_count,
            'rating': expected_rating
        })

# Only mention missing restaurants when there are any.
if missing_ids:
    print(f"Missing: {len(missing_ids)}.")

print("Result Array Length:", len(result_array))
print("Result Array:", result_array)

Result Array Length: 0
Result Array: []


#### **Comparing Restaurants with Review Count**


In [134]:
# Verify that the `count` stored on each restaurants-service restaurant matches
# the review count aggregated from the modified-reviews CSV
# (`modified_review_counts`). Only the count is compared in this cell.
#
# `result_array` is reset here so that this cell reports its own mismatches
# only and does not depend on (or append to) a list left over from another
# cell. Every restaurant is checked: an id with no aggregated reviews is
# reported and skipped instead of aborting the loop.
result_array = []
missing_ids = []

# Index the aggregates by restaurant id once, so each lookup below is an index
# lookup rather than a boolean scan over the entire frame.
expected_by_id = modified_review_counts.set_index('restaurant_id')

for restaurant_id, review_count_db in zip(
    restaurants_df['_id'],
    restaurants_df['count'],
):
    if restaurant_id not in expected_by_id.index:
        print(f"Restaurant with Id {restaurant_id} not found.")
        missing_ids.append(restaurant_id)
        continue

    expected_count = expected_by_id.at[restaurant_id, 'review_count']

    if review_count_db != expected_count:
        # Record the expected (CSV-derived) values for the mismatching restaurant.
        result_array.append({
            'id': restaurant_id,
            'count': expected_count,
            'rating': expected_by_id.at[restaurant_id, 'rating']
        })

# Only mention missing restaurants when there are any.
if missing_ids:
    print(f"Missing: {len(missing_ids)}.")

print("Result Array Length:", len(result_array))
print("Result Array:", result_array)

Result Array Length: 0
Result Array: []
