## **Imports**


In [3]:
import os
import pprint
import asyncio
import aiohttp
import requests
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


## **Loading Dataset**


In [None]:
# Restaurants table. The `business_id` -> `id` lookup built further down in the
# notebook reads from this frame.
restaurantsDf = pd.read_csv(
    './data/modified_restaurants.csv',
    encoding='latin-1',
)

In [None]:
# Load the reviews to upload: prefer the "modified" copy when it exists,
# otherwise fall back to ./data/reviews.csv.
#
# This cell deliberately leaves `restaurantsDf` alone. Rebinding it to an empty
# DataFrame here would leave the business_id -> id lookup (built from
# `restaurantsDf` further down) empty, and every review would be posted without
# a restaurant id.
reviews_file = './data/modified_reviews.csv'

if os.path.exists(reviews_file):
    reviewsDf = pd.read_csv(reviews_file, encoding='latin-1')
else:
    reviewsDf = pd.read_csv('./data/reviews.csv', encoding='latin-1')

### **Start from rows where there is no DineEase Id**


In [None]:
# The `id` column is filled in after a review has been uploaded successfully,
# so rows where it is still missing are the ones left to process. When the
# column does not exist at all, nothing has been uploaded yet.
if 'id' in reviewsDf.columns:
    filteredDf = reviewsDf[reviewsDf['id'].isna()]
else:
    filteredDf = reviewsDf

# Only the last bare expression of a cell is rendered, so use display()
# (provided by the IPython kernel) to show both previews.
display(reviewsDf.head(1), filteredDf.head(1))

## **Workers**


In [None]:
# email -> id lookup built from the users backup file. create_user consults it
# to reject generated users whose email is already present.
existing_users_dict = {
    record["email"]: record['id']
    for _, record in pd.read_csv('./data/backup/users_backup.csv').iterrows()
}

In [None]:
# Show only the size: the mapping is keyed by user emails, and echoing it whole
# would store every address in the saved notebook output.
len(existing_users_dict)

In [None]:
# business_id -> id lookup over the restaurants frame. The review upload uses it
# to resolve the restaurant id that goes into the request URL.
restaurants_dict = {
    record['business_id']: record['id']
    for _, record in restaurantsDf.iterrows()
}

In [None]:
# Sanity check: show the number of restaurants in the lookup instead of dumping
# the whole mapping. A result of 0 means no review can be matched to a restaurant.
len(restaurants_dict)

In [None]:
# Restore the users recorded in the backup file (if any), keyed by the
# reviewer's original id, so they are reused instead of being created again.
users_dict = {}
users_backup_file = './data/backup/review_users_backup.csv'

if os.path.exists(users_backup_file):
    users_backup_df = pd.read_csv(users_backup_file)

    for index, row in users_backup_df.iterrows():
        user_details = {
            # Records added by create_user carry "original_id", and the code that
            # rewrites the backup reads that key from every entry. Restored
            # records must therefore carry it too, or the first save after a
            # resume fails with KeyError.
            "original_id": row["original_id"],
            "id": row["id"],
            "email": row["email"],
            "token": row["token"]
        }
        users_dict[row['original_id']] = user_details

In [None]:
# Show only the size: every entry holds the user's bearer token, and echoing the
# dict would store those tokens in the saved notebook output.
len(users_dict)

### **Generate User**


In [None]:
# fetch random user
def fetch_random_user(timeout=10):
    """Fetch one generated identity from randomuser.me and shape it as a register payload.

    Args:
        timeout: Seconds to wait for randomuser.me before giving up.

    Returns:
        A dict with "firstName", "lastName", "email", "password" and "role",
        or None when the request fails, returns an HTTP error status, or the
        body does not have the expected ``results[0]`` structure. create_user
        treats None as "retry", so failures must not raise from here.
    """
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get('https://randomuser.me/api/', timeout=timeout)
        response.raise_for_status()
        user_data = response.json()['results'][0]
        return {
            "firstName": user_data['name']['first'],
            "lastName": user_data['name']['last'],
            "email": user_data['email'],
            # NOTE(review): every generated account shares this hard-coded password.
            "password": "Mujtaba@123",
            "role": "Manager",
        }
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as error:
        # ValueError covers a non-JSON body; KeyError/IndexError/TypeError cover a
        # JSON body without the expected shape (e.g. an error object).
        print(f"Could not fetch a random user: {error!r}")
        return None

# insert user in the DineEase database
def insert_user(user, timeout=10):
    """Register `user` through the DineEase auth endpoint.

    Args:
        user: Register payload; it is sent as the JSON body and, on success,
            updated in place with the "id" and "token" returned by the API.
        timeout: Seconds to wait for the API before `requests` raises.

    Returns:
        The same `user` dict (now carrying "id" and "token") when the API
        answers 201, otherwise None after printing the status code.

    Raises:
        requests.RequestException: when the request cannot be completed
            (connection error, timeout).
    """
    # This blocking call is reached from the upload coroutine, so a request
    # without a timeout could stall the whole run on an unresponsive server.
    response = requests.post(
        'http://dine-ease.dev/api/auth/register',
        json=user,
        timeout=timeout,
    )

    if response.status_code != 201:
        print(f"Error creating user. Status code: {response.status_code}")
        return None

    new_user_data = response.json()
    user['id'] = new_user_data['id']
    user['token'] = new_user_data['token']
    return user

# append user in dictionaries
def create_user(user_id):
    """Create a DineEase user for `user_id` and store it in `users_dict`.

    Keeps drawing random users until one is fetched, its email is not in
    `existing_users_dict`, and `insert_user` accepts it. The accepted record is
    tagged with "original_id" and stored under `users_dict[user_id]`.
    There is no retry limit: the loop only ends on success.
    """
    created = None

    while created is None:
        candidate = fetch_random_user()

        if candidate is None:
            print("No user fetched. Retrying...")
        elif existing_users_dict.get(candidate['email']):
            print(candidate['email'], "is duplicated")
        else:
            # insert_user returns None on failure, which keeps the loop going.
            created = insert_user(candidate)

    created['original_id'] = user_id
    users_dict[user_id] = created

### **Insert Reviews**


In [None]:
async def process_restaurant(session, index, row):
    """Upload one review row to DineEase and checkpoint the result.

    Args:
        session: aiohttp client session used for the POST.
        index: Label of the row in `reviewsDf`; used to write the result back.
        row: The review row; reads `business_id`, `user_id`, `rating`,
            `content` and `createdAt`.

    On HTTP 201 the returned `slug` and `id`, plus the posting user's id, are
    stored on `reviewsDf`, and both `reviewsDf` and the users backup are written
    to disk. On any other status the status and body are printed. Rows whose
    restaurant has no id in `restaurants_dict` are skipped with a message.
    """
    # Resolve the restaurant first: without an id the URL would end in
    # "/review/None", and a user would have been created for nothing.
    restaurantId = restaurants_dict.get(row['business_id'])
    if pd.isna(restaurantId):
        pprint.pprint(f"Skipping row {index + 1}: no restaurant id for business_id {row['business_id']!r}")
        return

    payload = {
        'rating': row['rating'],
        'content': row['content'],
        'createdAt': row['createdAt'],
    }

    # Check if review user exists in users_dict.
    # NOTE(review): create_user is synchronous and blocks the event loop while it
    # runs. It is left inline because there is no await between this check and
    # the creation, so two tasks cannot create a user for the same user_id.
    if not users_dict.get(row['user_id']):
        create_user(row['user_id'])

    user = users_dict.get(row['user_id'])

    headers = {'Authorization': 'Bearer ' + user['token'], 'Content-Type': 'application/json'}

    async with session.post(f'http://dine-ease.dev/api/review/{restaurantId}', json=payload, headers=headers) as response:
        if response.status != 201:
            pprint.pprint(f"Request failed for row {index + 1}. Status code: {response.status}")
            pprint.pprint(await response.text())
            return

        data = await response.json()

    reviewsDf.at[index, 'slug'] = data['slug']
    reviewsDf.at[index, 'id'] = data['id']
    reviewsDf.at[index, 'new_user_id'] = user['id']

    # The reviews are loaded with encoding='latin-1', so write them back the same
    # way; the to_csv default (UTF-8) would garble non-ASCII text on the next load.
    # NOTE(review): progress goes to ./data/reviews.csv, while the loading cell
    # prefers ./data/modified_reviews.csv when that file exists - confirm which
    # file is meant to carry the progress.
    reviewsDf.to_csv('./data/reviews.csv', index=False, encoding='latin-1')

    # users_dict is keyed by the original user id, so take "original_id" from the
    # key: entries restored from the backup file may not carry that field.
    users_backup_df = pd.DataFrame(
        [
            {"original_id": original_id, "id": details["id"], "email": details["email"], "token": details["token"]}
            for original_id, details in users_dict.items()
        ],
        columns=['original_id', 'id', 'email', 'token'],
    )
    users_backup_df.to_csv('./data/backup/review_users_backup.csv', index=False)

In [25]:
async def main():
    """Upload every row of `filteredDf`, 50 rows at a time.

    A single aiohttp session is shared by all uploads. The rows of one batch
    are started together and the next batch begins only once `asyncio.gather`
    has returned for the current one.
    """
    batch_size = 50

    async with aiohttp.ClientSession() as session:
        for start in range(0, len(filteredDf), batch_size):
            batch = filteredDf.iloc[start:start + batch_size]
            await asyncio.gather(*(
                process_restaurant(session, index, row)
                for index, row in batch.iterrows()
            ))
        
# Start the upload. Top-level `await` works in an IPython/Jupyter cell but is a
# SyntaxError in a plain Python script.
await main()

In [33]:
# Within the first 1000 reviews, flag every row whose `user_id` already appeared
# earlier in that slice, then list those rows and print how many there are.
duplicates_mask = reviewsDf['user_id'].head(1000).duplicated()
duplicates = reviewsDf.head(1000).loc[duplicates_mask]
print(f'{len(duplicates)} less users')
duplicates

10 less users


Unnamed: 0,review_id,user_id,business_id,rating,content,createdAt
271,T_x2i7uiQwIP0kq-2HsZLQ,KWLxxZKAcdUOBqBLMPW22w,-y5vYOsyL4bSFxeOtQtYbg,4.0,We love AJ's! The specialty food selections ar...,2016-04-24 02:11:38
618,9J5Xsa_uvf4gfVAShWd0Cw,XPSry7voMlJBXnVmWAIqdg,-Tnsc_b8dsWuAq1wQugKZA,5.0,This is still my all time favorite Mexican res...,2014-09-28 15:33:57
630,-t9JIsXwz36DDDa1RdOxJQ,gfQqQYI5_hCAGEHlHXIz2Q,-ATiAtTikuGuqvaW2O6tNA,5.0,"I was craving a salad, but I wanted a specific...",2006-07-28 02:56:48
661,g8n5A0vaEK7tANxKd-uBSQ,DE2g0b3AI29F6uyx3liW9A,-sSGKR1V1oBm7BJicIXozg,1.0,I came here and left a review before but last ...,2014-02-25 07:57:27
692,blq2r3ruoYxKscx0qshJtw,L_MCRUtGvOOMNA3S30Nj7Q,-ATiAtTikuGuqvaW2O6tNA,4.0,Got takeout from here for lunch. I had the Hou...,2014-03-18 20:27:18
695,KMwsYVEKvXeiFKuBaLvGbA,adsXV8FIXhCf2Nr1plc-pA,-d8B04ueyxADRutlcHYewQ,5.0,Another great experience with Hickory Kitchen....,2018-08-27 14:16:06
701,B3c0NKpeEnZty6yS-CVY0Q,97-dPyCYmQBykiutyfVuWw,-ATiAtTikuGuqvaW2O6tNA,4.0,"4 stars? I know, I'm as surprised as you are....",2009-11-03 02:53:14
781,LkeSpkG5SfdISN4jGBP0Ww,9FIPY_flWt0JH9UNs-LkDA,-sSGKR1V1oBm7BJicIXozg,5.0,We went there about a month ago and had a bad ...,2015-06-08 22:37:49
934,JQvGDV248m9bNPkTWlekVA,pUNaC4U5JuY2TIoM6rsmmw,-K0zTgGyxo-AeSkcV0IVaA,5.0,"Consistently outstanding food, excellent serv...",2017-07-29 02:00:57
979,sp6VkLvRK4K5rYBQqjDWwA,7ctnUScX9B362qXOebNk9w,-TCa3KBib07_1ko9L2Z0fQ,5.0,Foods great. Had issues ordering online throug...,2018-09-29 16:59:36
