## **Imports**


In [1]:
import os
import time
import pprint
import asyncio
import aiohttp
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


## **Required Users**


In [2]:
# Bucket every review row by its author; only the number of groups is used
# further down (it feeds the required-user total).
reviews_users = (
    pd.read_csv('../data/reviews.csv', encoding='latin-1')
    .groupby('user_id')
)
len(reviews_users)

493532

In [3]:
# Only the first 10000 restaurant rows are considered; they are bucketed by
# restaurant name and the number of groups feeds the required-user total.
restaurant_users = (
    pd.read_csv('../data/restaurants.csv', encoding='latin-1')
    .head(10000)
    .groupby('name')
)
len(restaurant_users)

7883

In [4]:
# Total accounts needed: one per restaurant-name group plus one per reviewer group.
required_users = sum(len(groups) for groups in (restaurant_users, reviews_users))
print(f'Total Users Required: {required_users}')

Total Users Required: 501415


## **Workers**


In [5]:
# Empty containers: an email-keyed lookup of known users and the user table
# (which stays empty when no saved users file is found later on).
users_dict, users_df = dict(), pd.DataFrame()

In [6]:
users_file = '../data/un_registered_users.csv'

# Reuse previously generated users when the CSV is present; if it holds more
# rows than are required, keep only the first `required_users` rows and write
# the trimmed table back to the same file.
if os.path.exists(users_file):
    users_df = pd.read_csv(users_file)
    print(f'Current Users: {len(users_df)}')

    surplus_rows = len(users_df) - required_users
    if surplus_rows > 0:
        users_df = users_df.iloc[:required_users]
        users_df.to_csv(users_file, index=False)
        print(f'Sliced users to : {len(users_df)}')
    

Current Users: 501415


In [7]:
# Index every previously saved user by email so validate_user() can detect
# duplicates. The value is the full row as a dict (not just the email string):
# the later `pd.DataFrame.from_dict(users_dict, orient='index')` rebuild needs
# every value to be a record with firstName / lastName / email.
# `to_dict(orient='records')` yields an empty list for an empty frame, so this
# is a no-op when no users file was loaded, and it avoids the per-row Series
# construction of `iterrows()`.
for record in users_df.to_dict(orient='records'):
    users_dict[record["email"]] = record

In [8]:
# Display how many distinct emails are currently tracked in users_dict.
len(users_dict)

501415

### **Validate User**


In [9]:
async def validate_user(user):
    """Record `user` in `users_dict` unless it is missing or its email is already known.

    Args:
        user: A dict with at least an ``email`` key, or ``None``.

    Returns:
        None. A message is printed when `user` is ``None`` or when the email
        already maps to a truthy entry in `users_dict`; otherwise the user is
        stored under its email.
    """
    if user is None:
        print("No user fetched. Retrying...")
        return

    email = user['email']

    # Unknown email (or a falsy placeholder stored for it): remember this user.
    if not users_dict.get(email):
        users_dict[email] = user
        return

    print(email, "is duplicated")

### **Generate User**


#### **Random User API**


In [10]:
async def fetch_random_user(session):
    """Fetch one page of users from randomuser.me and pass each to validate_user().

    A single request asks for 5000 results restricted to the name and email
    fields. A non-200 response, or a payload without a non-empty ``results``
    list (for example an error object), is reported and skipped instead of
    raising, so one bad response does not abort the remaining iterations.

    Args:
        session: An HTTP client session exposing ``get(url)`` as an async
            context manager (an ``aiohttp.ClientSession`` in this notebook).

    Returns:
        None.
    """
    url = 'https://randomuser.me/api/?inc=name,email&nat=us,gb,ca,ie,nz,au,fi,ie&results=5000'

    async with session.get(url) as response:
        if response.status != 200:
            print(f"randomuser.me request failed. Status code: {response.status}")
            return
        data = await response.json()

    # The body is fully read at this point, so the connection is already released.
    results = data.get('results') if isinstance(data, dict) else None
    if not results:
        print("randomuser.me returned no users:", data)
        return

    for user_data in results:
        user = {
            "firstName": user_data['name']['first'],
            "lastName": user_data['name']['last'],
            "email": user_data['email'],
        }
        await validate_user(user)

async def main():
    """Call fetch_random_user() twice, one request after the other, on one shared session."""
    request_count = 2
    async with aiohttp.ClientSession() as http:
        completed = 0
        while completed < request_count:
            # Sequential on purpose: each fetch finishes before the next starts.
            await fetch_random_user(http)
            completed += 1

if len(users_df) < required_users:
    await main()

#### **Random Data User API**


In [11]:
async def fetch_random_data_user(session):
    """Fetch 100 users from random-data-api.com and pass each to validate_user().

    A non-200 response, or a payload that is not a list (for example an error
    object), is reported and skipped instead of raising, so one bad response
    does not cancel the sibling requests gathered alongside it.

    Args:
        session: An HTTP client session exposing ``get(url)`` as an async
            context manager (an ``aiohttp.ClientSession`` in this notebook).

    Returns:
        None.
    """
    url = 'https://random-data-api.com/api/v2/users?size=100'

    async with session.get(url) as response:
        if response.status != 200:
            print(f"random-data-api.com request failed. Status code: {response.status}")
            return
        data = await response.json()

    if not isinstance(data, list):
        print("random-data-api.com returned an unexpected payload:", data)
        return

    for user_data in data:
        user = {
            "firstName": user_data['first_name'],
            "lastName": user_data['last_name'],
            "email": user_data['email'],
        }
        await validate_user(user)
            
async def main():
    """Issue ten fetch_random_data_user() requests concurrently on one shared session."""
    # NOTE(review): the original cell also carried a commented-out
    # `iterations = 500`, presumably the size of a larger run; confirm before reusing.
    request_count = 10
    async with aiohttp.ClientSession() as http:
        pending = (fetch_random_data_user(http) for _ in range(request_count))
        await asyncio.gather(*pending)

if len(users_df) < required_users:
    await main()

In [12]:
# Persist newly fetched users when the saved table was short of the target.
if len(users_df) < required_users:
    print("Rewriting Dataframe due to New Users")
    user_columns = ['firstName', 'lastName', 'email']

    # Start from the rows already on disk, then append every full user record
    # held in users_dict. Entries that are not dicts carry no name fields and
    # are skipped; their rows are already covered by users_df. De-duplicating
    # on email keeps the first occurrence, so existing rows keep their position.
    records = users_df.reindex(columns=user_columns).to_dict(orient='records')
    records.extend(user for user in users_dict.values() if isinstance(user, dict))

    users_df = (
        pd.DataFrame(records, columns=user_columns)
        .drop_duplicates(subset='email', ignore_index=True)
    )
    # Write back to the same file the users were loaded from.
    users_df.to_csv(users_file, index=False)


In [13]:
# Sanity check: keep=False flags every row whose email occurs more than once,
# so an empty result means all emails are unique.
is_repeated_email = users_df.duplicated(subset=['email'], keep=False)
duplicates_df = users_df.loc[is_repeated_email]
duplicates_df

Unnamed: 0,firstName,lastName,email


### **Switch between restaurant owner and review creators**

In [14]:
registered_users = {}

# Manager or User
role = "User"

print("Total Restaurant Users:", len(restaurant_users))
print("Total Review Users:", len(reviews_users))

# The user table is split positionally: the first len(restaurant_users) rows
# are used for the "Manager" role, every remaining row for any other role.
# Each side has its own output file of registered accounts.
owner_count = len(restaurant_users)
is_manager = role == "Manager"

registered_users_file = (
    '../data/restaurant_owners.csv' if is_manager else '../data/review_users.csv'
)
unregistered_users_df = (
    users_df.iloc[:owner_count] if is_manager else users_df.iloc[owner_count:]
)

print(len(unregistered_users_df))

Total Restaurant Users: 7883
Total Review Users: 493532
493532


In [15]:
# Resume support: load the accounts registered by earlier runs and drop them
# from the pending queue.
if os.path.exists(registered_users_file):
    dataframe = pd.read_csv(registered_users_file)

    user_fields = ("id", "firstName", "lastName", "email", "role", "token")
    for record in dataframe.to_dict(orient='records'):
        # .get() keeps a missing column as None, matching the stored shape
        # produced when a user is registered in this session.
        registered_users[record['email']] = {
            field: record.get(field, None) for field in user_fields
        }

    # Match on email rather than skipping the first len(dataframe) rows: the
    # registered file only contains successful registrations, so after any
    # failed request a positional offset would re-submit users that already
    # exist and never retry the ones that failed.
    if not unregistered_users_df.empty:
        already_registered = unregistered_users_df['email'].isin(list(registered_users))
        unregistered_users_df = unregistered_users_df[~already_registered]

print(unregistered_users_df.shape)
unregistered_users_df.head(1)

(280531, 3)


Unnamed: 0,firstName,lastName,email
220884,Aretha,Jacobi,aretha.jacobi@email.com


In [16]:
# Display how many already-registered accounts were loaded into registered_users.
len(registered_users)

213001

In [17]:
# Display the active role as a last check before the registration requests are sent.
role

'User'

In [18]:
async def insert_user(session, index, row):
    """Register one user through the auth API and record the result.

    On HTTP 201 the user's details (without the password) plus the returned
    ``id`` and ``token`` are stored in `registered_users` under the email.
    Any other status is printed together with the response body. Network
    errors and timeouts are reported and swallowed so that one failed request
    does not abort the other registrations gathered in the same batch.

    Args:
        session: HTTP client session exposing ``post(url, json=, headers=)``
            as an async context manager (an ``aiohttp.ClientSession`` here).
        index: Row label of `row`; unused, kept for call compatibility.
        row: Mapping-like record with ``firstName``, ``lastName`` and ``email``.

    Returns:
        None.
    """
    email = row['email']

    user = {
        "firstName": row['firstName'],
        "lastName": row['lastName'],
        "email": email,
        # NOTE(review): every generated account shares one password. It can be
        # overridden through DINE_EASE_USER_PASSWORD; the literal stays as the
        # default so existing runs behave as before.
        "password": os.environ.get("DINE_EASE_USER_PASSWORD", "Mujtaba@123"),
        "role": role,
    }

    try:
        async with session.post(
            'http://dine-ease.dev/api/auth/register',
            json=user,
            headers={'Content-Type': 'application/json'},
        ) as response:

            if response.status == 201:
                user_data = await response.json()

                # Never keep the password alongside the stored account details.
                user.pop("password", None)
                user_data = {'id': user_data['id'], 'token': user_data['token']}
                registered_users[email] = {**user, **user_data}

            else:
                pprint.pprint(f"Request failed for {email}. Status code: {response.status}")
                pprint.pprint(await response.text())

    except (aiohttp.ClientError, asyncio.TimeoutError) as error:
        # Connection resets / timeouts: report and move on; the user simply
        # stays out of registered_users.
        pprint.pprint(f"Request failed for {email}. Error: {error!r}")

async def main():
    """Register the pending users in concurrent batches, checkpointing after each batch.

    Up to `num_batches` batches of `batch_size` rows are taken from
    `unregistered_users_df`. The rows of a batch are registered concurrently
    via insert_user(); afterwards every entry of `registered_users` that has a
    token is written to `registered_users_file`, so an interrupted run can be
    resumed from the file.

    Returns:
        None.
    """
    batch_size = 10
    num_batches = 8000
    output_columns = ['id', 'firstName', 'lastName', 'email', 'role', 'token']

    # Stop at the end of the frame or after num_batches batches, whichever comes first.
    stop = min(len(unregistered_users_df), batch_size * num_batches)

    async with aiohttp.ClientSession() as session:
        for start in range(0, stop, batch_size):
            batch = unregistered_users_df.iloc[start:start + batch_size]
            await asyncio.gather(
                *(insert_user(session, index, row) for index, row in batch.iterrows())
            )

            # pd.notna() rejects both None and the NaN that an empty CSV cell
            # turns into when earlier results are reloaded from disk.
            records = [
                details for details in registered_users.values()
                if pd.notna(details['token'])
            ]
            # Passing `columns=` keeps the header even when nothing has been
            # registered yet (selecting columns on an empty frame would raise).
            checkpoint = pd.DataFrame(records, columns=output_columns)
            checkpoint.to_csv(registered_users_file, index=False)

if len(unregistered_users_df) != 0:
    await main()