## **Imports**


In [None]:
import os
import asyncio
import aiohttp
import pandas as pd

## **Required Users**


In [None]:
# reviews_users = pd.read_csv('./data/reviews.csv', encoding='latin-1').groupby('user_id')
# len(reviews_users)

In [None]:
# restaurant_users = pd.read_csv('./data/restaurants.csv', encoding='latin-1').head(10000).groupby('name')
# len(restaurant_users)

In [None]:
# print(f'Total Users Required: {len(restaurant_users) + len(reviews_users)}')

## **Workers**


In [None]:
# Tabular form of the collected users; starts out empty and is replaced
# later, either by the CSV contents or by a frame built from users_dict.
users_df = pd.DataFrame()

# Registry of collected users keyed by e-mail address; each value is a dict
# with "firstName", "lastName" and "email".
users_dict = {}

In [None]:
# Location of the CSV holding users gathered by earlier runs.
users_file = './data/users.csv'

# Seed the in-memory registry with previously saved users so that e-mail
# addresses already on disk are treated as known. A missing file simply
# leaves the registry as it is.
if os.path.exists(users_file):
    users_df = pd.read_csv(users_file)

    saved_columns = zip(
        users_df["firstName"],
        users_df["lastName"],
        users_df["email"],
    )
    for first_name, last_name, email in saved_columns:
        # Later rows overwrite earlier ones that share the same e-mail.
        users_dict[email] = {
            "firstName": first_name,
            "lastName": last_name,
            "email": email,
        }

### **Validate User**


In [None]:
async def validate_user(user):
    """Add *user* to ``users_dict`` unless it is missing or already known.

    Args:
        user: Mapping with at least an ``"email"`` key, or ``None`` when the
            caller has no user to offer.

    Returns:
        None. The outcome is visible only through ``users_dict`` and the
        printed messages.
    """
    if user is None:
        # The message mentions a retry, but no retry is performed here;
        # the function just returns.
        print("No user fetched. Retrying...")
        return

    email = user['email']

    if email not in users_dict:
        users_dict[email] = user
        return

    # The first user seen with a given e-mail wins; later ones are reported.
    print(email, "is duplicated")

### **Generate User**


#### **Random User API**


In [None]:
async def fetch_random_user(session):
    """Fetch one batch of users from randomuser.me and register each of them.

    Args:
        session: An open ``aiohttp.ClientSession`` used to issue the request.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            error status (4xx/5xx).
    """
    # NOTE(review): "ie" appears twice in the nat list of the query string.
    async with session.get('https://randomuser.me/api/?inc=name,email&nat=us,gb,ca,ie,nz,au,fi,ie&results=5000') as response:
        # Fail with a clear HTTP error instead of a KeyError further down
        # when the body is an error payload without "results".
        response.raise_for_status()
        data = await response.json()
        results = data['results']
        for user_data in results:
            user = {
                "firstName": user_data['name']['first'],
                "lastName": user_data['name']['last'],
                "email": user_data['email'],
            }
            await validate_user(user)

#### **Random Data User API**


In [None]:
async def fetch_random_data_user(session):
    """Fetch one batch of users from random-data-api.com and register them.

    Args:
        session: An open ``aiohttp.ClientSession`` used to issue the request.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            error status (4xx/5xx).
    """
    async with session.get('https://random-data-api.com/api/v2/users?size=100') as response:
        # Without this check an error payload would be iterated as if it
        # were the list of users and fail with an unrelated exception.
        response.raise_for_status()
        data = await response.json()
        # The loop treats the decoded body as a sequence of user objects.
        for user_data in data:
            user = {
                "firstName": user_data['first_name'],
                "lastName": user_data['last_name'],
                "email": user_data['email'],
            }
            await validate_user(user)

In [None]:
async def main():
    """Fetch batches of users concurrently from the Random Data API.

    Starts ``iterations`` calls to ``fetch_random_data_user`` on one shared
    ``aiohttp.ClientSession`` and waits for all of them. A failing request
    does not abort the batch: failures are collected and summarised with
    ``print`` once every request has finished.
    """
    iterations = 100
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_random_data_user(session) for _ in range(iterations)]
        # With the default return_exceptions=False the first error would
        # propagate out of this ``async with`` and close the session while
        # the remaining requests are still in flight.
        outcomes = await asyncio.gather(*tasks, return_exceptions=True)

    failures = [outcome for outcome in outcomes if isinstance(outcome, BaseException)]
    if failures:
        print(f"{len(failures)} of {iterations} requests failed; first error: {failures[0]!r}")

# Run the collection. Top-level ``await`` relies on the notebook kernel
# supplying a running event loop; it is a syntax error in a plain script.
await main()

In [None]:
# async def main():
#     iterations = 1
#     async with aiohttp.ClientSession() as session:
#         for _ in range(iterations):
#             await fetch_random_user(session)

# await main()

In [None]:
# Rebuild the table from the registry (one row per e-mail key).
users_df = pd.DataFrame.from_dict(users_dict, orient='index')
# reindex() rather than users_df[[...]]: when users_dict is empty the frame
# has no columns at all and plain column selection would raise KeyError.
users_df = users_df.reindex(columns=['firstName', 'lastName', 'email'])
# Make sure the target directory exists so a first run can save its result.
os.makedirs('./data', exist_ok=True)
users_df.to_csv('./data/users.csv', index=False)

In [None]:
# Sanity check: flag every row whose e-mail occurs more than once
# (keep=False marks all occurrences, not just the repeats).
has_repeated_email = users_df.duplicated(subset=['email'], keep=False)
duplicates_df = users_df.loc[has_repeated_email]
duplicates_df