In [2]:
!pip install faker

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting faker
  Downloading Faker-18.9.0-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: faker
Successfully installed faker-18.9.0


In [2]:
import pandas as pd
import numpy as np
from faker import Faker

def generate_dummy_dataset(num_videos):
    fake = Faker()
    np.random.seed(42)
    
   
    video_ids = [fake.uuid4() for _ in range(num_videos)]
    user_ids = [fake.uuid4() for _ in range(num_videos)]
    video_titles = [fake.sentence(nb_words=5) for _ in range(num_videos)]
    video_descriptions = [fake.text(max_nb_chars=200) for _ in range(num_videos)]
    video_tags = [fake.words(nb=3) for _ in range(num_videos)]
    views = np.random.randint(low=1000, high=100000, size=num_videos)
    likes = np.random.randint(low=100, high=10000, size=num_videos)
    comments = np.random.randint(low=10, high=500, size=num_videos)
    age = np.random.randint(low=13, high=65, size=num_videos)
    gender = np.random.choice(['Male', 'Female'])
    shares = np.random.randint(low=5, high=200, size=num_videos)
    duration = np.random.randint(low=10, high=600, size=num_videos)
    hashtags = [fake.words(nb=2) for _ in range(num_videos)]
    timestamp = fake.date_time_between(start_date='-1y', end_date='now')
    locations = [fake.country() for _ in range(num_videos)]
    user_followers = np.random.randint(low=100, high=10000, size=num_videos)
    user_following = np.random.randint(low=50, high=500, size=num_videos)
    user_likes = np.random.randint(low=1000, high=100000, size=num_videos)
    user_comments = np.random.randint(low=500, high=10000, size=num_videos)
    user_shares = np.random.randint(low=200, high=5000, size=num_videos)
    
    dummy_data = pd.DataFrame({
        'Video ID': video_ids,
        'User ID': user_ids,
        'Video Title': video_titles,
        'Video Description': video_descriptions,
        'Video Tags': video_tags,
        'Views': views,
        'Likes': likes,
        'Comments': comments,
        'Age': age,
        'Gender': gender,
        'Shares': shares,
        'Duration': duration,
        'Hashtags': hashtags,
        'Timestamp': timestamp,
        'Location': locations,
        'User Followers': user_followers,
        'User Following': user_following,
        'User Likes': user_likes,
        'User Comments': user_comments,
        'User Shares': user_shares
    })
    
    return dummy_data

# Generate a dummy dataset with 2 million videos
dummy_dataset = generate_dummy_dataset(2000000)

# Display the dataset
print(dummy_dataset.head())


                               Video ID                               User ID  \
0  8e7bdd09-743a-4b5c-9f59-e008610c4220  eeb29d04-4348-4763-97f3-49e8fb7ca58f   
1  e9d074ac-5b42-451f-99fc-269f9c6db7dc  14079bd1-ec26-4860-8941-f77757a7d4fa   
2  2620eeea-7d5b-4349-91a5-a034f16f3881  9cfcff3a-5529-4ffd-8538-6cd99525abd6   
3  79de67ab-4a58-4fbc-aa48-19a590f330e9  fddea921-ad84-4ea9-a0bb-fe51ab6dd805   
4  0d993fc9-f7e4-4d09-b79a-be7994b98f16  7df4773b-48c2-409d-961a-421d7647e400   

                       Video Title  \
0            Address eye less job.   
1                    Any oil ball.   
2  Glass media mention alone room.   
3                    He in couple.   
4      Or likely team moment easy.   

                                   Video Description  \
0  Physical store stay. Reduce current phone summ...   
1  Fear matter himself south. Lead guy turn mater...   
2  Government street teacher my. Available profes...   
3  Everyone rise idea pretty director ten. Indivi...   
4  U

In [3]:
# Save the dataset to a CSV file
dummy_dataset.to_csv('dummy_dataset.csv', index=False)

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import os
import zipfile

csv_filename = 'dummy_dataset.csv'
output_path = '/content/drive/MyDrive/CV/' + csv_filename
zip_filename = 'dummy_dataset.zip'
zip_output_path = '/content/drive/MyDrive/CV/' + zip_filename

# Create the 'CV' subfolder if it doesn't exist
if not os.path.exists('/content/drive/MyDrive/CV/'):
    os.makedirs('/content/drive/MyDrive/CV/')

# Save the DataFrame to a CSV file
dummy_dataset.to_csv(output_path, index=False)

# Create a zip file
with zipfile.ZipFile(zip_output_path, 'w') as zipf:
    zipf.write(output_path, arcname=csv_filename)

print("CSV file saved as a zip file to the 'CV' subfolder in Google Drive.")


CSV file saved as a zip file to the 'CV' subfolder in Google Drive.


In [6]:
df = pd.read_csv('dummy_dataset.csv')

In [32]:
df.shape

(2000000, 21)

In [7]:
df.head()

Unnamed: 0,Video ID,User ID,Video Title,Video Description,Video Tags,Views,Likes,Comments,Age,Gender,Shares,Duration,Hashtags,Timestamp,Location,User Followers,User Following,User Likes,User Comments,User Shares
0,8e7bdd09-743a-4b5c-9f59-e008610c4220,eeb29d04-4348-4763-97f3-49e8fb7ca58f,Address eye less job.,Physical store stay. Reduce current phone summ...,"['night', 'method', 'generation']",16795,2924,259,34,Female,85,350,"['truth', 'pull']",2023-01-11 21:00:07,Nauru,5866,242,7037,8050,4787
1,e9d074ac-5b42-451f-99fc-269f9c6db7dc,14079bd1-ec26-4860-8941-f77757a7d4fa,Any oil ball.,Fear matter himself south. Lead guy turn mater...,"['new', 'camera', 'determine']",1860,2251,82,24,Female,146,402,"['past', 'exactly']",2023-01-11 21:00:07,Germany,3423,301,40221,1670,925
2,2620eeea-7d5b-4349-91a5-a034f16f3881,9cfcff3a-5529-4ffd-8538-6cd99525abd6,Glass media mention alone room.,Government street teacher my. Available profes...,"['skin', 'crime', 'within']",77820,6513,70,31,Female,99,186,"['mind', 'act']",2023-01-11 21:00:07,Sao Tome and Principe,1250,136,18469,4659,3823
3,79de67ab-4a58-4fbc-aa48-19a590f330e9,fddea921-ad84-4ea9-a0bb-fe51ab6dd805,He in couple.,Everyone rise idea pretty director ten. Indivi...,"['mention', 'born', 'act']",55886,7781,289,19,Female,92,580,"['author', 'bar']",2023-01-11 21:00:07,Uganda,3848,224,77512,1669,4171
4,0d993fc9-f7e4-4d09-b79a-be7994b98f16,7df4773b-48c2-409d-961a-421d7647e400,Or likely team moment easy.,Until size account stay reason spring field. S...,"['along', 'particular', 'down']",7265,2431,385,43,Female,110,492,"['ask', 'experience']",2023-01-11 21:00:07,Iran,2045,482,79604,6414,3430


In [8]:
df[df['Location'] == 'Ghana'].head(5)

Unnamed: 0,Video ID,User ID,Video Title,Video Description,Video Tags,Views,Likes,Comments,Age,Gender,Shares,Duration,Hashtags,Timestamp,Location,User Followers,User Following,User Likes,User Comments,User Shares
411,fc60b8a2-c9fc-49b4-a440-4b6a333ecdab,88eb52e6-156c-4e06-a074-a606833c7f6f,Lawyer claim fill.,Act as skin learn public.\nAlmost product bill...,"['allow', 'floor', 'seek']",92083,2307,230,31,Female,132,172,"['room', 'price']",2023-01-11 21:00:07,Ghana,5495,211,97522,4302,4574
1001,cbd418ca-2cf8-49c9-a134-02b04a94fb6a,15ebab52-b940-4d0a-824c-df14c38f8701,Deal center which avoid anyone pattern.,Least first religious out establish cup centur...,"['visit', 'my', 'yard']",28285,4502,343,14,Female,188,13,"['those', 'name']",2023-01-11 21:00:07,Ghana,6840,395,71394,4895,3362
1199,ca2417f2-cee0-45c2-94cb-60dcf5170dc5,952b449a-5cf4-4bc9-8abd-14290d81a4bd,Picture investment son nearly produce including.,Fight idea child only significant current. Pro...,"['several', 'them', 'threat']",54028,670,96,36,Female,34,531,"['mind', 'rate']",2023-01-11 21:00:07,Ghana,4481,344,49778,5233,3422
1287,1d9b12c0-e323-4a77-9b9e-e9433ad3e92d,e6473ec3-6b4d-47d0-9407-dc93fe21c1a3,Soldier nearly have right strategy very.,Information information hotel edge religious s...,"['respond', 'five', 'part']",90106,3409,109,46,Female,64,89,"['late', 'then']",2023-01-11 21:00:07,Ghana,3764,97,23021,6018,4453
2548,0df3633e-3eeb-4a78-8eee-af32a881c5ed,800012e6-3369-4da5-8661-e045216f7786,Friend magazine after knowledge significant.,Evening official follow detail alone she accou...,"['outside', 'image', 'participant']",72966,1710,110,15,Female,196,183,"['every', 'security']",2023-01-11 21:00:07,Ghana,6204,436,31423,637,4271
