In [1]:
%load_ext sql

In [2]:
### Notebook Imports

import pandas as pd
import numpy as np
import random
import sqlite3 
from faker import Faker

from IPython.display import display

### Set Notebook Parameters
pd.set_option('display.max_columns', None)

<div style="text-align: center;">
    <img src="images/data_engineering_step.png" alt="Feature Engineering" style="width: 1000px;"/>
</div>

# Feature Engineering for the Pairy Application
#### ``Instagram Features:``
- An Instagram profile consists of various features that provide information about the user, their content, interactions, and engagement. Here's a comprehensive list of the key features that make up an Instagram profile:

- `Profile Picture:` The user's profile picture, which represents their identity.
- `Name:` The user's name as displayed on the profile.
- `Username:` The unique username that other users can use to mention or tag the profile.
- `Bio:` A short description or introduction that users can provide to give others an idea about themselves or their brand.
- `Website:` An optional link that users can include to direct visitors to an external website or webpage.
- `Followers:` The count of users who follow the profile.
- `Following:` The count of profiles that the user is following.
- `Posts:` The count of images or videos the user has shared on their profile.
- `IGTV Videos:` The count of long-form videos shared on IGTV (Instagram TV).
- `Reels:` The count of short video clips created using the Reels feature.
- `Tagged Photos:` Photos in which the user is tagged by others.
- `Highlights:` Collections of Stories that users can group together and showcase at the top of their profile.
- `Stories:` Temporary photos or videos that disappear after 24 hours.
- `Saved:` A collection of saved posts, organized into collections created by the user.
- `Follow Button:` Allows users to follow the profile and see its updates on their feed.
- `Message Button:` Allows users to send direct messages to the profile.
- `Contact Options:` Business accounts can provide contact buttons for phone calls, email, or directions.
- `Category and Contact Information:` For business profiles, categories and contact details can be displayed on the profile.
- `IGTV Tab:` Displays IGTV videos created by the user or shared from other users.
- `Reels Tab:` Displays short video clips created using the Reels feature.
- `Photo Grid:` Displays a grid of the user's shared photos and videos.
- `Photos and Videos:` The content shared by the user, including images and videos.
- `Followers Insights:` Analytics and insights about the demographics and activity of the user's followers.
- `Stories Archive:` An archive of the user's expired Stories, which they can revisit.
- `Story Highlights Archive:` An archive of the user's expired Story Highlights.
- `Activity:` Displays recent activity on the profile, including likes, comments, and follows.
- `Posts Insights:` Analytics and insights about the engagement and performance of individual posts.

Simplified database schema representation for the features of an Instagram profile.
- **User Table :**

| Field Name      | Data Type         | Constraints               |
|-----------------|-------------------|---------------------------|
| user_id         | INT (Primary Key) | Auto-increment           |
| instagram_name            | VARCHAR(100)      | NOT NULL                 |
| instagram_username        | VARCHAR(50)       | UNIQUE NOT NULL          |
| instagram_bio             | TEXT              |                           |
| instagram_website         | VARCHAR(100)      |                           |
| instagram_followers_count | INT               |                           |
| instagram_following_count | INT               |                           |

- **Post Table:**

| Field Name  | Data Type         | Constraints                   |
|-------------|-------------------|-------------------------------|
| instagram_post_id     | INT (Primary Key) | Auto-increment               |
| user_id     | INT (Foreign Key) | References User(user_id)     |
| instagram_image_url   | VARCHAR(255)      | NOT NULL                     |
| instagram_caption     | TEXT              |                             |
| instagram_likes_count | INT               |                             |
| instagram_comments_count | INT            |                             |
| instagram_post_date   | DATETIME          |                             |

- **IGTV Video Table:**

| Field Name | Data Type         | Constraints                   |
|------------|-------------------|-------------------------------|
| instagram_igvt_id    | INT (Primary Key) | Auto-increment               |
| user_id    | INT (Foreign Key) | References User(user_id)     |
| instagram_video_url  | VARCHAR(255)      | NOT NULL                     |
| instagram_title      | VARCHAR(100)      |                             |
| instagram_views_count | INT               |                             |
| instagram_video_date | DATETIME          |                             |

- **Reel Table:**

| Field Name | Data Type         | Constraints                   |
|------------|-------------------|-------------------------------|
| instagram_reel_id    | INT (Primary Key) | Auto-increment               |
| user_id    | INT (Foreign Key) | References User(user_id)     |
| instagram_video_url  | VARCHAR(255)      | NOT NULL                     |
| instagram_caption    | TEXT              |                             |
| instagram_likes_count | INT               |                             |
| instagram_video_date | DATETIME          |                             |

## SQL Scripts for Creating Tables

Below are the SQL scripts for creating the required tables in the database. These scripts can be executed using a SQL client or command line interface for your chosen database system.


```sql
CREATE TABLE user (
    user_id INT PRIMARY KEY,
    instagram_name VARCHAR(100) NOT NULL,
    instagram_username VARCHAR(50) UNIQUE NOT NULL,
    instagram_bio TEXT,
    instagram_website VARCHAR(100),
    instagram_followers_count INT,
    instagram_following_count INT
);

CREATE TABLE post (
    instagram_post_id INT PRIMARY KEY,
    user_id INT,
    instagram_image_url VARCHAR(255) NOT NULL,
    instagram_caption TEXT,
    instagram_likes_count INT,
    instagram_comments_count INT,
    instagram_post_date DATETIME,
    FOREIGN KEY (user_id) REFERENCES user(user_id)
);

CREATE TABLE igtv_video (
    instagram_igvt_id INT PRIMARY KEY,
    user_id INT,
    instagram_video_url VARCHAR(255) NOT NULL,
    instagram_title VARCHAR(100),
    instagram_views_count INT,
    instagram_video_date DATETIME,
    FOREIGN KEY (user_id) REFERENCES user(user_id)
);

CREATE TABLE reel (
    instagram_reel_id INT PRIMARY KEY,
    user_id INT,
    instagram_video_url VARCHAR(255) NOT NULL,
    instagram_caption TEXT,
    instagram_likes_count INT,
    instagram_video_date DATETIME,
    FOREIGN KEY (user_id) REFERENCES user(user_id)
);
```

#### Generating dummy data for the Instagram profile

In [3]:
def generate_dummy_data(num_users=10, num_posts=50, num_igtv_videos=20, num_reels=30, seed=None):
    """
    Generate dummy Instagram-style data for building a recommendation system.

    Parameters:
    - num_users (int): Number of dummy users.
    - num_posts (int): Number of dummy posts.
    - num_igtv_videos (int): Number of dummy IGTV videos.
    - num_reels (int): Number of dummy Reels.
    - seed (int, optional): When given, seeds the Faker instance and a private
      NumPy generator so that repeated calls return identical data. When None
      (default) the global NumPy random state is used and Faker is left unseeded.

    Returns:
    - tuple of pd.DataFrame: (merged_data, users, posts, igtv_videos, reels).
      merged_data joins posts, IGTV videos and reels on user_id (outer joins, so
      every post of a user is paired with every IGTV video and every reel of that
      same user) and then attaches the user columns.
    """

    fake = Faker()
    if seed is None:
        rng = np.random
    else:
        fake.seed_instance(seed)
        rng = np.random.RandomState(seed)

    # Username and website are derived from the display name, so they are built
    # directly instead of generating throwaway Faker values first.
    names = [fake.name() for _ in range(num_users)]
    usernames = [name.replace(' ', '').lower() for name in names]

    # Generate user data
    users = pd.DataFrame({
        'user_id': range(1, num_users + 1),
        'instagram_name': names,
        'instagram_username': usernames,
        'instagram_bio': [fake.sentence() for _ in range(num_users)],
        'instagram_website': ['www.' + username + '.com' for username in usernames],
        'instagram_followers_count': rng.randint(100, 1000, size=num_users),
        'instagram_following_count': rng.randint(50, 500, size=num_users)
    })

    # Generate post data (each post is assigned to a randomly chosen user)
    posts = pd.DataFrame({
        'instagram_post_id': range(1, num_posts + 1),
        'user_id': rng.choice(users['user_id'], size=num_posts),
        'instagram_image_url': [fake.image_url() for _ in range(num_posts)],
        'instagram_caption': [fake.sentence() for _ in range(num_posts)],
        'instagram_likes_count': rng.randint(10, 1000, size=num_posts),
        'instagram_comments_count': rng.randint(5, 200, size=num_posts),
        'instagram_post_date': pd.date_range(start='2023-01-01', periods=num_posts)
    })

    # Generate IGTV video data
    igtv_videos = pd.DataFrame({
        'instagram_igvt_id': range(1, num_igtv_videos + 1),
        'user_id': rng.choice(users['user_id'], size=num_igtv_videos),
        'instagram_video_url': [fake.url() for _ in range(num_igtv_videos)],
        'instagram_title': [fake.sentence() for _ in range(num_igtv_videos)],
        'instagram_views_count': rng.randint(100, 5000, size=num_igtv_videos),
        'instagram_video_date': pd.date_range(start='2023-01-01', periods=num_igtv_videos)
    })

    # Generate Reel data
    reels = pd.DataFrame({
        'instagram_reel_id': range(1, num_reels + 1),
        'user_id': rng.choice(users['user_id'], size=num_reels),
        'instagram_video_url': [fake.url() for _ in range(num_reels)],
        'instagram_caption': [fake.sentence() for _ in range(num_reels)],
        'instagram_likes_count': rng.randint(10, 1000, size=num_reels),
        'instagram_video_date': pd.date_range(start='2023-01-01', periods=num_reels)
    })

    # Merge dataframes.
    # posts and igtv_videos share no column besides the key, so the first pair of
    # suffixes is never applied; the clashing names only appear when reels are
    # joined, which is why post/IGTV columns end in '_merge' and reel columns in '_reel'.
    merged_data = pd.merge(posts, igtv_videos, on='user_id', how='outer', suffixes=('_post', '_igtv'))
    merged_data = pd.merge(merged_data, reels, on='user_id', how='outer', suffixes=('_merge', '_reel'))
    merged_data = pd.merge(merged_data, users, on='user_id', how='left')

    return merged_data, users, posts, igtv_videos, reels

In [4]:
# Generate the dummy data: the merged frame plus the separate users, posts,
# IGTV-video and reel frames (nothing is displayed here; the cells below preview them)
dummy_data, users, posts, igtv_videos, reels  = generate_dummy_data()

In [5]:
# Preview three randomly chosen user rows (no random_state, so the rows differ between runs)
users.sample(3)

Unnamed: 0,user_id,instagram_name,instagram_username,instagram_bio,instagram_website,instagram_followers_count,instagram_following_count
7,8,Kimberly Salas,kimberlysalas,Hair investment ball industry message south.,www.kimberlysalas.com,269,63
2,3,Rodney King,rodneyking,As fall land unit.,www.rodneyking.com,355,331
9,10,William Shaw,williamshaw,Method stay include hundred only.,www.williamshaw.com,333,461


In [6]:
# Preview three randomly chosen post rows
posts.sample(3)

Unnamed: 0,instagram_post_id,user_id,instagram_image_url,instagram_caption,instagram_likes_count,instagram_comments_count,instagram_post_date
8,9,8,https://picsum.photos/152/532,Should grow voice development position perform.,996,168,2023-01-09
43,44,9,https://dummyimage.com/8x691,Fly teach manage pull yard.,315,142,2023-02-13
25,26,8,https://dummyimage.com/160x300,Father really look campaign.,458,190,2023-01-26


In [7]:
# Preview three randomly chosen IGTV video rows
igtv_videos.sample(3)

Unnamed: 0,instagram_igvt_id,user_id,instagram_video_url,instagram_title,instagram_views_count,instagram_video_date
14,15,8,http://www.quinn.com/,Bit small instead plan.,1960,2023-01-15
4,5,1,https://www.smith.com/,Political middle sing page daughter without.,4222,2023-01-05
12,13,6,http://www.villanueva.com/,Interest that leader change scene interest dec...,4716,2023-01-13


In [8]:
# Preview three randomly chosen reel rows
reels.sample(3)

Unnamed: 0,instagram_reel_id,user_id,instagram_video_url,instagram_caption,instagram_likes_count,instagram_video_date
29,30,2,http://www.rogers.info/,Either back fear respond.,465,2023-01-30
6,7,7,https://avila.com/,Star girl energy quality majority protect.,838,2023-01-07
2,3,3,https://www.nelson.com/,Media agent no nation laugh hard.,605,2023-01-03


In [9]:
# Preview three random rows of the merged frame (post/IGTV columns carry the
# '_merge' suffix, reel columns the '_reel' suffix)
dummy_data.sample(3)

Unnamed: 0,instagram_post_id,user_id,instagram_image_url,instagram_caption_merge,instagram_likes_count_merge,instagram_comments_count,instagram_post_date,instagram_igvt_id,instagram_video_url_merge,instagram_title,instagram_views_count,instagram_video_date_merge,instagram_reel_id,instagram_video_url_reel,instagram_caption_reel,instagram_likes_count_reel,instagram_video_date_reel,instagram_name,instagram_username,instagram_bio,instagram_website,instagram_followers_count,instagram_following_count
191,10,4,https://dummyimage.com/26x261,Us become threat owner heart none.,613,47,2023-01-10,1,https://www.robinson.com/,Moment stuff to child third might.,4599,2023-01-01,6.0,https://phillips-thomas.com/,Decade particular heart must you story.,679.0,2023-01-06,Lindsey Fletcher DDS,lindseyfletcherdds,Toward particularly message author some thousand.,www.lindseyfletcherdds.com,972,367
108,5,10,https://placekitten.com/844/853,Gas cultural security improve place.,27,21,2023-01-05,3,http://www.massey-conway.org/,Suggest here very tree debate system.,3031,2023-01-03,16.0,https://www.howard-ryan.info/,Democrat child wall church charge fine goal.,736.0,2023-01-16,William Shaw,williamshaw,Method stay include hundred only.,www.williamshaw.com,333,461
38,12,8,https://picsum.photos/60/453,Drive serious huge despite out information share.,754,37,2023-01-12,15,http://www.quinn.com/,Bit small instead plan.,1960,2023-01-15,15.0,http://www.hill-hoffman.com/,This contain material prepare dog really.,978.0,2023-01-15,Kimberly Salas,kimberlysalas,Hair investment ball industry message south.,www.kimberlysalas.com,269,63


## Include additional features from the Pairy App

<table>
  <tr>
    <td>
      <img src="images/pairy1.jpg" alt="Image 1" style="width: 200px;"/>
    </td>
    <td>
      <img src="images/pairy2.jpg" alt="Image 2" style="width: 200px;"/>
    </td>
    <td>
      <img src="images/pairy3.jpg" alt="Image 3" style="width: 200px;"/>
    </td>
    <td>
      <img src="images/pairy4.jpg" alt="Image 4" style="width: 200px;"/>
    </td>
  </tr>
  <tr>
    <td>
      <img src="images/pairy5.jpg" alt="Image 5" style="width: 200px;"/>
    </td>
    <td>
      <img src="images/pairy6.jpg" alt="Image 6" style="width: 200px;"/>
    </td>
    <td>
      <img src="images/pairy7.jpg" alt="Image 7" style="width: 200px;"/>
    </td>
    <td>
      <img src="images/pairy8.jpg" alt="Image 8" style="width: 200px;"/>
    </td>
  </tr>
  <tr>
    <td>
      <img src="images/pairy9.jpg" alt="Image 9" style="width: 200px;"/>
    </td>
    <td>
      <img src="images/pairy10.jpg" alt="Image 10" style="width: 200px;"/>
    </td>
    <td>
      <img src="images/pairy11.jpg" alt="Image 11" style="width: 200px;"/>
    </td>
    <td>
      <img src="images/pairy12.jpg" alt="Image 12" style="width: 200px;"/>
    </td>
  </tr>
  <tr>
    <td>
      <img src="images/pairy13.jpg" alt="Image 13" style="width: 200px;"/>
    </td>
    <td>
      <img src="images/pairy14.jpg" alt="Image 14" style="width: 200px;"/>
    </td>
    <td>
      <img src="images/pairy15.jpg" alt="Image 15" style="width: 200px;"/>
    </td>
    <td>
      <img src="images/pairy16.jpg" alt="Image 16" style="width: 200px;"/>
    </td>
  </tr>
</table>


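`Additional Features from the Pairy Application` (in the generated data below these are stored with a `pairy_` prefix instead of `influencer_`, e.g. `influencer_dob` becomes `pairy_dob`):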
- influencer_name
- influencer_dob
- influencer_introduction
- influencer_location
- influencer_gender
- influencer_gallery_img_url_1
- influencer_gallery_img_url_2
- influencer_gallery_img_url_3
- influencer_gallery_img_url_4
- influencer_gallery_img_url_5
- influencer_gallery_img_url_6
- influencer_gallery_img_url_7
- influencer_gallery_vid_url
- influencer_hourly_rate
- influencer_fixed_rate

In [11]:
def add_influencer_features(data, num_influencers=10, seed=None):
    """
    Add additional influencer (Pairy) features to the generated data.

    Parameters:
    - data (tuple): Tuple containing generated dataframes
      (merged_data, users, posts, igtv_videos, reels), as returned by
      generate_dummy_data. For backward compatibility a bare merged DataFrame is
      still accepted; the users, posts, igtv_videos and reels frames are then
      looked up as module-level variables of the same names.
    - num_influencers (int): Number of influencer profiles to generate. They are
      assigned to user_id 1..num_influencers.
    - seed (int, optional): When given, seeds the Faker instance and private
      random generators so repeated calls return identical data. When None
      (default) the global `random` / NumPy states are used.

    Returns:
    - extended_data (tuple): (extended_merged_data, extended_users, posts,
      igtv_videos, reels). Only the first two frames gain the pairy_* columns;
      the posts, IGTV and reel frames are passed through unchanged.

    Raises:
    - NameError: If a bare DataFrame is passed and one of the module-level
      frames it then relies on does not exist.
    """

    if isinstance(data, (tuple, list)):
        merged_data, users_df, posts_df, igtv_df, reels_df = data
    else:
        # Legacy call style: only the merged frame was handed over and the other
        # frames were read implicitly from the notebook namespace.
        merged_data = data
        scope = globals()
        try:
            users_df = scope['users']
            posts_df = scope['posts']
            igtv_df = scope['igtv_videos']
            reels_df = scope['reels']
        except KeyError as missing:
            raise NameError(
                'add_influencer_features needs the frame {} when `data` is not a '
                '(merged_data, users, posts, igtv_videos, reels) tuple'.format(missing)
            ) from missing

    fake = Faker()
    if seed is None:
        np_rng, py_rng = np.random, random
    else:
        fake.seed_instance(seed)
        np_rng, py_rng = np.random.RandomState(seed), random.Random(seed)

    # Username and website are derived from the display name, so they are built
    # directly instead of generating throwaway Faker values first.
    names = [fake.name() for _ in range(num_influencers)]
    usernames = [name.replace(' ', '').lower() for name in names]

    # Generate influencer data (dict insertion order defines the column order)
    columns = {
        'user_id': range(1, num_influencers + 1),
        'pairy_name': names,
        'pairy_username': usernames,
        'pairy_bio': [fake.sentence() for _ in range(num_influencers)],
        'pairy_website': ['www.' + username + '.com' for username in usernames],
        'pairy_followers_count': np_rng.randint(1000, 10000, size=num_influencers),
        'pairy_following_count': np_rng.randint(100, 1000, size=num_influencers),
        'pairy_dob': pd.date_range(start='1980-01-01', periods=num_influencers),
        'pairy_introduction': [fake.paragraph() for _ in range(num_influencers)],
        'pairy_location': [fake.city() for _ in range(num_influencers)],
        'pairy_gender': [py_rng.choice(['male', 'female', 'other']) for _ in range(num_influencers)],
    }
    for slot in range(1, 8):
        columns['pairy_gallery_img_url_{}'.format(slot)] = [
            fake.image_url() for _ in range(num_influencers)
        ]
    columns['pairy_gallery_vid_url'] = [fake.url() for _ in range(num_influencers)]
    columns['pairy_hourly_rate'] = [py_rng.uniform(20, 200) for _ in range(num_influencers)]
    columns['pairy_fixed_rate'] = [py_rng.uniform(100, 1000) for _ in range(num_influencers)]
    influencers = pd.DataFrame(columns)

    # Merge influencer data onto the users
    extended_users = pd.merge(users_df, influencers, on='user_id', how='left')

    # Align the Instagram name, username and website with the Pairy profile.
    # Users without an influencer row keep their Instagram values instead of
    # being overwritten with NaN.
    for field in ('name', 'username', 'website'):
        pairy_col = 'pairy_' + field
        instagram_col = 'instagram_' + field
        extended_users[instagram_col] = extended_users[pairy_col].combine_first(
            extended_users[instagram_col]
        )

    # Drop the user-profile columns from the merged frame before re-attaching the
    # extended users; otherwise the join would duplicate them with _x/_y suffixes.
    profile_columns = [col for col in users_df.columns if col != 'user_id']
    content_only = merged_data.drop(columns=profile_columns, errors='ignore')

    extended_merged_data = pd.merge(content_only, extended_users, on='user_id', how='left')

    return extended_merged_data, extended_users, posts_df, igtv_df, reels_df


In [12]:
# Add influencer features to the data; hand over every generated frame, as the
# function's `data` parameter documents, rather than the merged frame alone
extended_data = add_influencer_features((dummy_data, users, posts, igtv_videos, reels))

# Unpack the extended_data tuple
extended_merged_data, extended_users, extended_posts, extended_igtv_videos, extended_reels = extended_data

In [13]:
# Preview three random users with their pairy_* influencer columns attached
extended_users.sample(3)

Unnamed: 0,user_id,instagram_name,instagram_username,instagram_bio,instagram_website,instagram_followers_count,instagram_following_count,pairy_name,pairy_username,pairy_bio,pairy_website,pairy_followers_count,pairy_following_count,pairy_dob,pairy_introduction,pairy_location,pairy_gender,pairy_gallery_img_url_1,pairy_gallery_img_url_2,pairy_gallery_img_url_3,pairy_gallery_img_url_4,pairy_gallery_img_url_5,pairy_gallery_img_url_6,pairy_gallery_img_url_7,pairy_gallery_vid_url,pairy_hourly_rate,pairy_fixed_rate
5,6,Stephen Palmer,stephenpalmer,Student ready resource teacher.,www.stephenpalmer.com,315,210,Stephen Palmer,stephenpalmer,Future particular and everyone him five.,www.stephenpalmer.com,9221,851,1980-01-06,Attack try community often hand. Authority art...,New Jeffreymouth,male,https://dummyimage.com/624x284,https://placekitten.com/420/728,https://placekitten.com/283/705,https://picsum.photos/189/722,https://dummyimage.com/117x164,https://picsum.photos/201/893,https://placekitten.com/398/45,https://www.sanford-olsen.biz/,97.890603,378.099669
8,9,Tracy Middleton,tracymiddleton,Practice bring act behavior performance material.,www.tracymiddleton.com,804,382,Tracy Middleton,tracymiddleton,Tough just front artist arrive before.,www.tracymiddleton.com,5838,962,1980-01-09,Agreement staff see process. Me executive part...,East Elizabeth,male,https://placekitten.com/290/115,https://placekitten.com/472/306,https://picsum.photos/768/994,https://picsum.photos/805/535,https://dummyimage.com/770x511,https://dummyimage.com/229x994,https://placekitten.com/965/878,https://www.anderson.com/,126.96781,651.479206
9,10,Regina Mcdonald MD,reginamcdonaldmd,Method stay include hundred only.,www.reginamcdonaldmd.com,333,461,Regina Mcdonald MD,reginamcdonaldmd,For town cut today hundred teach program.,www.reginamcdonaldmd.com,2523,596,1980-01-10,Everyone against tough ask surface fund serve....,Port Heidi,other,https://picsum.photos/389/407,https://picsum.photos/859/568,https://placekitten.com/15/519,https://dummyimage.com/487x594,https://placekitten.com/1016/187,https://picsum.photos/268/855,https://picsum.photos/647/509,http://www.dillon.org/,185.084424,776.867751


In [108]:
# NOTE(review): add_influencer_features returns the plain `posts` frame in this
# slot, yet the output recorded below shows merged post/IGTV/reel/user columns and
# this cell's execution count (108) is out of sequence — the output looks stale;
# re-run from a fresh kernel to confirm.
extended_posts.sample(3)

Unnamed: 0,instagram_post_id,user_id,instagram_image_url,instagram_caption_merge,instagram_likes_count_merge,instagram_comments_count,instagram_post_date,instagram_igvt_id,instagram_video_url_merge,instagram_title,instagram_views_count,instagram_video_date_merge,instagram_reel_id,instagram_video_url_reel,instagram_caption_reel,instagram_likes_count_reel,instagram_video_date_reel,instagram_name,instagram_username,instagram_bio,instagram_website,instagram_followers_count,instagram_following_count,pairy_name,pairy_username,pairy_bio,pairy_website,pairy_followers_count,pairy_following_count,pairy_dob,pairy_introduction,pairy_location,pairy_gender,pairy_gallery_img_url_1,pairy_gallery_img_url_2,pairy_gallery_img_url_3,pairy_gallery_img_url_4,pairy_gallery_img_url_5,pairy_gallery_img_url_6,pairy_gallery_img_url_7,pairy_gallery_vid_url,pairy_hourly_rate,pairy_fixed_rate
0,1,9,https://picsum.photos/521/239,Rule word argue where agree into.,614,50,2023-01-01,9.0,http://www.hood.net/,Final explain quality fund.,3192.0,2023-01-09,10,http://www.lee.com/,Listen difficult find director knowledge begin.,895,2023-01-10,Edward Hampton,edwardhampton,Baby church win reach world grow herself site.,www.edwardhampton.com,136,84,Edward Hampton,edwardhampton,Past trial evidence successful security up.,www.edwardhampton.com,2062,789,1980-01-09,Every charge pay win particular. Today exactly...,New Cassie,female,https://picsum.photos/341/801,https://placekitten.com/121/168,https://dummyimage.com/779x30,https://placekitten.com/446/364,https://placekitten.com/295/242,https://dummyimage.com/840x526,https://dummyimage.com/652x681,https://neal-skinner.org/,108.30303,432.53448
1,1,9,https://picsum.photos/521/239,Rule word argue where agree into.,614,50,2023-01-01,9.0,http://www.hood.net/,Final explain quality fund.,3192.0,2023-01-09,11,https://www.sweeney-andrews.com/,Always return whatever.,556,2023-01-11,Edward Hampton,edwardhampton,Baby church win reach world grow herself site.,www.edwardhampton.com,136,84,Edward Hampton,edwardhampton,Past trial evidence successful security up.,www.edwardhampton.com,2062,789,1980-01-09,Every charge pay win particular. Today exactly...,New Cassie,female,https://picsum.photos/341/801,https://placekitten.com/121/168,https://dummyimage.com/779x30,https://placekitten.com/446/364,https://placekitten.com/295/242,https://dummyimage.com/840x526,https://dummyimage.com/652x681,https://neal-skinner.org/,108.30303,432.53448
2,1,9,https://picsum.photos/521/239,Rule word argue where agree into.,614,50,2023-01-01,9.0,http://www.hood.net/,Final explain quality fund.,3192.0,2023-01-09,12,http://gonzales.net/,Letter big west director.,98,2023-01-12,Edward Hampton,edwardhampton,Baby church win reach world grow herself site.,www.edwardhampton.com,136,84,Edward Hampton,edwardhampton,Past trial evidence successful security up.,www.edwardhampton.com,2062,789,1980-01-09,Every charge pay win particular. Today exactly...,New Cassie,female,https://picsum.photos/341/801,https://placekitten.com/121/168,https://dummyimage.com/779x30,https://placekitten.com/446/364,https://placekitten.com/295/242,https://dummyimage.com/840x526,https://dummyimage.com/652x681,https://neal-skinner.org/,108.30303,432.53448


In [14]:
# Preview three random IGTV video rows (this frame is returned unchanged by add_influencer_features)
extended_igtv_videos.sample(3)

Unnamed: 0,instagram_igvt_id,user_id,instagram_video_url,instagram_title,instagram_views_count,instagram_video_date
17,18,7,http://www.young.com/,Follow market there near development forget.,1641,2023-01-18
0,1,4,https://www.robinson.com/,Moment stuff to child third might.,4599,2023-01-01
1,2,3,https://schneider-porter.net/,Break attack until industry much put still.,434,2023-01-02


In [15]:
# Preview three random reel rows (this frame is returned unchanged by add_influencer_features)
extended_reels.sample(3)

Unnamed: 0,instagram_reel_id,user_id,instagram_video_url,instagram_caption,instagram_likes_count,instagram_video_date
4,5,5,http://leonard.com/,Old enjoy affect material.,271,2023-01-05
16,17,7,http://www.jones-harrington.com/,Student produce if teacher.,839,2023-01-17
23,24,10,http://rose.com/,Scientist improve local want through PM week.,440,2023-01-24


In [16]:
# Preview three random rows of the merged frame with the extended user columns attached
extended_merged_data.sample(3)

Unnamed: 0,instagram_post_id,user_id,instagram_image_url,instagram_caption_merge,instagram_likes_count_merge,instagram_comments_count,instagram_post_date,instagram_igvt_id,instagram_video_url_merge,instagram_title,instagram_views_count,instagram_video_date_merge,instagram_reel_id,instagram_video_url_reel,instagram_caption_reel,instagram_likes_count_reel,instagram_video_date_reel,instagram_name,instagram_username,instagram_bio,instagram_website,instagram_followers_count,instagram_following_count,pairy_name,pairy_username,pairy_bio,pairy_website,pairy_followers_count,pairy_following_count,pairy_dob,pairy_introduction,pairy_location,pairy_gender,pairy_gallery_img_url_1,pairy_gallery_img_url_2,pairy_gallery_img_url_3,pairy_gallery_img_url_4,pairy_gallery_img_url_5,pairy_gallery_img_url_6,pairy_gallery_img_url_7,pairy_gallery_vid_url,pairy_hourly_rate,pairy_fixed_rate
101,40,5,https://dummyimage.com/978x382,Air together tonight skill because support.,365,50,2023-02-09,6,http://www.bird.com/,Soldier suffer spend hard Democrat.,185,2023-01-06,12.0,https://martin.info/,Thing yeah democratic mind quite.,961.0,2023-01-12,Kelly Jackson,kellyjackson,Laugh suddenly ask very.,www.kellyjackson.com,380,62,Kelly Jackson,kellyjackson,Return financial pull back.,www.kellyjackson.com,1046,362,1980-01-05,Organization life buy. Military foreign cover ...,Brandifort,female,https://picsum.photos/30/190,https://picsum.photos/596/3,https://placekitten.com/269/789,https://picsum.photos/479/732,https://picsum.photos/719/57,https://picsum.photos/472/902,https://picsum.photos/44/756,https://www.reid-sherman.com/,102.615444,217.106464
150,6,3,https://placekitten.com/348/298,Visit tonight draw.,290,122,2023-01-06,2,https://schneider-porter.net/,Break attack until industry much put still.,434,2023-01-02,13.0,https://www.sanchez.com/,Teacher man property rise day choice growth.,314.0,2023-01-13,Kevin Gibson,kevingibson,As fall land unit.,www.kevingibson.com,355,331,Kevin Gibson,kevingibson,Nature clearly politics opportunity card.,www.kevingibson.com,3148,949,1980-01-03,Year score anyone southern sure color. Owner f...,Lake Sara,other,https://placekitten.com/121/784,https://picsum.photos/73/649,https://picsum.photos/506/668,https://picsum.photos/888/159,https://placekitten.com/217/158,https://placekitten.com/401/687,https://picsum.photos/84/68,https://miller.biz/,75.470832,157.374435
154,6,3,https://placekitten.com/348/298,Visit tonight draw.,290,122,2023-01-06,2,https://schneider-porter.net/,Break attack until industry much put still.,434,2023-01-02,27.0,http://www.christensen.com/,Order his say full expect level talk conference.,427.0,2023-01-27,Kevin Gibson,kevingibson,As fall land unit.,www.kevingibson.com,355,331,Kevin Gibson,kevingibson,Nature clearly politics opportunity card.,www.kevingibson.com,3148,949,1980-01-03,Year score anyone southern sure color. Owner f...,Lake Sara,other,https://placekitten.com/121/784,https://picsum.photos/73/649,https://picsum.photos/506/668,https://picsum.photos/888/159,https://placekitten.com/217/158,https://placekitten.com/401/687,https://picsum.photos/84/68,https://miller.biz/,75.470832,157.374435


## Save the dummy data to SQLite database

In [17]:
conn = sqlite3.connect('../database/dummy_data.db')  # Connect to the database

# Write each frame to its own table; if_exists='replace' overwrites any table left
# over from a previous run. try/finally guarantees the connection is closed even
# when one of the writes raises.
try:
    extended_users.to_sql('extended_users', conn, if_exists='replace', index=False)
    extended_posts.to_sql('extended_posts', conn, if_exists='replace', index=False)
    extended_igtv_videos.to_sql('extended_igtv_videos', conn, if_exists='replace', index=False)
    extended_reels.to_sql('extended_reels', conn, if_exists='replace', index=False)
    extended_merged_data.to_sql('extended_merged_data', conn, if_exists='replace', index=False)
finally:
    # Close the connection
    conn.close()