## Question 1: **Answer**

In [1]:
import pandas as pd
import sqlite3

In [None]:
def add_data_to_database(input_data):
    """Load a CSV file into the ``social_media`` table of ``social_media_data.db``.

    Parameters
    ----------
    input_data : str or path-like
        Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns
    -------
    None

    Notes
    -----
    * An existing ``social_media`` table is replaced, so the cell can be re-run
      (Restart & Run All) without failing on "table already exists" and without
      leaving stale rows behind.
    * Failures (missing file, malformed CSV, database errors) are reported with
      ``print`` instead of being raised, so the notebook keeps running.
    * The SQLite connection is always closed, even when loading fails.
    """
    connection = None
    try:
        df = pd.read_csv(input_data)
        connection = sqlite3.connect("social_media_data.db")
        # if_exists='replace' makes the load idempotent across re-runs.
        df.to_sql('social_media', con=connection, index=False, if_exists='replace')
    except Exception as e:
        print(f"Error: {e}")
    finally:
        # Release the database file handle regardless of success or failure.
        if connection is not None:
            connection.close()

# NOTE(review): absolute path looks environment-specific (e.g. a Colab
# "/content" upload) — confirm it exists before running elsewhere.
data = "/content/social_media_behavior.csv"
add_data_to_database(data)

In [3]:
# Load the `sql` IPython extension, which provides the %sql / %%sql magics used in the cells below
%load_ext sql

In [4]:
# Point the %sql magic at the SQLite file written by add_data_to_database
# (relative path, so it resolves against the notebook's working directory)
%sql sqlite:///social_media_data.db

In [5]:
# Set SqlMagic's table style to the deprecated default ('_DEPRECATED_DEFAULT')
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

#### 1. View the first 5 rows

In [6]:
%%sql
SELECT *
FROM social_media
-- Preview only: cap the result at the first five rows
LIMIT 5;

 * sqlite:///social_media_data.db
Done.


id,gender,age,education,profession,workDuration,typeSocial,useSocial,productivity,socialDuration
1,male,35,master,manager,8 years,"facebook,twitter,youtube",often,yes,6
2,male,27,degree,officer,5 years,"facebook,youtube,tiktok,snapchat",regulary,no,7
3,female,24,degree,officer,4 years,"facebook,twitter,linkedin,tiktok,snapchat,youtube,instagram,pinterest",everytime,no,4
4,male,35,master,manager,8 years,"facebook,twitter,youtube",often,yes,8
5,male,27,degree,officer,5 years,"facebook,youtube,tiktok,snapchat",regulary,no,6


## Question 2: **Answer**

In [7]:
%%sql
SELECT
    more_than_3_accts,
    all_users_count,
    ROUND(more_than_3_accts / CAST(all_users_count AS REAL) * 100, 2) AS percentage
FROM (
    -- One pass over the per-user platform counts: tally heavy users and all users
    SELECT
        SUM(CASE WHEN platform_count > 3 THEN 1 ELSE 0 END) AS more_than_3_accts,
        COUNT(*) AS all_users_count
    FROM (
        -- typeSocial is a comma-separated list, so platforms = number of commas + 1
        SELECT
            LENGTH(typeSocial) - LENGTH(REPLACE(typeSocial, ',', '')) + 1 AS platform_count
        FROM social_media
    ) AS per_user
) AS totals;

 * sqlite:///social_media_data.db
Done.


more_than_3_accts,all_users_count,percentage
28,39,71.79


## Question 3: **Answer**

In [8]:
%%sql
SELECT
    SUM(CASE WHEN is_master_or_manager AND platform_count > 3 THEN 1 ELSE 0 END)
        AS more_than_3_accounts,   -- strictly more than three platforms
    SUM(CASE WHEN is_master_or_manager AND platform_count < 3 THEN 1 ELSE 0 END)
        AS fewer_than_3_accounts   -- strictly fewer than three platforms
FROM (
    -- Per-user helper columns so the two tallies above share one definition
    SELECT
        -- Holds a master's degree or works as a manager
        (education = 'master' OR profession = 'manager') AS is_master_or_manager,
        -- typeSocial is a comma-separated list, so platforms = number of commas + 1
        LENGTH(typeSocial) - LENGTH(REPLACE(typeSocial, ',', '')) + 1 AS platform_count
    FROM social_media
) AS per_user;

 * sqlite:///social_media_data.db
Done.


more_than_3_accounts,fewer_than_3_accounts
3,0


## Question 4: **Answer**



In [9]:
%%sql
SELECT
    everytime_20s AS age_group_20_29_count,
    ROUND((everytime_20s / CAST(total_20s AS REAL) * 100), 2) AS percentage_20_29,
    everytime_30s AS age_group_30_39_count,
    ROUND((everytime_30s / CAST(total_30s AS REAL) * 100), 2) AS percentage_30_39
FROM (
    -- Tally, per age band, the "everytime" users and the band's total size
    SELECT
        SUM(CASE WHEN in_20s AND is_everytime THEN 1 ELSE 0 END) AS everytime_20s,
        SUM(CASE WHEN in_20s THEN 1 ELSE 0 END) AS total_20s,
        SUM(CASE WHEN in_30s AND is_everytime THEN 1 ELSE 0 END) AS everytime_30s,
        SUM(CASE WHEN in_30s THEN 1 ELSE 0 END) AS total_30s
    FROM (
        -- Per-user flags: age band membership and "everytime" usage
        SELECT
            (age BETWEEN 20 AND 29) AS in_20s,
            (age BETWEEN 30 AND 39) AS in_30s,
            (usesocial = 'everytime') AS is_everytime
        FROM social_media
    ) AS flagged
) AS group_analysis;

 * sqlite:///social_media_data.db
Done.


age_group_20_29_count,percentage_20_29,age_group_30_39_count,percentage_30_39
13,59.09,2,15.38


## Question 5: **Answer**

In [10]:
%%sql
SELECT
    male_count,
    female_count,
    -- Females expressed as a percentage of males; NULLIF avoids dividing by zero
    ROUND((female_count * 100.0) / NULLIF(male_count, 0), 2) AS female_to_male_percentage
FROM (
    SELECT
        SUM(CASE gender WHEN 'male' THEN 1 ELSE 0 END) AS male_count,
        SUM(CASE gender WHEN 'female' THEN 1 ELSE 0 END) AS female_count
    FROM social_media
    WHERE
        -- Only users whose socialDuration falls in the 5-10 range ...
        socialDuration BETWEEN 5 AND 10
        -- ... and who list more than three platforms (commas + 1 in typeSocial)
        AND (LENGTH(typeSocial) - LENGTH(REPLACE(typeSocial, ',', '')) + 1) > 3
) AS gender_counts;

 * sqlite:///social_media_data.db
Done.


male_count,female_count,female_to_male_percentage
8,7,87.5
