In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the raw survey responses; the path is resolved relative to the current working directory.
df = pd.read_csv("data/responses.csv")

In [3]:
# Show the column headers (the full question texts) to see which ones need short names.
df.columns

Index(['Timestamp', ' What is the name of your business? (Optional)',
       'What type of products or services does your business offer?',
       'What is the size of your business?',
       'How long has your business been operating?',
       'Do you use social media platforms for advertising?',
       'Which social media platforms do you advertise on? (Select all that apply)',
       'What is your monthly budget for social media advertisements?',
       'How do you measure the effectiveness of your social media ads? (Select all that apply.)',
       'How do customers usually engage with your social media ads? (Select all that apply.)',
       'How often do your social media posts receive customer engagement (likes, shares, comments)?',
       'Do you use other forms of advertising besides social media?',
       'If yes, which traditional advertising methods do you use? (Select all that apply.)',
       'What challenges do you face in using social media advertising? (Select all that 

In [5]:
# Short aliases for the long question-text headers that the later cells refer to.
short_names = {
    "What type of products or services does your business offer?": "type",
    "Which social media platforms do you advertise on? (Select all that apply)": "social_media",
    "Have you noticed any changes in sales performance since using social media?": "sales_performance",
    "Do you believe social media has a positive impact on your sales performance?": "positive_impact",
    "Have you noticed any correlation between social media engagement and sales performance?": "social_vs_sales",
}
# Headers not present in the frame are skipped silently (rename defaults to errors="ignore"),
# so re-running this cell is harmless.
df.rename(columns=short_names, inplace=True)

In [6]:
# Trim leading/trailing whitespace from the business-type answers,
# then display the distinct values that remain.
df["type"] = df["type"].str.strip()
pd.unique(df["type"])

array(['Electronics & Gadgets', 'Clothing & Apparel', 'Accessories',
       'Barber', 'Cosmetics and deodorant', 'Kitchen utensils',
       'Food & Beverages', 'Construction', 'Crochet wears',
       'Chemist store', 'Drycleaning', 'Perfume', 'Drugs', 'Finance',
       'Cooking gas', 'Nothing', 'Internet, designs', 'Fashion designer',
       'Household items', 'Home essentials', 'Banking system',
       'Electrical', 'Photography', 'Makeup artist',
       'Frame, Throw Pillow, Mug, Magic Mug, and other gift items for birthday surprises',
       'Haircut', 'Farming  and poultry', 'Graphics', 'Clothing and food',
       'Tech', 'Catering and Decoration', 'Loc’s and carve',
       'Science Lab/Equipments', 'Educational', 'Petty trade',
       'Educational institute', 'Snacks', 'Hair'], dtype=object)

In [7]:
# Hand-built grouping of the raw business-type answers into three broad buckets.
# A type that appears in none of the lists gets no category.

# Types that will be labelled 'Home Appliances'.
home = [
    'Electronics & Gadgets',
    'Accessories',
    'Kitchen utensils',
    'Construction',
    'Cooking gas',
    'Electrical',
    'Photography',
    'Internet, designs',
    'Household items',
    'Home essentials',
    'Graphics',
    'Tech',
    'Science Lab/Equipments',
]

# Types that will be labelled 'Clothing & Apparel'.
# NOTE(review): 'Finance' and 'Banking system' are grouped here — confirm this is intended.
cloth = [
    'Clothing & Apparel',
    'Barber',
    'Cosmetics and deodorant',
    'Crochet wears',
    'Drycleaning',
    'Perfume',
    'Finance',
    'Fashion designer',
    'Banking system',
    'Makeup artist',
    'Frame, Throw Pillow, Mug, Magic Mug, and other gift items for birthday surprises',
    'Haircut',
    'Catering and Decoration',
    'Hair',
    'Loc’s and carve',
]

# Types that will be labelled 'Food & Beverages'.
# NOTE(review): 'Educational', 'Educational institute', 'Chemist store' and 'Drugs'
# are grouped here — confirm this is intended.
food = [
    'Food & Beverages',
    'Chemist store',
    'Drugs',
    'Farming  and poultry',  # double space matches the raw answer exactly
    'Educational',
    'Petty trade',
    'Educational institute',
    'Clothing and food',
    'Snacks',
]

In [9]:
# Assign each row a broad category from its business type in one vectorized pass.
#
# The lookup is merged in reverse priority order (food, then cloth, then home) so
# that if a type were ever listed in more than one group, 'home' beats 'cloth'
# beats 'food'.
category_by_type = {
    **dict.fromkeys(food, 'Food & Beverages'),
    **dict.fromkeys(cloth, 'Clothing & Apparel'),
    **dict.fromkeys(home, 'Home Appliances'),
}

# Series.map leaves NaN for any type that is not in the lookup (including missing
# answers). Building the whole column at once means:
#   * strings are never written row by row into a float NaN column (an
#     incompatible-dtype assignment that pandas 2.1+ deprecates), and
#   * the result is aligned on the frame's real index, so it does not rely on
#     the row labels happening to be 0..n-1.
df["categories"] = df["type"].map(category_by_type)

In [10]:
# Count how many respondents do / do not advertise on social media.
df['Do you use social media platforms for advertising?'].value_counts()

Do you use social media platforms for advertising?
Yes    96
No      6
Name: count, dtype: int64

In [11]:
# Keep only the respondents who answered "Yes" to advertising on social media.
df1 = df.loc[df['Do you use social media platforms for advertising?'].eq("Yes")]

In [12]:
# Sanity check: after filtering, "Yes" should be the only remaining answer.
df1['Do you use social media platforms for advertising?'].value_counts()

Do you use social media platforms for advertising?
Yes    96
Name: count, dtype: int64

In [13]:
# Take an independent copy of just the analysis columns, so later changes to
# `advert` never write back into the filtered frame `df1`.
advert = df1[
    [
        "type",
        "social_media",
        "categories",
        "sales_performance",
        "positive_impact",
        "social_vs_sales",
    ]
].copy()

In [22]:
# Discard every row with a missing value in any retained column,
# then renumber the rows from 0 without keeping the old index as a column.
advert = advert.dropna().reset_index(drop=True)

In [23]:
# Preview the first rows of the cleaned frame.
advert.head()

Unnamed: 0,type,social_media,categories,sales_performance,positive_impact,social_vs_sales
0,Electronics & Gadgets,"Facebook, Tik-Tok",Home Appliances,"Yes, a significant increase",Strongly agree,"Yes, high engagement leads to more sales"
1,Clothing & Apparel,"Facebook, Instagram, Twitter/X, Tik-Tok",Clothing & Apparel,"Yes, a significant increase",Strongly agree,"Yes, high engagement leads to more sales"
2,Accessories,Tik-Tok,Home Appliances,"No, noticeable change",Agree,"Yes, high engagement leads to more sales"
3,Barber,Tik-Tok,Clothing & Apparel,"Yes, a slight increase",Agree,"Sometimes, but not always"
4,Clothing & Apparel,"Instagram, Twitter/X, WhatsApp",Clothing & Apparel,"Yes, a significant increase",Agree,"Yes, high engagement leads to more sales"


In [24]:
# Rows per category; dropna=False would list a NaN bucket if any uncategorised rows were left.
advert["categories"].value_counts(dropna=False)

categories
Clothing & Apparel    40
Home Appliances       28
Food & Beverages      27
Name: count, dtype: int64

In [25]:
# Confirm which columns were kept.
advert.columns

Index(['type', 'social_media', 'categories', 'sales_performance',
       'positive_impact', 'social_vs_sales'],
      dtype='object')

In [26]:
# (rows, columns) of the cleaned frame.
advert.shape

(95, 6)

In [27]:
# Per-column dtypes and non-null counts; every column should be fully populated after dropna.
advert.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   type               95 non-null     object
 1   social_media       95 non-null     object
 2   categories         95 non-null     object
 3   sales_performance  95 non-null     object
 4   positive_impact    95 non-null     object
 5   social_vs_sales    95 non-null     object
dtypes: object(6)
memory usage: 4.6+ KB


In [28]:
# Save the cleaned, categorised frame for later use.
# With the default index=True the row index is written too, as an unnamed first column.
# NOTE(review): pass index=False if that extra column is not wanted — check how the file is read back.
advert.to_csv("data/advert_categories.csv")