In [1]:
import pandas as pd
import numpy as np

## DATA FORMATTING

In [2]:
Eabl_Mentions = pd.read_csv("EABL_mentions.csv")
Eabl_hashtags = pd.read_excel("Hashtags.xlsx")

In [3]:
print("EABL Mentions Shape:", Eabl_Mentions.shape)
print("EABL Hashtags Shape:", Eabl_hashtags.shape)

EABL Mentions Shape: (6136, 6)
EABL Hashtags Shape: (4666, 6)


##### EABL MENTIONS FROM TWITTER

In [4]:
# checking the 1st 5 rows in the dataset
Eabl_Mentions.head(5)

Unnamed: 0.1,Unnamed: 0,data,text,sentiment,stats,links
0,8,https://twitter.com/Fahrisee/status/1743859828...,"Guys, do you remember Shaffie Weru Dj Joe Mfal...",NEGATIVE,"Jan 7, 2024 · 4:59 AM UTC","{'comments': 14, 'retweets': 40, 'quotes': 1, ..."
1,10,https://twitter.com/HonKangata/status/17447694...,Drilling water at Kenneth Matiba Hospital Make...,POSITIVE,"Jan 9, 2024 · 5:13 PM UTC","{'comments': 18, 'retweets': 29, 'quotes': 1, ..."
2,11,https://twitter.com/eabl2h9h22/status/17451161...,+1ครับ,,"Jan 10, 2024 · 4:11 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l..."
3,12,https://twitter.com/Osama_otero/status/1744717...,Kenyans wakitambua hii beer it is over for EABL,NEGATIVE,"Jan 9, 2024 · 1:45 PM UTC","{'comments': 33, 'retweets': 122, 'quotes': 2,..."
4,13,https://twitter.com/LewellaBee/status/17450941...,I thought EABL products are unwanted around th...,NEUTRAL,"Jan 10, 2024 · 2:43 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l..."


In [5]:
# Drop unnecessary columns
Eabl_Mentions = Eabl_Mentions.drop(['Unnamed: 0','data'], axis=1)
Eabl_Mentions.head()

Unnamed: 0,text,sentiment,stats,links
0,"Guys, do you remember Shaffie Weru Dj Joe Mfal...",NEGATIVE,"Jan 7, 2024 · 4:59 AM UTC","{'comments': 14, 'retweets': 40, 'quotes': 1, ..."
1,Drilling water at Kenneth Matiba Hospital Make...,POSITIVE,"Jan 9, 2024 · 5:13 PM UTC","{'comments': 18, 'retweets': 29, 'quotes': 1, ..."
2,+1ครับ,,"Jan 10, 2024 · 4:11 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l..."
3,Kenyans wakitambua hii beer it is over for EABL,NEGATIVE,"Jan 9, 2024 · 1:45 PM UTC","{'comments': 33, 'retweets': 122, 'quotes': 2,..."
4,I thought EABL products are unwanted around th...,NEUTRAL,"Jan 10, 2024 · 2:43 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l..."


In [6]:
Eabl_Mentions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6136 entries, 0 to 6135
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   text       6119 non-null   object
 1   sentiment  11 non-null     object
 2   stats      6136 non-null   object
 3   links      6136 non-null   object
dtypes: object(4)
memory usage: 191.9+ KB


In [7]:
import re
import string

def clean_text(text):
    # Check if the text is NaN
    if pd.notnull(text):
        # Convert to lowercase
        text = text.lower()
        
        # Remove special characters, links, and user mentions
        text = re.sub(r'http\S+|www\S+|@[^\s]+', '', text)
        text = re.sub(r'\s+', ' ', text)  # Remove extra spaces
        text = text.strip()  # Remove leading and trailing whitespaces
        
        # Remove Unicode characters, emojis, and other non-alphanumeric characters
        text = re.sub(r'[^\x00-\x7F]+', '', text)
        
        # Remove punctuation
        text = text.translate(str.maketrans('', '', string.punctuation))
        
        # Add additional cleaning steps as needed
    
    return text

# Apply the clean_text function to the 'text' column
Eabl_Mentions['cleaned_text'] = Eabl_Mentions['text'].apply(clean_text)

# Display the DataFrame with the cleaned_text column
Eabl_Mentions[['text', 'cleaned_text']].head()


Unnamed: 0,text,cleaned_text
0,"Guys, do you remember Shaffie Weru Dj Joe Mfal...",guys do you remember shaffie weru dj joe mfalm...
1,Drilling water at Kenneth Matiba Hospital Make...,drilling water at kenneth matiba hospital make...
2,+1ครับ,1
3,Kenyans wakitambua hii beer it is over for EABL,kenyans wakitambua hii beer it is over for eabl
4,I thought EABL products are unwanted around th...,i thought eabl products are unwanted around th...


In [8]:
import ast

Eabl_Mentions['stats_dict'] = Eabl_Mentions['links'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else {})

# Extract 'comments', 'retweets', 'quotes', and 'likes' directly from the dictionary
Eabl_Mentions['comments'] = Eabl_Mentions['stats_dict'].apply(lambda x: x.get('comments', 0))
Eabl_Mentions['retweets'] = Eabl_Mentions['stats_dict'].apply(lambda x: x.get('retweets', 0))
Eabl_Mentions['quotes'] = Eabl_Mentions['stats_dict'].apply(lambda x: x.get('quotes', 0))
Eabl_Mentions['likes'] = Eabl_Mentions['stats_dict'].apply(lambda x: x.get('likes', 0))

# Drop the intermediate 'stats_dict' column
Eabl_Mentions = Eabl_Mentions.drop('stats_dict', axis=1)

# Display the DataFrame with the new columns
Eabl_Mentions.head(10)

Unnamed: 0,text,sentiment,stats,links,cleaned_text,comments,retweets,quotes,likes
0,"Guys, do you remember Shaffie Weru Dj Joe Mfal...",NEGATIVE,"Jan 7, 2024 · 4:59 AM UTC","{'comments': 14, 'retweets': 40, 'quotes': 1, ...",guys do you remember shaffie weru dj joe mfalm...,14,40,1,157
1,Drilling water at Kenneth Matiba Hospital Make...,POSITIVE,"Jan 9, 2024 · 5:13 PM UTC","{'comments': 18, 'retweets': 29, 'quotes': 1, ...",drilling water at kenneth matiba hospital make...,18,29,1,194
2,+1ครับ,,"Jan 10, 2024 · 4:11 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",1,0,0,0,0
3,Kenyans wakitambua hii beer it is over for EABL,NEGATIVE,"Jan 9, 2024 · 1:45 PM UTC","{'comments': 33, 'retweets': 122, 'quotes': 2,...",kenyans wakitambua hii beer it is over for eabl,33,122,2,537
4,I thought EABL products are unwanted around th...,NEUTRAL,"Jan 10, 2024 · 2:43 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",i thought eabl products are unwanted around th...,0,0,0,0
5,ตอบผมด้วยนะครับ,,"Jan 10, 2024 · 2:39 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",,0,0,0,1
6,ถึงหน้าบ้านในทันที55,,"Jan 10, 2024 · 2:00 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",55,0,0,0,0
7,ผมๆๆๆ5,,"Jan 10, 2024 · 11:58 AM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",5,0,0,0,0
8,ขอไอดีไลน์ครีบ,,"Jan 10, 2024 · 10:50 AM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",,0,0,0,0
9,Unajua sai amafanya EABL. Anaweza tupea a year...,NEUTRAL,"Jan 10, 2024 · 10:12 AM UTC","{'comments': 1, 'retweets': 0, 'quotes': 0, 'l...",unajua sai amafanya eabl anaweza tupea a years...,1,0,0,0


In [9]:
# Remove 'text' and 'links' columns
Eabl_Mentions = Eabl_Mentions.drop(['text', 'links','sentiment'], axis=1)

# Move 'sentiment' column after 'cleaned_text'
columns_order = ['cleaned_text','stats','comments', 'retweets', 'quotes', 'likes']
Eabl_Mentions = Eabl_Mentions[columns_order]

# Display the updated DataFrame
Eabl_Mentions.head()

Unnamed: 0,cleaned_text,stats,comments,retweets,quotes,likes
0,guys do you remember shaffie weru dj joe mfalm...,"Jan 7, 2024 · 4:59 AM UTC",14,40,1,157
1,drilling water at kenneth matiba hospital make...,"Jan 9, 2024 · 5:13 PM UTC",18,29,1,194
2,1,"Jan 10, 2024 · 4:11 PM UTC",0,0,0,0
3,kenyans wakitambua hii beer it is over for eabl,"Jan 9, 2024 · 1:45 PM UTC",33,122,2,537
4,i thought eabl products are unwanted around th...,"Jan 10, 2024 · 2:43 PM UTC",0,0,0,0


In [10]:
from textblob import TextBlob

# Add a new column 'sentiment' based on the sentiment of the 'News Heading' column
Eabl_Mentions['sentiment'] = Eabl_Mentions['cleaned_text'].apply(lambda x: TextBlob(str(x)).sentiment.polarity)

# Classify sentiment into 'positive', 'negative', or 'neutral'
Eabl_Mentions['sentiment'] = Eabl_Mentions['sentiment'].apply(lambda x: 'Positive' if x > 0 else ('Negative' if x < 0 else 'Neutral'))

columns_order = ['cleaned_text','sentiment', 'stats', 'comments', 'retweets', 'quotes', 'likes']
Eabl_Mentions = Eabl_Mentions[columns_order]

Eabl_Mentions.head(3)

Unnamed: 0,cleaned_text,sentiment,stats,comments,retweets,quotes,likes
0,guys do you remember shaffie weru dj joe mfalm...,Negative,"Jan 7, 2024 · 4:59 AM UTC",14,40,1,157
1,drilling water at kenneth matiba hospital make...,Positive,"Jan 9, 2024 · 5:13 PM UTC",18,29,1,194
2,1,Neutral,"Jan 10, 2024 · 4:11 PM UTC",0,0,0,0


In [11]:
# Remove rows with only numerical values in 'cleaned_text'
Eabl_Mentions = Eabl_Mentions[~Eabl_Mentions['cleaned_text'].astype(str).str.isnumeric()]

# Drop rows with blank values in the 'cleaned_text' column
Eabl_Mentions = Eabl_Mentions.dropna(subset=['cleaned_text'])

# Reset index after removing rows
Eabl_Mentions = Eabl_Mentions.reset_index(drop=True)

# Display the updated DataFrame
Eabl_Mentions.head(10)

Unnamed: 0,cleaned_text,sentiment,stats,comments,retweets,quotes,likes
0,guys do you remember shaffie weru dj joe mfalm...,Negative,"Jan 7, 2024 · 4:59 AM UTC",14,40,1,157
1,drilling water at kenneth matiba hospital make...,Positive,"Jan 9, 2024 · 5:13 PM UTC",18,29,1,194
2,kenyans wakitambua hii beer it is over for eabl,Neutral,"Jan 9, 2024 · 1:45 PM UTC",33,122,2,537
3,i thought eabl products are unwanted around th...,Neutral,"Jan 10, 2024 · 2:43 PM UTC",0,0,0,0
4,,Neutral,"Jan 10, 2024 · 2:39 PM UTC",0,0,0,1
5,,Neutral,"Jan 10, 2024 · 10:50 AM UTC",0,0,0,0
6,unajua sai amafanya eabl anaweza tupea a years...,Neutral,"Jan 10, 2024 · 10:12 AM UTC",1,0,0,0
7,,Neutral,"Jan 10, 2024 · 9:56 AM UTC",1,1,0,5
8,eabl have done the thingexpect changes to pric...,Neutral,"Jan 10, 2024 · 9:30 AM UTC",0,0,0,0
9,,Neutral,"Jan 10, 2024 · 9:25 AM UTC",0,0,0,0


In [12]:
Eabl_Mentions['sentiment'].value_counts()

Neutral     2876
Positive    2315
Negative     919
Name: sentiment, dtype: int64

In [13]:
Eabl_Mentions.to_csv("FinalMentions.CSV")

##### EABL HASHTAGS FROM TWITTER

In [14]:
Eabl_hashtags.head()

Unnamed: 0.1,Unnamed: 0,data,text,stats,links,sentiment
0,0,https://twitter.com/laicymaina/status/17103068...,"Today is the day, Tupatane Blankets courtesy o...","Oct 6, 2023 · 2:51 PM UTC","{'comments': 0, 'retweets': 3, 'quotes': 0, 'l...",Positive
1,1,https://twitter.com/Elvis_w_g/status/170890348...,You should see these free online certification...,"Oct 2, 2023 · 5:55 PM UTC","{'comments': 1, 'retweets': 0, 'quotes': 0, 'l...",Positive
2,2,https://twitter.com/Elvis_w_g/status/170890256...,You should learn data analysis. Here are the...,"Oct 2, 2023 · 5:51 PM UTC","{'comments': 1, 'retweets': 0, 'quotes': 0, 'l...",Positive
3,3,https://twitter.com/mbest0344/status/170888825...,kustretch kidogo apa na pale then nirudi kudan...,"Oct 2, 2023 · 4:54 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Negative
4,4,https://twitter.com/mbest0344/status/170879405...,#CustomerServiceWeek Akothee #MondayMotivation...,"Oct 2, 2023 · 10:40 AM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Positive


In [15]:
# Drop unnecessary columns
Eabl_hashtags = Eabl_hashtags.drop(['Unnamed: 0', 'data'], axis=1)
Eabl_hashtags.head()

Unnamed: 0,text,stats,links,sentiment
0,"Today is the day, Tupatane Blankets courtesy o...","Oct 6, 2023 · 2:51 PM UTC","{'comments': 0, 'retweets': 3, 'quotes': 0, 'l...",Positive
1,You should see these free online certification...,"Oct 2, 2023 · 5:55 PM UTC","{'comments': 1, 'retweets': 0, 'quotes': 0, 'l...",Positive
2,You should learn data analysis. Here are the...,"Oct 2, 2023 · 5:51 PM UTC","{'comments': 1, 'retweets': 0, 'quotes': 0, 'l...",Positive
3,kustretch kidogo apa na pale then nirudi kudan...,"Oct 2, 2023 · 4:54 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Negative
4,#CustomerServiceWeek Akothee #MondayMotivation...,"Oct 2, 2023 · 10:40 AM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Positive


In [16]:
# Apply the clean_text function to the 'text' column
Eabl_hashtags['cleaned_text'] = Eabl_hashtags['text'].apply(clean_text)

# Display the DataFrame with the cleaned_text column
Eabl_hashtags[['text', 'cleaned_text']].head()

Unnamed: 0,text,cleaned_text
0,"Today is the day, Tupatane Blankets courtesy o...",today is the day tupatane blankets courtesy of...
1,You should see these free online certification...,you should see these free online certification...
2,You should learn data analysis. Here are the...,you should learn data analysis here are the da...
3,kustretch kidogo apa na pale then nirudi kudan...,kustretch kidogo apa na pale then nirudi kudan...
4,#CustomerServiceWeek Akothee #MondayMotivation...,customerserviceweek akothee mondaymotivation h...


In [17]:
Eabl_hashtags['stats_dict'] = Eabl_hashtags['links'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else {})

# Extract 'comments', 'retweets', 'quotes', and 'likes' directly from the dictionary
Eabl_hashtags['comments'] = Eabl_hashtags['stats_dict'].apply(lambda x: x.get('comments', 0))
Eabl_hashtags['retweets'] = Eabl_hashtags['stats_dict'].apply(lambda x: x.get('retweets', 0))
Eabl_hashtags['quotes'] = Eabl_hashtags['stats_dict'].apply(lambda x: x.get('quotes', 0))
Eabl_hashtags['likes'] = Eabl_hashtags['stats_dict'].apply(lambda x: x.get('likes', 0))

# Drop the intermediate 'stats_dict' column
Eabl_hashtags = Eabl_hashtags.drop('stats_dict', axis=1)

# Display the DataFrame with the new columns
Eabl_hashtags.head(5)

Unnamed: 0,text,stats,links,sentiment,cleaned_text,comments,retweets,quotes,likes
0,"Today is the day, Tupatane Blankets courtesy o...","Oct 6, 2023 · 2:51 PM UTC","{'comments': 0, 'retweets': 3, 'quotes': 0, 'l...",Positive,today is the day tupatane blankets courtesy of...,0,3,0,2
1,You should see these free online certification...,"Oct 2, 2023 · 5:55 PM UTC","{'comments': 1, 'retweets': 0, 'quotes': 0, 'l...",Positive,you should see these free online certification...,1,0,0,4
2,You should learn data analysis. Here are the...,"Oct 2, 2023 · 5:51 PM UTC","{'comments': 1, 'retweets': 0, 'quotes': 0, 'l...",Positive,you should learn data analysis here are the da...,1,0,0,3
3,kustretch kidogo apa na pale then nirudi kudan...,"Oct 2, 2023 · 4:54 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Negative,kustretch kidogo apa na pale then nirudi kudan...,0,0,0,0
4,#CustomerServiceWeek Akothee #MondayMotivation...,"Oct 2, 2023 · 10:40 AM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Positive,customerserviceweek akothee mondaymotivation h...,0,0,0,1


In [18]:
# Remove 'text' and 'links' columns
# Eabl_hashtags = Eabl_hashtags.drop(['text', 'links'], axis=1)

# Move 'sentiment' column after 'cleaned_text'
columns_order = ['cleaned_text','sentiment', 'stats', 'comments', 'retweets', 'quotes', 'likes']
Eabl_hashtags = Eabl_hashtags[columns_order]

# Display the updated DataFrame
Eabl_hashtags.head()

Unnamed: 0,cleaned_text,sentiment,stats,comments,retweets,quotes,likes
0,today is the day tupatane blankets courtesy of...,Positive,"Oct 6, 2023 · 2:51 PM UTC",0,3,0,2
1,you should see these free online certification...,Positive,"Oct 2, 2023 · 5:55 PM UTC",1,0,0,4
2,you should learn data analysis here are the da...,Positive,"Oct 2, 2023 · 5:51 PM UTC",1,0,0,3
3,kustretch kidogo apa na pale then nirudi kudan...,Negative,"Oct 2, 2023 · 4:54 PM UTC",0,0,0,0
4,customerserviceweek akothee mondaymotivation h...,Positive,"Oct 2, 2023 · 10:40 AM UTC",0,0,0,1


In [19]:
#dropping duplicates
Eabl_hashtags = Eabl_hashtags.drop_duplicates()

In [20]:
Eabl_hashtags['sentiment'].value_counts()

Positive    2368
Neutral     1721
Negative     555
Name: sentiment, dtype: int64

In [21]:
Eabl_hashtags.to_csv("FinalHashtags.CSV")

##### EABL NEWS FROM GOOGLE NEWS

In [22]:
googleNews = pd.read_excel('GoogleNewsData.xlsx')
googleNews.head(10)

Unnamed: 0,News Source,News Heading
0,The Star Kenya,Kenya Breweries introduces Baileys Delight dri...
1,Tuko.co.ke,Valentine's Days: Baileys Delight among afford...
2,BellaNaija,Check Out Some of the Delightful Moments from ...
3,The Star Kenya,Here's a delicious Bailey's Delight chocolate ...
4,Guardian Nigeria,"Guinness Nigeria Introduces Baileys Delight, a..."
5,GhanaWeb,Fun facts about Baileys Delight that will exci...
6,BellaNaija,Baileys Delight Crowns the Year With 12 Days o...
7,Pulse Ghana,Guinness Ghana Breweries PLC launches Baileys ...
8,Daily Post Nigeria,12 Delight Treats: Baileys Nigeria closes cale...
9,Nairametrics,Official: Guinness now produces Baileys from N...


In [23]:
# pip install textblob

In [24]:
#creating sentiment column
from textblob import TextBlob

# Add a new column 'sentiment' based on the sentiment of the 'News Heading' column
googleNews['sentiment'] = googleNews['News Heading'].apply(lambda x: TextBlob(str(x)).sentiment.polarity)

# Classify sentiment into 'positive', 'negative', or 'neutral'
googleNews['sentiment'] = googleNews['sentiment'].apply(lambda x: 'Positive' if x > 0 else ('Negative' if x < 0 else 'Neutral'))

googleNews.head(10)

Unnamed: 0,News Source,News Heading,sentiment
0,The Star Kenya,Kenya Breweries introduces Baileys Delight dri...,Neutral
1,Tuko.co.ke,Valentine's Days: Baileys Delight among afford...,Positive
2,BellaNaija,Check Out Some of the Delightful Moments from ...,Positive
3,The Star Kenya,Here's a delicious Bailey's Delight chocolate ...,Positive
4,Guardian Nigeria,"Guinness Nigeria Introduces Baileys Delight, a...",Positive
5,GhanaWeb,Fun facts about Baileys Delight that will exci...,Positive
6,BellaNaija,Baileys Delight Crowns the Year With 12 Days o...,Neutral
7,Pulse Ghana,Guinness Ghana Breweries PLC launches Baileys ...,Neutral
8,Daily Post Nigeria,12 Delight Treats: Baileys Nigeria closes cale...,Positive
9,Nairametrics,Official: Guinness now produces Baileys from N...,Neutral


In [25]:
# Apply the clean_text function to the 'text' column
googleNews['cleaned_text'] = googleNews['News Heading'].apply(clean_text)

# Display the DataFrame with the cleaned_text column
googleNews.head()

Unnamed: 0,News Source,News Heading,sentiment,cleaned_text
0,The Star Kenya,Kenya Breweries introduces Baileys Delight dri...,Neutral,kenya breweries introduces baileys delight dri...
1,Tuko.co.ke,Valentine's Days: Baileys Delight among afford...,Positive,valentines days baileys delight among affordab...
2,BellaNaija,Check Out Some of the Delightful Moments from ...,Positive,check out some of the delightful moments from ...
3,The Star Kenya,Here's a delicious Bailey's Delight chocolate ...,Positive,heres a delicious baileys delight chocolate ca...
4,Guardian Nigeria,"Guinness Nigeria Introduces Baileys Delight, a...",Positive,guinness nigeria introduces baileys delight a ...


In [26]:
# Remove News Heading column
googleNews = googleNews.drop('News Heading', axis=1)

# Move 'sentiment' column after 'cleaned_text'
columns_order = ['News Source', 'cleaned_text', 'sentiment']
googleNews = googleNews[columns_order]

# Display the updated DataFrame
googleNews.head()

Unnamed: 0,News Source,cleaned_text,sentiment
0,The Star Kenya,kenya breweries introduces baileys delight dri...,Neutral
1,Tuko.co.ke,valentines days baileys delight among affordab...,Positive
2,BellaNaija,check out some of the delightful moments from ...,Positive
3,The Star Kenya,heres a delicious baileys delight chocolate ca...,Positive
4,Guardian Nigeria,guinness nigeria introduces baileys delight a ...,Positive


In [27]:
googleNews['sentiment'].value_counts()

Neutral     680
Positive    393
Negative    131
Name: sentiment, dtype: int64

In [28]:
googleNews.to_csv("FinalGoogleNews.csv")

##### EABL Brands 

In [29]:
eabl_brands = pd.read_csv("eabl products.csv")
eabl_brands.head(5)

Unnamed: 0.1,Unnamed: 0,link,text,date,stats,links,user,location,likes,retweets,replies,sentiment_score,sentiment
0,0,https://twitter.com/Infinity101430/status/1745...,こりゃうめぇ🤤 #ラム酒 #Captain Morgan #こりゃうめぇ,"Jan 11, 2024 · 10:10 AM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",[],@Infinity101430,,11.0,0.0,0,0.0,neutral
1,1,https://twitter.com/FoodieandSports/status/173...,Check out Captain Morgan Hat + Captain Morgan...,"Dec 21, 2023 · 11:33 PM UTC","{'comments': 0, 'retweets': 7, 'quotes': 0, 'l...",[],@FoodieandSports,,0.0,7.0,0,0.0,neutral
2,2,https://twitter.com/officialsky247/status/1744...,The Hitman is just 5 sixes away from breaking ...,"Jan 8, 2024 · 5:15 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",[],@officialsky247,,0.0,0.0,0,0.0,neutral
3,3,https://twitter.com/jharinarayanan/status/1740...,"It is not America, Morgan, Phillips or even Ka...","Dec 28, 2023 · 5:35 AM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",[],@jharinarayanan,,0.0,0.0,0,0.0,neutral
4,4,https://twitter.com/EdeshSeba/status/174013161...,Wales captain Jac Morgan to miss Scotland Six ...,"Dec 27, 2023 · 10:04 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",[],@EdeshSeba,,0.0,0.0,0,-0.5267,negative


In [30]:
eabl_brands.columns

Index(['Unnamed: 0', 'link', 'text', 'date', 'stats', 'links', 'user',
       'location', 'likes', 'retweets', 'replies', 'sentiment_score',
       'sentiment'],
      dtype='object')

In [31]:
eabl_brands = eabl_brands.drop(['Unnamed: 0','link','links','user','location','sentiment_score','likes','retweets','replies'], axis=1)
eabl_brands.head()

Unnamed: 0,text,date,stats,sentiment
0,こりゃうめぇ🤤 #ラム酒 #Captain Morgan #こりゃうめぇ,"Jan 11, 2024 · 10:10 AM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",neutral
1,Check out Captain Morgan Hat + Captain Morgan...,"Dec 21, 2023 · 11:33 PM UTC","{'comments': 0, 'retweets': 7, 'quotes': 0, 'l...",neutral
2,The Hitman is just 5 sixes away from breaking ...,"Jan 8, 2024 · 5:15 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",neutral
3,"It is not America, Morgan, Phillips or even Ka...","Dec 28, 2023 · 5:35 AM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",neutral
4,Wales captain Jac Morgan to miss Scotland Six ...,"Dec 27, 2023 · 10:04 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",negative


In [32]:
# Apply the clean_text function to the 'text' column
eabl_brands['cleaned_text'] = eabl_brands['text'].apply(clean_text)

# Display the DataFrame with the cleaned_text column
eabl_brands[['text', 'cleaned_text']].head()

Unnamed: 0,text,cleaned_text
0,こりゃうめぇ🤤 #ラム酒 #Captain Morgan #こりゃうめぇ,captain morgan
1,Check out Captain Morgan Hat + Captain Morgan...,check out captain morgan hat captain morgan t...
2,The Hitman is just 5 sixes away from breaking ...,the hitman is just 5 sixes away from breaking ...
3,"It is not America, Morgan, Phillips or even Ka...",it is not america morgan phillips or even kapi...
4,Wales captain Jac Morgan to miss Scotland Six ...,wales captain jac morgan to miss scotland six ...


In [33]:
eabl_brands.columns

Index(['text', 'date', 'stats', 'sentiment', 'cleaned_text'], dtype='object')

In [34]:
import ast
import pandas as pd

# Assuming you've already imported the necessary libraries and read the DataFrame

def safe_eval(x):
    try:
        return ast.literal_eval(x)
    except (SyntaxError, ValueError):
        return {}

# Apply the safe_eval function to handle potential syntax errors
eabl_brands['stats_dict'] = eabl_brands['stats'].apply(lambda x: safe_eval(x) if pd.notnull(x) else {})

# Extract 'comments', 'retweets', 'quotes', and 'likes' directly from the dictionary
eabl_brands['comments'] = eabl_brands['stats_dict'].apply(lambda x: x.get('comments', 0))
eabl_brands['retweets'] = eabl_brands['stats_dict'].apply(lambda x: x.get('retweets', 0))
eabl_brands['quotes'] = eabl_brands['stats_dict'].apply(lambda x: x.get('quotes', 0))
eabl_brands['likes'] = eabl_brands['stats_dict'].apply(lambda x: x.get('likes', 0))

# Drop the intermediate 'stats_dict' column
eabl_brands = eabl_brands.drop('stats_dict', axis=1)

# Display the DataFrame with the new columns
eabl_brands.head(5)

Unnamed: 0,text,date,stats,sentiment,cleaned_text,comments,retweets,quotes,likes
0,こりゃうめぇ🤤 #ラム酒 #Captain Morgan #こりゃうめぇ,"Jan 11, 2024 · 10:10 AM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",neutral,captain morgan,0,0,0,11
1,Check out Captain Morgan Hat + Captain Morgan...,"Dec 21, 2023 · 11:33 PM UTC","{'comments': 0, 'retweets': 7, 'quotes': 0, 'l...",neutral,check out captain morgan hat captain morgan t...,0,7,0,0
2,The Hitman is just 5 sixes away from breaking ...,"Jan 8, 2024 · 5:15 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",neutral,the hitman is just 5 sixes away from breaking ...,0,0,0,0
3,"It is not America, Morgan, Phillips or even Ka...","Dec 28, 2023 · 5:35 AM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",neutral,it is not america morgan phillips or even kapi...,0,0,0,0
4,Wales captain Jac Morgan to miss Scotland Six ...,"Dec 27, 2023 · 10:04 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",negative,wales captain jac morgan to miss scotland six ...,0,0,0,0


In [35]:
eabl_brands.columns

Index(['text', 'date', 'stats', 'sentiment', 'cleaned_text', 'comments',
       'retweets', 'quotes', 'likes'],
      dtype='object')

In [36]:
eabl_brands = eabl_brands.drop(['text', 'stats'], axis=1)

# Corrected line for renaming columns
eabl_brands.rename(columns={'date': 'stats'}, inplace=True)

columns_order = ['cleaned_text', 'sentiment', 'stats', 'comments', 'retweets', 'quotes', 'likes']
eabl_brands = eabl_brands[columns_order]

eabl_brands.head()


Unnamed: 0,cleaned_text,sentiment,stats,comments,retweets,quotes,likes
0,captain morgan,neutral,"Jan 11, 2024 · 10:10 AM UTC",0,0,0,11
1,check out captain morgan hat captain morgan t...,neutral,"Dec 21, 2023 · 11:33 PM UTC",0,7,0,0
2,the hitman is just 5 sixes away from breaking ...,neutral,"Jan 8, 2024 · 5:15 PM UTC",0,0,0,0
3,it is not america morgan phillips or even kapi...,neutral,"Dec 28, 2023 · 5:35 AM UTC",0,0,0,0
4,wales captain jac morgan to miss scotland six ...,negative,"Dec 27, 2023 · 10:04 PM UTC",0,0,0,0


In [38]:
eabl_brands.to_csv("eablProducts.csv")

##### EABL Chrome Gin

In [40]:
ChromeBrand = pd.read_csv("ChromGinData.csv")
ChromeBrand.head()

Unnamed: 0.1,Unnamed: 0,link,tweet,date,texts,sentiment
0,0,https://twitter.com/magniawoof/status/17469436...,'cause I can't make you love me if you don't y...,"Jan 15, 2024 · 5:13 PM UTC","{'comments': 0, 'retweets': 4, 'quotes': 0, 'l...",Negative
1,1,https://twitter.com/dmchnmeowkizzen/status/174...,"та ні-ні, можете робити (⁠◕⁠ᴗ⁠◕⁠✿⁠)","Jan 15, 2024 · 5:20 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Neutral
2,2,https://twitter.com/dmchnmeowkizzen/status/174...,"розумієте, я іноді малюю маленькі незначні ске...","Jan 15, 2024 · 3:34 PM UTC","{'comments': 1, 'retweets': 0, 'quotes': 0, 'l...",Neutral
3,3,https://twitter.com/dmchnmeowkizzen/status/174...,у мене є ак для малювання @dmchn_art ііі я там...,"Jan 15, 2024 · 2:57 PM UTC","{'comments': 2, 'retweets': 0, 'quotes': 0, 'l...",Neutral
4,4,https://twitter.com/dmchnmeowkizzen/status/174...,oh no my spotify proposed me to listen to lana...,"Jan 15, 2024 · 2:55 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Negative


In [41]:
ChromeBrand = ChromeBrand.drop(['Unnamed: 0','link'], axis=1)
ChromeBrand.head(3)

Unnamed: 0,tweet,date,texts,sentiment
0,'cause I can't make you love me if you don't y...,"Jan 15, 2024 · 5:13 PM UTC","{'comments': 0, 'retweets': 4, 'quotes': 0, 'l...",Negative
1,"та ні-ні, можете робити (⁠◕⁠ᴗ⁠◕⁠✿⁠)","Jan 15, 2024 · 5:20 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Neutral
2,"розумієте, я іноді малюю маленькі незначні ске...","Jan 15, 2024 · 3:34 PM UTC","{'comments': 1, 'retweets': 0, 'quotes': 0, 'l...",Neutral


In [43]:
# Apply the clean_text function to the 'text' column
ChromeBrand['cleaned_text'] = ChromeBrand['tweet'].apply(clean_text)

# Display the DataFrame with the cleaned_text column
ChromeBrand[['tweet', 'cleaned_text']].head()

Unnamed: 0,tweet,cleaned_text
0,'cause I can't make you love me if you don't y...,cause i cant make you love me if you dont you ...
1,"та ні-ні, можете робити (⁠◕⁠ᴗ⁠◕⁠✿⁠)",
2,"розумієте, я іноді малюю маленькі незначні ске...",
3,у мене є ак для малювання @dmchn_art ііі я там...,
4,oh no my spotify proposed me to listen to lana...,oh no my spotify proposed me to listen to lana...


In [48]:
def safe_eval(x):
    try:
        return ast.literal_eval(x)
    except (SyntaxError, ValueError):
        return {}

# Apply the safe_eval function to handle potential syntax errors
ChromeBrand['stats_dict'] = ChromeBrand['texts'].apply(lambda x: safe_eval(x) if pd.notnull(x) else {})

# Extract 'comments', 'retweets', 'quotes', and 'likes' directly from the dictionary
ChromeBrand['comments'] = ChromeBrand['stats_dict'].apply(lambda x: x.get('comments', 0))
ChromeBrand['retweets'] = ChromeBrand['stats_dict'].apply(lambda x: x.get('retweets', 0))
ChromeBrand['quotes'] = ChromeBrand['stats_dict'].apply(lambda x: x.get('quotes', 0))
ChromeBrand['likes'] = ChromeBrand['stats_dict'].apply(lambda x: x.get('likes', 0))

# Drop the intermediate 'stats_dict' column
ChromeBrand = ChromeBrand.drop('stats_dict', axis=1)

# Display the DataFrame with the new columns
ChromeBrand.head(5)

Unnamed: 0,tweet,date,texts,sentiment,cleaned_text,comments,retweets,quotes,likes
0,'cause I can't make you love me if you don't y...,"Jan 15, 2024 · 5:13 PM UTC","{'comments': 0, 'retweets': 4, 'quotes': 0, 'l...",Negative,cause i cant make you love me if you dont you ...,0,4,0,21
1,"та ні-ні, можете робити (⁠◕⁠ᴗ⁠◕⁠✿⁠)","Jan 15, 2024 · 5:20 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Neutral,,0,0,0,1
2,"розумієте, я іноді малюю маленькі незначні ске...","Jan 15, 2024 · 3:34 PM UTC","{'comments': 1, 'retweets': 0, 'quotes': 0, 'l...",Neutral,,1,0,0,1
3,у мене є ак для малювання @dmchn_art ііі я там...,"Jan 15, 2024 · 2:57 PM UTC","{'comments': 2, 'retweets': 0, 'quotes': 0, 'l...",Neutral,,2,0,0,1
4,oh no my spotify proposed me to listen to lana...,"Jan 15, 2024 · 2:55 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Negative,oh no my spotify proposed me to listen to lana...,0,0,0,0


In [50]:
ChromeBrand.shape

(3302, 9)

In [51]:
ChromeBrand = ChromeBrand.drop(['tweet', 'texts'], axis=1)

# Corrected line for renaming columns
ChromeBrand.rename(columns={'date': 'stats'}, inplace=True)

columns_order = ['cleaned_text', 'sentiment', 'stats', 'comments', 'retweets', 'quotes', 'likes']
ChromeBrand = eabl_brands[columns_order]

ChromeBrand.head()

Unnamed: 0,cleaned_text,sentiment,stats,comments,retweets,quotes,likes
0,captain morgan,neutral,"Jan 11, 2024 · 10:10 AM UTC",0,0,0,11
1,check out captain morgan hat captain morgan t...,neutral,"Dec 21, 2023 · 11:33 PM UTC",0,7,0,0
2,the hitman is just 5 sixes away from breaking ...,neutral,"Jan 8, 2024 · 5:15 PM UTC",0,0,0,0
3,it is not america morgan phillips or even kapi...,neutral,"Dec 28, 2023 · 5:35 AM UTC",0,0,0,0
4,wales captain jac morgan to miss scotland six ...,negative,"Dec 27, 2023 · 10:04 PM UTC",0,0,0,0


In [52]:
eabl_brands.to_csv("ChromeGin.CSV")

##### EABL Sponsored Events

In [55]:
Events = pd.read_excel("organized_data.xlsx")
Events.head()

Unnamed: 0,Data,Text,Stats,Links,Sentiment
0,https://twitter.com/kibor_kifra/status/1700925...,"This just in, the month-long celebration of Ke...","Sep 10, 2023 · 5:33 PM UTC","{'comments': 0, 'retweets': 11, 'quotes': 0, '...",Positive
1,https://twitter.com/booma_creatives/status/173...,"Come and experience South Sudanese Culture, Fo...","Dec 15, 2023 · 9:45 AM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Neutral
2,https://twitter.com/MelV_DjMelcj/status/172503...,"My fave TBT jam is ""Why Lie"". s/o to the homi...","Nov 16, 2023 · 6:00 AM UTC","{'comments': 0, 'retweets': 1, 'quotes': 0, 'l...",Positive
3,https://twitter.com/GuerMomo/status/1723819405...,Share the excitement for #TuskerOktobafest! Wh...,"Nov 12, 2023 · 9:45 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Positive
4,https://twitter.com/GuerMomo/status/1723818981...,🎉 Tell us about your festival fashion at #Tusk...,"Nov 12, 2023 · 9:44 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Positive


In [56]:
Events.shape

(10000, 5)

In [57]:
Events = Events.drop(['Data'], axis=1)
Events.head(3)

Unnamed: 0,Text,Stats,Links,Sentiment
0,"This just in, the month-long celebration of Ke...","Sep 10, 2023 · 5:33 PM UTC","{'comments': 0, 'retweets': 11, 'quotes': 0, '...",Positive
1,"Come and experience South Sudanese Culture, Fo...","Dec 15, 2023 · 9:45 AM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Neutral
2,"My fave TBT jam is ""Why Lie"". s/o to the homi...","Nov 16, 2023 · 6:00 AM UTC","{'comments': 0, 'retweets': 1, 'quotes': 0, 'l...",Positive


In [58]:
# Apply the clean_text function to the 'text' column
Events['cleaned_text'] = Events['Text'].apply(clean_text)

# Display the DataFrame with the cleaned_text column
Events[['Text', 'cleaned_text']].head()

Unnamed: 0,Text,cleaned_text
0,"This just in, the month-long celebration of Ke...",this just in the monthlong celebration of keny...
1,"Come and experience South Sudanese Culture, Fo...",come and experience south sudanese culture foo...
2,"My fave TBT jam is ""Why Lie"". s/o to the homi...",my fave tbt jam is why lie so to the homie fo...
3,Share the excitement for #TuskerOktobafest! Wh...,share the excitement for tuskeroktobafest what...
4,🎉 Tell us about your festival fashion at #Tusk...,tell us about your festival fashion at tusker...


In [61]:
def safe_eval(x):
    try:
        return ast.literal_eval(x)
    except (SyntaxError, ValueError):
        return {}

# Apply the safe_eval function to handle potential syntax errors
Events['stats_dict'] = Events['Text'].apply(lambda x: safe_eval(x) if pd.notnull(x) else {})

# Extract 'comments', 'retweets', 'quotes', and 'likes' directly from the dictionary
Events['comments'] = Events['stats_dict'].apply(lambda x: x.get('comments', 0) if isinstance(x, dict) else 0)
Events['retweets'] = Events['stats_dict'].apply(lambda x: x.get('retweets', 0) if isinstance(x, dict) else 0)
Events['quotes'] = Events['stats_dict'].apply(lambda x: x.get('quotes', 0) if isinstance(x, dict) else 0)
Events['likes'] = Events['stats_dict'].apply(lambda x: x.get('likes', 0) if isinstance(x, dict) else 0)

# Drop the intermediate 'stats_dict' column
Events = Events.drop('stats_dict', axis=1)

# Display the DataFrame with the new columns
Events.head(5)

Unnamed: 0,Text,Stats,Links,Sentiment,cleaned_text,comments,retweets,quotes,likes
0,"This just in, the month-long celebration of Ke...","Sep 10, 2023 · 5:33 PM UTC","{'comments': 0, 'retweets': 11, 'quotes': 0, '...",Positive,this just in the monthlong celebration of keny...,0,0,0,0
1,"Come and experience South Sudanese Culture, Fo...","Dec 15, 2023 · 9:45 AM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Neutral,come and experience south sudanese culture foo...,0,0,0,0
2,"My fave TBT jam is ""Why Lie"". s/o to the homi...","Nov 16, 2023 · 6:00 AM UTC","{'comments': 0, 'retweets': 1, 'quotes': 0, 'l...",Positive,my fave tbt jam is why lie so to the homie fo...,0,0,0,0
3,Share the excitement for #TuskerOktobafest! Wh...,"Nov 12, 2023 · 9:45 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Positive,share the excitement for tuskeroktobafest what...,0,0,0,0
4,🎉 Tell us about your festival fashion at #Tusk...,"Nov 12, 2023 · 9:44 PM UTC","{'comments': 0, 'retweets': 0, 'quotes': 0, 'l...",Positive,tell us about your festival fashion at tusker...,0,0,0,0


In [65]:
# Events = Events.drop(['Text', 'Links'], axis=1)

# Corrected line for renaming columns
# ChromeBrand.rename(columns={'date': 'stats'}, inplace=True)

columns_order = ['cleaned_text', 'Sentiment', 'Stats', 'comments', 'retweets', 'quotes', 'likes']
Events = Events[columns_order]

Events.head()

Unnamed: 0,cleaned_text,Sentiment,Stats,comments,retweets,quotes,likes
0,this just in the monthlong celebration of keny...,Positive,"Sep 10, 2023 · 5:33 PM UTC",0,0,0,0
1,come and experience south sudanese culture foo...,Neutral,"Dec 15, 2023 · 9:45 AM UTC",0,0,0,0
2,my fave tbt jam is why lie so to the homie fo...,Positive,"Nov 16, 2023 · 6:00 AM UTC",0,0,0,0
3,share the excitement for tuskeroktobafest what...,Positive,"Nov 12, 2023 · 9:45 PM UTC",0,0,0,0
4,tell us about your festival fashion at tusker...,Positive,"Nov 12, 2023 · 9:44 PM UTC",0,0,0,0


In [66]:
Events.to_csv("Events.CSV")