### Data Preprocessing

In [11]:
import pandas as pd

adele_tweets = pd.read_csv('twts_tweets.csv', encoding='ISO-8859-1')

adele_tweets.columns

Index(['user_id', 'status_id', 'created_at', 'screen_name', 'text', 'source',
       'display_text_width', 'favorite_count', 'retweet_count', 'hashtags',
       'status_url'],
      dtype='object')

In [12]:
adele_tweets = adele_tweets.loc[:,['user_id', 'created_at', 'text', 'display_text_width', 'favorite_count', 'retweet_count']]
adele_tweets.head()

Unnamed: 0,user_id,created_at,text,display_text_width,favorite_count,retweet_count
0,x3243242489,2021-10-15 10:57:25,#Adele doing it again with #EasyOnMe \r\nThe e...,79,0,0
1,x1433677915984592897,2021-10-15 10:57:18,I have not listened to @Adeleâs new song 100...,62,0,0
2,x451764204,2021-10-15 10:57:11,Played Adeleâs song all night. Woke up to a ...,139,0,0
3,x1415418178377699332,2021-10-15 10:57:01,Just an attempt &gt;//&lt;\r\n\r\n#EasyOnMe ht...,37,0,0
4,x1442179906834558978,2021-10-15 10:56:58,I changed who i was to put you both first but ...,77,0,0


In [13]:
import pandas as pd

adele_users = pd.read_csv('twts_users.csv', encoding='ISO-8859-1')

adele_users.columns

Index(['user_id', 'screen_name', 'followers_count', 'friends_count',
       'listed_count', 'statuses_count', 'favourites_count',
       'account_created_at', 'verified'],
      dtype='object')

In [14]:
user_list = ['followers_count', 'friends_count']
for user in user_list:
    adele_tweets[user] = adele_users[user]

adele_tweets.head()

Unnamed: 0,user_id,created_at,text,display_text_width,favorite_count,retweet_count,followers_count,friends_count
0,x3243242489,2021-10-15 10:57:25,#Adele doing it again with #EasyOnMe \r\nThe e...,79,0,0,119.0,223.0
1,x1433677915984592897,2021-10-15 10:57:18,I have not listened to @Adeleâs new song 100...,62,0,0,100.0,260.0
2,x451764204,2021-10-15 10:57:11,Played Adeleâs song all night. Woke up to a ...,139,0,0,104.0,180.0
3,x1415418178377699332,2021-10-15 10:57:01,Just an attempt &gt;//&lt;\r\n\r\n#EasyOnMe ht...,37,0,0,93.0,98.0
4,x1442179906834558978,2021-10-15 10:56:58,I changed who i was to put you both first but ...,77,0,0,37.0,72.0


### Sentimental Analysis

In [35]:
from textblob import TextBlob
import nltk
import re
nltk.download('punkt')

text = adele_tweets['text'][1]
blob = TextBlob(text)
blob.sentences[0].sentiment
for sent in blob.sentences:
    print(sent)

I have not listened to @Adeleâs new song 1000 times.
#EasyOnMe


[nltk_data] Downloading package punkt to /Users/chanson/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [62]:
# sentimental analysis
text = adele_tweets['text'][26]
blob = TextBlob(text)
blob.sentences[0].sentiment
for sent in blob.sentences:
    print(sent, sent.sentiment)

Here are some of the products that are still IN STOCK on Adeleâs online store! Sentiment(polarity=0.0, subjectivity=0.0)
If youâre in the US buy the Single on Friday so that itâll count towards the hot 100! Sentiment(polarity=0.12053571428571429, subjectivity=0.5321428571428571)
Hereâs the link to the store: https://t.co/mrINBogBYA

#Adele #Adele30 #EasyOnMe https://t.co/7hbYJDnFhy Sentiment(polarity=0.0, subjectivity=0.0)


In [65]:
pol = []
subj = []
for index, row in adele_tweets.iterrows():
    blob = TextBlob(row['text'])
    sum_pol, sum_subj = 0, 0
    for sent in blob.sentences:
        sum_pol += sent.sentiment[0]
        sum_subj += sent.sentiment[1]
    pol.append(sum_pol/len(blob.sentences))
    subj.append(sum_subj/len(blob.sentences))

adele_tweets['polarity'] = pol
adele_tweets['subjectivity'] = subj

In [71]:
from datetime import datetime, timedelta

t_init = datetime(2021,10,14,23,0,0,0)
t_end = datetime(2021,10,15,11,0,0,0)
delta = t_end - t_init
delta.seconds

43200

In [79]:
delta_time = []
for index, row in adele_tweets.iterrows():
    t_list = re.split('-| |:', row['created_at'])
    t_list = [int(t) for t in t_list]
    t_create = datetime(t_list[0], t_list[1], t_list[2], t_list[3], t_list[4], t_list[5], 0)
    delta_time.append((t_create - t_init).seconds)
adele_tweets['time'] = delta_time

adele_tweets.head()

Unnamed: 0,user_id,created_at,text,display_text_width,favorite_count,retweet_count,followers_count,friends_count,polarity,subjectivity,time
0,x3243242489,2021-10-15 10:57:25,#Adele doing it again with #EasyOnMe \r\nThe e...,79,0,0,119.0,223.0,0.0,0.0,43045
1,x1433677915984592897,2021-10-15 10:57:18,I have not listened to @Adeleâs new song 100...,62,0,0,100.0,260.0,0.068182,0.227273,43038
2,x451764204,2021-10-15 10:57:11,Played Adeleâs song all night. Woke up to a ...,139,0,0,104.0,180.0,0.075,0.0,43031
3,x1415418178377699332,2021-10-15 10:57:01,Just an attempt &gt;//&lt;\r\n\r\n#EasyOnMe ht...,37,0,0,93.0,98.0,0.0,0.0,43021
4,x1442179906834558978,2021-10-15 10:56:58,I changed who i was to put you both first but ...,77,0,0,37.0,72.0,0.25,0.333333,43018


In [83]:
adele_tweets.dropna(axis=0, how='any', inplace=True)
adele_tweets.to_csv('adele.csv')

In [2]:
import pandas as pd
adele_tweets = pd.read_csv('adele.csv', encoding='ISO-8859-1')
adele_tweets.head()

Unnamed: 0.1,Unnamed: 0,user_id,created_at,text,display_text_width,favorite_count,retweet_count,followers_count,friends_count,polarity,subjectivity,time
0,0,x3243242489,2021-10-15 10:57:25,#Adele doing it again with #EasyOnMe \r\nThe e...,79,0,0,119.0,223.0,0.0,0.0,43045
1,1,x1433677915984592897,2021-10-15 10:57:18,I have not listened to @AdeleÃ¢ÂÂs new song ...,62,0,0,100.0,260.0,0.068182,0.227273,43038
2,2,x451764204,2021-10-15 10:57:11,Played AdeleÃ¢ÂÂs song all night. Woke up to...,139,0,0,104.0,180.0,0.075,0.0,43031
3,3,x1415418178377699332,2021-10-15 10:57:01,Just an attempt &gt;//&lt;\r\n\r\n#EasyOnMe ht...,37,0,0,93.0,98.0,0.0,0.0,43021
4,4,x1442179906834558978,2021-10-15 10:56:58,I changed who i was to put you both first but ...,77,0,0,37.0,72.0,0.25,0.333333,43018


In [2]:
import pandas as pd
adele_tweets = pd.read_csv('adele.csv', encoding='ISO-8859-1')
adele_tweets = adele_tweets.loc[:, 'display_text_width':]
adele_tweets.head()


Unnamed: 0,display_text_width,favorite_count,retweet_count,followers_count,friends_count,polarity,subjectivity,time
0,79,0,0,119.0,223.0,0.0,0.0,43045
1,62,0,0,100.0,260.0,0.068182,0.227273,43038
2,139,0,0,104.0,180.0,0.075,0.0,43031
3,37,0,0,93.0,98.0,0.0,0.0,43021
4,77,0,0,37.0,72.0,0.25,0.333333,43018


In [3]:
adele_tweets.to_csv('adele.csv')