In [3]:
import re
import csv
from getpass import getpass
from time import sleep
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from msedge.selenium_tools import Edge, EdgeOptions

def get_tweet_data(card):
    """Extract data from tweet card"""
    username = card.find_element("xpath",'.//span').text
    try:
        handle = card.find_element("xpath",'.//span[contains(text(), "@")]').text
    except NoSuchElementException:
        return
    
    try:
        postdate = card.find_element("xpath",'.//time').get_attribute('datetime')
    except NoSuchElementException:
        return
    
    comment = card.find_element("xpath",'.//div[2]/div[2]/div[1]').text
    responding = card.find_element("xpath",'.//div[2]/div[2]/div[2]').text
    text = comment + responding
    reply_cnt = card.find_element("xpath",'.//div[@data-testid="reply"]').text
    retweet_cnt = card.find_element("xpath",'.//div[@data-testid="retweet"]').text
    like_cnt = card.find_element("xpath",'.//div[@data-testid="like"]').text
    
    # get a string of all emojis contained in the tweet
    """Emojis are stored as images... so I convert the filename, which is stored as unicode, into 
    the emoji character."""
    emoji_tags = card.find_elements("xpath",'.//img[contains(@src, "emoji")]')
    emoji_list = []
    for tag in emoji_tags:
        filename = tag.get_attribute('src')
        try:
            emoji = chr(int(re.search(r'svg\/([a-z0-9]+)\.svg', filename).group(1), base=16))
        except AttributeError:
            continue
        if emoji:
            emoji_list.append(emoji)
    emojis = ' '.join(emoji_list)
    
    tweet = (username, handle, postdate, text, emojis, reply_cnt, retweet_cnt, like_cnt)
    return tweet    

In [5]:
# application variables
user = input('username: ')
my_password = getpass('Password: ')
search_term = input('search term: ')

# create instance of web driver
options = EdgeOptions()
options.use_chromium = True
driver = Edge(options=options)

# navigate to login screen
driver.get('https://www.twitter.com/login')
driver.maximize_window()
sleep(5)
username = driver.find_element("xpath",'//input[@name="text"]')
username.send_keys(user)
username.send_keys(Keys.RETURN)
sleep(3)

password = driver.find_element('xpath','//input[@name="password"]')
password.send_keys(my_password)
password.send_keys(Keys.RETURN)
sleep(3)

# find search input and search for term
search_input = driver.find_element('xpath','//input[@aria-label="Search query"]')
search_input.send_keys(search_term)
search_input.send_keys(Keys.RETURN)
sleep(1)

# navigate to historical 'latest' tab
driver.find_element_by_link_text('Latest').click()

username: Dathu55483800
Password: ········
search term: #VMwareExplore


  driver = Edge(options=options)


AttributeError: 'WebDriver' object has no attribute 'find_element_by_link_text'

In [6]:
# get all tweets on the page
data = []
twitter_tweet_ = []
tweet_ids = set()
last_position = driver.execute_script("return window.pageYOffset;")
scrolling = True

while scrolling:
    page_cards = driver.find_elements('xpath', '//article[@data-testid="tweet"]')
    for card in page_cards[-15:]:
        tweet = get_tweet_data(card)
        if tweet:
            tweet_id = ''.join(tweet)
            if tweet_id not in tweet_ids:
                tweet_ids.add(tweet_id)
                data.append(tweet)
                twitter_tweet_.append(tweet[3])
            
    scroll_attempt = 0
    while True:
        # check scroll position
        driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
        sleep(2)
        curr_position = driver.execute_script("return window.pageYOffset;")
        if last_position == curr_position:
            scroll_attempt += 1
            
            # end of scroll region
            if scroll_attempt >= 3:
                scrolling = False
                break
            else:
                sleep(2) # attempt another scroll
        else:
            last_position = curr_position
            break

# close the web driver
driver.close()

In [7]:
len(data)

44

Saving the twitter data

In [8]:
with open('demo.csv', 'w', newline='', encoding='utf-8') as f:
    header = ['UserName', 'Handle', 'Timestamp', 'Text', 'Emojis', 'Comments', 'Likes', 'Retweets']
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(data)

In [9]:
import pandas as pd
header = ['UserName', 'Handle', 'Timestamp', 'Text', 'Emojis', 'Comments', 'Likes', 'Retweets']
df = pd.DataFrame(data,columns= header)
df.head()

Unnamed: 0,UserName,Handle,Timestamp,Text,Emojis,Comments,Likes,Retweets
0,Ashutosh mourya,@Anurag_091,2023-04-11T06:20:19.000Z,Ashutosh mourya\n@Anurag_091\n·\n2mThat's some...,,,,
1,m.,@imshivans7,2023-04-11T06:20:13.000Z,m.\n@imshivans7\n·\n2mஇது ஒரு சிறப்பு மற்றும் ...,,,,
2,Shivam Singh,@mrshivam_9,2023-04-11T06:20:02.000Z,Shivam Singh\n@mrshivam_9\n·\n2mThat's somethi...,,,,
3,Kapil mishra,@imkapil_mishra,2023-04-11T06:19:55.000Z,Kapil mishra\n@imkapil_mishra\n·\n2mThe one wh...,,,1.0,
4,vijay,@vijay__raj6,2023-04-11T06:19:55.000Z,vijay\n@vijay__raj6\n·\n2mતે આ માટે કંઈક ખાસ અ...,,,,


In [10]:
twitter_tweet = pd.array(twitter_tweet_)
df['Text'] = twitter_tweet

In [11]:
df.head() # gives the updated dataset

Unnamed: 0,UserName,Handle,Timestamp,Text,Emojis,Comments,Likes,Retweets
0,Ashutosh mourya,@Anurag_091,2023-04-11T06:20:19.000Z,Ashutosh mourya @Anurag_091 · 2mThat's somethi...,,,,
1,m.,@imshivans7,2023-04-11T06:20:13.000Z,m. @imshivans7 · 2mஇது ஒரு சிறப்பு மற்றும் மகி...,,,,
2,Shivam Singh,@mrshivam_9,2023-04-11T06:20:02.000Z,Shivam Singh @mrshivam_9 · 2mThat's something ...,,,,
3,Kapil mishra,@imkapil_mishra,2023-04-11T06:19:55.000Z,Kapil mishra @imkapil_mishra · 2mThe one who h...,,,1.0,
4,vijay,@vijay__raj6,2023-04-11T06:19:55.000Z,vijay @vijay__raj6 · 2mતે આ માટે કંઈક ખાસ અને ...,,,,


In [12]:
df.tail()

Unnamed: 0,UserName,Handle,Timestamp,Text,Emojis,Comments,Likes,Retweets
39,Shree Radhe,@imradhe95,2023-04-11T06:06:17.000Z,Shree Radhe @imradhe95 · 16mಇದು ಮುಂಬೈನಲ್ಲಿ ನಡೆ...,,,,
40,Parul Patel,@Parul2fb,2023-04-11T06:06:16.000Z,Parul Patel @Parul2fb · 16mIf you want to see ...,,,,
41,Jyoti Singh,@Jyoti_S12,2023-04-11T06:06:12.000Z,Jyoti Singh @Jyoti_S12 · 16mYah interesting ev...,,,,
42,punam,@Punampanday87,2023-04-11T06:06:05.000Z,punam @Punampanday87 · 16myou won't want to mi...,,,,
43,sweety sharma,@sweetysharma108,2023-04-11T06:06:03.000Z,sweety sharma @sweetysharma108 · 16mAt #VMware...,,,,


In [13]:
df.to_csv('mydata.csv', index=False)

In [14]:
data

[('Ashutosh mourya',
  '@Anurag_091',
  '2023-04-11T06:20:19.000Z',
  "Ashutosh mourya\n@Anurag_091\n·\n2mThat's something special and happy for this one #VMwareExplore",
  '',
  '',
  '',
  ''),
 ('m.',
  '@imshivans7',
  '2023-04-11T06:20:13.000Z',
  'm.\n@imshivans7\n·\n2mஇது ஒரு சிறப்பு மற்றும் மகிழ்ச்சியான விஷயம் #VMwareExplore',
  '',
  '',
  '',
  ''),
 ('Shivam Singh',
  '@mrshivam_9',
  '2023-04-11T06:20:02.000Z',
  "Shivam Singh\n@mrshivam_9\n·\n2mThat's something special and happy for this one #VMwareExplore",
  '',
  '',
  '',
  ''),
 ('Kapil mishra',
  '@imkapil_mishra',
  '2023-04-11T06:19:55.000Z',
  'Kapil mishra\n@imkapil_mishra\n·\n2mThe one who has medicine for every pain.... That unique relationship is parents...\n#भ्रष्टाचार_खिलाफ_पायलट\n#VMwareExplore\n#ClimateSmartUP\n#VMwareIndia #TejRan',
  '',
  '',
  '1',
  ''),
 ('vijay',
  '@vijay__raj6',
  '2023-04-11T06:19:55.000Z',
  'vijay\n@vijay__raj6\n·\n2mતે આ માટે કંઈક ખાસ અને ખુશ છે #VMwareExplore',
  '',
  '',
  

In [15]:
tweet


('sweety sharma',
 '@sweetysharma108',
 '2023-04-11T06:06:03.000Z',
 "sweety sharma\n@sweetysharma108\n·\n16mAt #VMwareExplore, we're seeing firsthand how multi-cloud innovation is altering the business. It's astounding to witness such countless imaginative thoughts and arrangements in a single spot!",
 '',
 '',
 '',
 '')

In [16]:
tweet[3]

"sweety sharma\n@sweetysharma108\n·\n16mAt #VMwareExplore, we're seeing firsthand how multi-cloud innovation is altering the business. It's astounding to witness such countless imaginative thoughts and arrangements in a single spot!"

In [17]:
df["Text"]

0     Ashutosh mourya
@Anurag_091
·
2mThat's somethi...
1     m.
@imshivans7
·
2mஇது ஒரு சிறப்பு மற்றும் மகி...
2     Shivam Singh
@mrshivam_9
·
2mThat's something ...
3     Kapil mishra
@imkapil_mishra
·
2mThe one who h...
4     vijay
@vijay__raj6
·
2mતે આ માટે કંઈક ખાસ અને ...
5     Raja
@iam_raja100
·
2mಇದು ವಿಶೇಷ ಮತ್ತು ಸಂತೋಷದ ಸ...
6     Arun tiwari
@Aruntiwari509
·
3mThat's somethin...
7     Pavan thakur
@Pavan_thakur0
·
3mਇਹ ਇਸ ਲਈ ਕੁਝ ਖ...
8     kanhaiya
@Rishi_xtp
·
3mइसके लिए यह कुछ खास और...
9     Anubhav
@Anubhav1t
·
3mThat's something specia...
10    imshivaaye
@anzalisingh7
·
4mThat's something ...
11    Shree Radhe
@imradhe95
·
4mஏய் ட்வீப்ஸ் இந்த ஆ...
12    Isshu
@iamishu09
·
5mહે ટ્વીપ્સ આ વર્ષની સૌથી ...
13    Raghav
@imraghaav09
·
5mਹੇ ਟਵੀਪਸ ਸਾਲ ਦੇ ਇਸ ਸਭ ...
14    Ashutosh mourya
@Anurag_091
·
5mਹੇ ਟਵੀਪਸ ਸਾਲ ਦ...
15    mr_shivay
@iam_shivay7
·
5mਹੇ ਟਵੀਪਸ ਸਾਲ ਦੇ ਇਸ ...
16    Mr Rangbaz
@mr_rangbaz18
·
6mनमस्ते ट्वीट्स वर...
17    𝐌𝐚𝐧_𝐄𝐞𝐬𝐡™
@Mr_Python17
·
6mThe biggest mul

In [18]:
twitter_tweet

<StringArray>
[                                                                                                                                                      "Ashutosh mourya\n@Anurag_091\n·\n2mThat's something special and happy for this one #VMwareExplore",
                                                                                                                                                                        'm.\n@imshivans7\n·\n2mஇது ஒரு சிறப்பு மற்றும் மகிழ்ச்சியான விஷயம் #VMwareExplore',
                                                                                                                                                          "Shivam Singh\n@mrshivam_9\n·\n2mThat's something special and happy for this one #VMwareExplore",
                                                  'Kapil mishra\n@imkapil_mishra\n·\n2mThe one who has medicine for every pain.... That unique relationship is parents...\n#भ्रष्टाचार_खिलाफ_पायलट\n#VMwareExplore\n#ClimateSmartUP\n#

In [26]:
import pandas as pd
# Read the CSV file into a DataFrame
MYdata = pd.read_csv('C:/Users/kldat/Downloads/mydata.csv')

# Print the DataFrame
MYdata.head()


Unnamed: 0,UserName,Handle,Timestamp,Text,Emojis,Comments,Likes,Retweets
0,Tirumalaa Coconut Oil,@TirumalaaOil,2023-04-06T04:59:15.000Z,Tirumalaa Coconut Oil\n@TirumalaaOil\n·\n17sMa...,,,,
1,Sonika singh,@Imsonikasingh,2023-04-06T04:59:09.000Z,Sonika singh\n@Imsonikasingh\n·\n23sसाधु संत क...,🚩,,,
2,Mandeep RajBhar,@MandeepRajBhar_,2023-04-06T04:59:09.000Z,Mandeep RajBhar\n@MandeepRajBhar_\n·\n23sRam k...,,,,
3,Bhuvanesh Sharma,@bhuvanhere,2023-04-06T04:58:01.000Z,Bhuvanesh Sharma\n@bhuvanhere\n·\n1mजय बजरंग ब...,🙏,,,
4,Nimish Raval,@RavalNimish,2023-04-06T04:57:57.000Z,Nimish Raval\n@RavalNimish\n·\n1mअंजनी पुत्र श...,🎈 🚩,,,


In [27]:
MYdata['Text'] = twitter_tweet
MYdata.head()

Unnamed: 0,UserName,Handle,Timestamp,Text,Emojis,Comments,Likes,Retweets
0,Tirumalaa Coconut Oil,@TirumalaaOil,2023-04-06T04:59:15.000Z,Tirumalaa Coconut Oil @TirumalaaOil · 17sMay H...,,,,
1,Sonika singh,@Imsonikasingh,2023-04-06T04:59:09.000Z,Sonika singh @Imsonikasingh · 23sसाधु संत के त...,🚩,,,
2,Mandeep RajBhar,@MandeepRajBhar_,2023-04-06T04:59:09.000Z,Mandeep RajBhar @MandeepRajBhar_ · 23sRam ke B...,,,,
3,Bhuvanesh Sharma,@bhuvanhere,2023-04-06T04:58:01.000Z,Bhuvanesh Sharma @bhuvanhere · 1mजय बजरंग बली ...,🙏,,,
4,Nimish Raval,@RavalNimish,2023-04-06T04:57:57.000Z,Nimish Raval @RavalNimish · 1mअंजनी पुत्र श्री...,🎈 🚩,,,


In [29]:
MYdata['Text'][0]

'Tirumalaa Coconut Oil\n@TirumalaaOil\n·\n17sMay Hanuman ji is always there to bless you with happiness, good fortune, and health. Warm wishes on Hanuman Janmotsav. \n.\n.\n#HanumanJanmotsav #HanumanJayanti #हनुमान_जन्मोत्सव #हनुमान_जयंती'

In [31]:
MYdata.to_csv('UpdatedData.csv', index=False,encoding='utf-8')