### Install Selenium

In [3]:
!pip install selenium



### Install WebDriver Manager:

In [4]:
!pip install webdriver-manager



In [1]:
import pandas as pd

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service  # Import the Service class

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import time
import csv
import random

# Set up Selenium WebDriver
from webdriver_manager.chrome import ChromeDriverManager


# Automatically download and install ChromeDriver

service = Service(ChromeDriverManager().install(), port=9515)
driver = webdriver.Chrome(service=service)


def human_like_delay(min=1, max=3):
    """Random delay to mimic human behavior"""
    time.sleep(random.uniform(min, max))


# Open Twitter search page

hashtags = ["naukri", "jobs", "jobseeker", "vacancy"]
query = " OR ".join(f"#{tag}" for tag in hashtags)
url = f"https://x.com/search?q=naukri%20jobs%20jobseeker%20vacancy&src=typed_query"
driver.get(url)
time.sleep(5)  # Wait for the page to load

# Log in to Twitter (replace with your credentials)

username = "YOUR USERNAME"   
password = "YOUR PASSWORD"

def login_twitter(username, password):
    driver.get(url)
    human_like_delay()
    
    # Enter username
    username_field = driver.find_element(By.XPATH, '//input[@autocomplete="username"]')
    human_like_delay(0.5, 1.5)
    username_field.send_keys(username)
    human_like_delay(0.2, 0.5)
    username_field.send_keys(Keys.RETURN)
    human_like_delay()
    
    # Enter password
    password_field = driver.find_element(By.XPATH, '//input[@autocomplete="current-password"]')
    human_like_delay(0.5, 1.5)
    password_field.send_keys(password)
    human_like_delay(0.2, 0.5)
    password_field.send_keys(Keys.RETURN)
    human_like_delay(2, 4)
    

# Replace with your Twitter credentials
login_twitter(username, password)

# Function to extract tweet data
def extract_tweet_data(tweet):
    try:
        # Wait for username element to be present
        username = WebDriverWait(tweet, 10).until(
            EC.presence_of_element_located((By.XPATH, './/span[contains(text(), "@")]'))
        ).text

        # Wait for date-time element to be present
        date_time = WebDriverWait(tweet, 10).until(
            EC.presence_of_element_located((By.XPATH, './/time'))
        ).get_attribute("datetime")

        # Wait for tweet content element to be present
        content = WebDriverWait(tweet, 10).until(
            EC.presence_of_element_located((By.XPATH, './/div[@data-testid="tweetText"]'))
        ).text

        # Extract mentions
        mentions = [mention.text for mention in tweet.find_elements(By.XPATH, './/a[contains(@href, "/")]')]

        # Extract hashtags
        hashtags = [hashtag.text for hashtag in tweet.find_elements(By.XPATH, './/a[contains(@href, "/hashtag/")]')]

        # Extract likes (handle missing element)
        likes = WebDriverWait(tweet, 10).until(
                EC.presence_of_all_elements_located((By.XPATH, './/button[@data-testid="like"]'))) 
        likes = [like.text for like in likes][0]
        
      
        # Extract retweets (handle missing element)
        retweets = WebDriverWait(tweet, 10).until(
                EC.presence_of_all_elements_located((By.XPATH, './/button[@data-testid="retweet"]')))
        retweets = [retweet.text for retweet in retweets][0]

        
        # Extract replies (handle missing element)
        replies = WebDriverWait(tweet, 10).until(
                EC.presence_of_all_elements_located((By.XPATH, './/button[@data-testid="reply"]')))
        replies = [reply.text for reply in replies][0]
    

        # Extract views (handle missing element)
        views = WebDriverWait(tweet, 10).until(
                EC.presence_of_all_elements_located((By.XPATH, './/span[@data-testid="app-text-transition-container"]')))
        views = [view.text for view in views][3]

        return {
            "Username": username,
            "Date and Time": date_time,
            "Likes": likes,
            "Retweets": retweets,
            "Replies": replies,
            "Views": views,
            "Tweet": content,
            "Mentions": mentions,
            "Hashtags": hashtags
        }
    except Exception as e:
        print(f"Error extracting tweet data: {e}")
        return None

# Main scraping function
def scrape_tweets(num_scrolls=20):
    tweets_data = []
    unique_tweets = set()  # To avoid duplicates
    
    for _ in range(num_scrolls):
        # Find all tweets on the page
        tweets = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.XPATH, '//*[@id="react-root"]')))
        
        # Process new tweets
        for tweet in tweets[-20:]:  # Only process most recent to avoid duplicates
            tweet_data = extract_tweet_data(tweet)
            if tweet_data and tweet_data["Tweet"] not in unique_tweets:  # Check for duplicates
                unique_tweets.add(tweet_data["Tweet"])
                tweets_data.append(tweet_data)
        
        # Scroll to load more tweets
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        human_like_delay(3, 5)  # Important - wait for new tweets to load
            
    return tweets_data


# Run the scraper
tweets_data = scrape_tweets(num_scrolls=20)


# Save data to a CSV file
if tweets_data:
    keys = tweets_data[0].keys() 
    with open("tweets.csv", "w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=keys)
        writer.writeheader()
        writer.writerows(tweets_data)
else:
    print("No tweets were scraped")

# Close the WebDriver
driver.quit()


In [3]:
# Print scraped data
for tweet in tweets_data:
    print(tweet)

{'Username': '@SKMCH', 'Date and Time': '2025-03-16T05:00:14.000Z', 'Tweet': 'Career Opportunity at Karachi Diagnostic Centre & Clinic, Karachi.\n\n Assistant Clinical Nurse Manager-KDC&C, Karachi\n\nFor position details and eligibility criteria, visit: http://shaukatkhanum.org.pk/join-us/current-vacancies…\n\n careers@shaukatkhanum.org.pk\n\n#CareersAtSKMCH #SKMCH #JobAlert #JobsInKarachi', 'Mentions': ['', '', '', '', '', '', '', '', '', '', 'Top', 'Latest', 'People', 'Media', 'Lists', '', 'Shaukat Khanum', '@SKMCH', 'Mar 16', 'http://shaukatkhanum.org.pk/join-us/current-vacancies…', '#CareersAtSKMCH', '#SKMCH', '#JobAlert', '#JobsInKarachi', '', '10K', '', 'Shaukat Khanum', '@SKMCH', 'Feb 2', 'https://bit.ly/SKMCHJobs', '#CareersAtSKMCH', '#SKMCH', '#Jobs', '#Career', '#JobsinLahore', '', '2.7K', '', 'BRIC-NCCS', '@DBT_NCCS_Pune', 'Mar 17', '#CellBiology', '#research', 'https://nccs.res.in/uploads/careers/174169894308P02_2025%20English-Walk%20In%20Interviews.pdf…', 'https://nccs.res

In [3]:
# tweets_data

In [4]:
df = pd.read_csv("tweets.csv") 

In [5]:
df

Unnamed: 0,Username,Date and Time,Tweet,Mentions,Hashtags,Likes,Retweets,Replies,Views
0,@TheEconomist,2023-06-16T04:43:13.000Z,Finance Graduates Vacancy at Dunzo \n#Sarkarir...,"['', '', '', '', '2', '', '', '', '', '', 'Top...","['#Sarkariresult', '#jobs', '#jobsearch', '#jo...",1,,,58
1,@Employ_News,2024-08-02T13:08:18.000Z,NABARD is hiring 100 Assistant Managers (Rura...,"['', '', '', '', '2', '', '', '', '', '', 'Top...","['#NABARDJobs', '#RuralDevelopment', '#Banking...",392,66.0,5.0,73K
2,@AmarUjalaNews,2025-03-22T09:09:25.000Z,Weekly Jobs Bulletin: 89500 से अधिक सरकारी नौक...,"['', '', '', '', '2', '', '', '', '', '', 'Top...","['#WeeklyJobsBulletin', '#Jobs', '#JobsBulleti...",6,2.0,,610
3,@avsarhubb,2023-06-04T04:06:40.000Z,Graduates Vacancy at Unilever \n#Sarkariresult...,"['', '', '', '', '2', '', '', '', '', '', 'Top...","['#Sarkariresult', '#jobs', '#jobsearch', '#jo...",,,,45
4,@Employ_News,2024-12-15T16:42:37.000Z,#CSIR - Central Electronics Engineering Resea...,"['', '', '', '', '3', '', '', '', '', '', 'Top...","['#CSIR', '#SarkariNaukri', '#TechJobs', '#Gov...",304,61.0,1.0,28K
5,@UnSubtleDesi,2021-03-09T06:43:05.000Z,http://OpIndia.com is hiring! \n\nLast date fo...,"['', '', '', '', '3', '', '', '', '', '', 'Top...","['#Jobs', '#Vacancies', '#job', '#jobs', '#job...",3K,634.0,125.0,1
6,@PoliticalKida,2023-01-04T09:51:17.000Z,TOI published a report showing 1.4cr less jobs...,"['', '', '', '', '3', '', '', '', '', '', 'Top...","['#Thread', '', '', '', '', '', '', '#Employme...",822,388.0,8.0,71K
7,@KiranKS,2021-02-26T08:18:44.000Z,Dear youngsters.. Go to http://Naukri.com\n\nS...,"['', '', '', '', '3', '', '', '', '', '', 'Top...",[],475,183.0,25.0,14
