# In [1]:
import os
import time
import warnings

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV

# Suppress every warning for the rest of the session.
# NOTE(review): this also hides any warnings emitted by the grid search below.
warnings.filterwarnings("ignore")

# Part 1: Model Training
# The path must be a raw string: in a normal literal the "\U" of "C:\Users"
# starts a \UXXXXXXXX escape (SyntaxError: 'unicodeescape' codec ...), and
# "\r", "\f" and "\t" further along would silently become control characters.
file_path = r'C:\Users\rijaa\Downloads\firstpart_after_ui (3)\firstpart_after_ui\firstpart\trainn.csv'
try:
    data = pd.read_csv(file_path, encoding='utf-8')
except UnicodeDecodeError:
    # The file is not valid UTF-8; retry with latin1.
    data = pd.read_csv(file_path, encoding='latin1')

# Keep only rows that have both the post text and its label.
data = data.dropna(subset=['Post', 'Lead'])
# Binary target: 1 when the label is exactly 'Yes', 0 for anything else.
data['Lead'] = (data['Lead'] == 'Yes').astype(int)

# TF-IDF over unigrams and bigrams, English stop words removed, capped at
# 5000 features. The fitted vectorizer is reused later to transform the
# scraped posts, so it must stay available at module level.
vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2), max_features=5000)
X = vectorizer.fit_transform(data['Post'])
y = data['Lead']

# 80/20 split with a fixed seed.
# NOTE(review): X_test / y_test are not used anywhere in the visible code.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5-fold cross-validated search over C, solver and max_iter on all cores.
param_grid = {'C': [0.1, 1, 10], 'solver': ['liblinear', 'saga'], 'max_iter': [1000, 2000]}
grid_search = GridSearchCV(LogisticRegression(), param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_

# Part 2: Facebook Post Extraction
driver = webdriver.Chrome()
driver.maximize_window()
wait = WebDriverWait(driver, 20)  # explicit waits time out after 20 seconds

# Login to Facebook
# Use HTTPS directly so the login page is never requested over plain HTTP.
url = 'https://www.facebook.com'
driver.get(url)
username = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='email']")))
password = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='pass']")))

# Credentials are read from the FB_EMAIL / FB_PASSWORD environment variables.
# NOTE(security): the literal fallbacks are kept only so existing runs keep
# working; secrets do not belong in source control. Rotate this password and
# delete the fallbacks once the environment variables are set.
fb_email = os.environ.get("FB_EMAIL", "aroobaminhas14@gmail.com")
fb_password = os.environ.get("FB_PASSWORD", "beautyinsights")

username.clear()
username.send_keys(fb_email)
password.clear()
password.send_keys(fb_password)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()
time.sleep(5)  # fixed pause after submitting the login form

# Navigate to the group/page
page_url = 'https://www.facebook.com/groups/2295020414190663'
driver.get(page_url)
time.sleep(5)  # fixed pause after requesting the group page

# Scraping limits and bookkeeping.
max_posts = 10            # stop once this many non-empty posts are collected
all_posts = []            # collected rows: {'Post': text, 'Post ID': DOM id}
scraped_posts = set()     # DOM ids already handled, so a post is processed once
scroll_attempts = 0       # consecutive scrolls that did not change page height
max_scroll_attempts = 5   # give up after this many unchanged-height scrolls
scraped_posts_count = 0
last_height = driver.execute_script("return document.body.scrollHeight")

while scraped_posts_count < max_posts and scroll_attempts < max_scroll_attempts:
    posts = driver.find_elements(By.XPATH, "//*[contains(@data-ad-comet-preview, 'message') and contains(@data-ad-preview, 'message')]")
    for post in posts:
        try:
            # Expand truncated text when the post has a "See more" control.
            try:
                see_more_button = post.find_element(By.XPATH, ".//div[contains(text(), 'See more')]")
                driver.execute_script("arguments[0].click();", see_more_button)
                time.sleep(1)
            except NoSuchElementException:
                pass

            post_text_parts = post.find_elements(By.XPATH, ".//div[@style='text-align: start;']")
            # Read each part's text once (every .text access is a browser
            # round-trip), then drop the empty fragments.
            part_texts = [part.text for part in post_text_parts]
            post_text = " ".join(text for text in part_texts if text)
            post_id = post.get_attribute("id")
        except StaleElementReferenceException:
            # The element was replaced in the DOM while it was being read;
            # skip it for this pass.
            continue

        # Posts without an id cannot be de-duplicated and are skipped.
        if not post_id or post_id in scraped_posts:
            continue

        scraped_posts.add(post_id)
        if post_text:
            all_posts.append({'Post': post_text, 'Post ID': post_id})
            scraped_posts_count += 1
        if scraped_posts_count >= max_posts:
            break

    if scraped_posts_count >= max_posts:
        break

    # Scroll down and check whether the page grew.
    driver.execute_script("window.scrollBy(0, 500);")
    time.sleep(3)
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        scroll_attempts += 1
    else:
        scroll_attempts = 0
    last_height = new_height

# Explicit columns keep 'Post' and 'Post ID' present even when nothing was
# scraped; a bare DataFrame([]) has no columns and breaks later lookups.
df_posts = pd.DataFrame(all_posts, columns=['Post', 'Post ID'])

# Part 3: Predict Lead/Non-Lead and Save Results
if df_posts.empty:
    # Nothing was scraped: there is nothing to predict on (and the frame may
    # not even have a 'Post' column), so add an empty label column instead.
    df_posts['Lead'] = pd.Series(dtype='int64')
else:
    # Reuse the vectorizer fitted during training so the features line up
    # with what the model was trained on.
    df_posts['Lead'] = best_model.predict(vectorizer.transform(df_posts['Post']))
df_posts.to_csv('classified_posts2.csv', index=False)

print(f"Classified posts have been saved to classified_posts2.csv. Total posts scraped: {scraped_posts_count}.")

# Part 4: Comment on Lead Posts
def comment_on_lead_posts(driver, posts_df, comment_text):
    """Post ``comment_text`` on every row of ``posts_df`` whose 'Lead' is 1.

    Each lead post is looked up in the currently loaded page by the DOM id
    stored in its 'Post ID' column, and the comment button is resolved
    relative to that element, so each comment targets its own post rather
    than the first comment button on the page.

    Args:
        driver: Selenium WebDriver with the page containing the posts loaded.
        posts_df: DataFrame with 'Post ID' and 'Lead' columns.
        comment_text: Text to type into the comment box.

    Returns:
        None. Progress and failures are reported with ``print``; a failure on
        one post does not stop the remaining posts from being processed.
    """
    # An empty frame (or one that was never classified) has nothing to do.
    if posts_df.empty or 'Lead' not in posts_df.columns:
        return

    lead_posts = posts_df[posts_df['Lead'] == 1]

    # From the post's element, climb to the nearest ancestor that contains a
    # comment button and take the first such button inside it.
    # NOTE(review): assumes that nearest ancestor is the post's own container;
    # confirm against the live page markup.
    comment_button_xpath = (
        "./ancestor::*[.//span[@data-ad-rendering-role='comment_button']][1]"
        "//span[@data-ad-rendering-role='comment_button']"
    )

    for post_id in lead_posts['Post ID']:
        try:
            # Locate this specific post, then its comment button.
            post_element = driver.find_element(By.ID, str(post_id))
            comment_button = post_element.find_element(By.XPATH, comment_button_xpath)
            driver.execute_script("arguments[0].click();", comment_button)
            time.sleep(2)

            # Locate text box
            # NOTE(review): still a page-wide lookup, as before; if several
            # comment boxes are open at once the first match is used.
            text_box = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, "//div[@aria-label='Write an answer…' and @contenteditable='true']"))
            )
            driver.execute_script("arguments[0].click();", text_box)
            text_box.send_keys(comment_text + Keys.RETURN)
            time.sleep(2)

            print(f"Commented on post ID: {post_id}")

        except NoSuchElementException:
            print(f"Could not find the comment button or box for post ID: {post_id}. Skipping...")
        except Exception as e:
            # Best-effort: report the failure and continue with the next post.
            print(f"An error occurred while commenting on post ID: {post_id}: {e}")

comment_text = "For getting this service, please visit our store."
try:
    comment_on_lead_posts(driver, df_posts, comment_text)
finally:
    # Always shut the browser down, even if commenting is interrupted, so no
    # Chrome/chromedriver process is left running.
    driver.quit()


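# Recorded cell output: SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape (3435882705.py, line 17)
# Cause: the Windows path assigned to file_path was written as a normal (non-raw) string literal, so the "\U" in "C:\Users" is parsed as the start of a \UXXXXXXXX escape sequence.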