<a href="https://colab.research.google.com/github/Manjunatha-kv/Software-Development/blob/main/Untitled3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import requests
from bs4 import BeautifulSoup
import csv
import sqlite3
from datetime import datetime, date

class VergeScraper:
    """Scrape article metadata from a web page and persist it to CSV and SQLite.

    ``scrape`` fills ``self.articles`` with dicts keyed by ``id``, ``url``,
    ``headline``, ``author`` and ``date``; ``save_csv`` and
    ``save_to_database`` write that list out. ``run`` chains all three steps
    and closes the database connection afterwards.
    """

    def __init__(self, url, db_path='verge_articles.db', timeout=30):
        """Open the SQLite database and ensure the ``articles`` table exists.

        Args:
            url: Address of the page to scrape.
            db_path: SQLite database file to open (created if missing).
            timeout: Seconds to wait for the HTTP response before giving up.
        """
        self.url = url
        self.timeout = timeout
        self.articles = []
        self.conn = sqlite3.connect(db_path)
        self.cursor = self.conn.cursor()
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS articles (
                id INTEGER PRIMARY KEY,
                url TEXT,
                headline TEXT,
                author TEXT,
                date TEXT
            )
        ''')

    def scrape(self):
        """Fetch ``self.url`` and rebuild ``self.articles`` from its markup.

        Each ``div.c-entry-box--compact__body`` element yields one article.
        The date is read from the node following the first byline span and
        must match the ``'%B %d, %Y'`` format. An entry that lacks the
        expected heading, link or byline raises instead of being skipped.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            ValueError: If a date string does not match the expected format.
        """
        # Without a timeout a stalled connection would block forever.
        response = requests.get(self.url, timeout=self.timeout)
        # Fail loudly on 4xx/5xx rather than parsing an error page.
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        article_elements = soup.find_all('div', class_='c-entry-box--compact__body')

        # Collect into a local list so a repeated or failed scrape never
        # leaves duplicated or half-filled results in self.articles.
        scraped = []
        for idx, article in enumerate(article_elements):
            headline = article.h2.text.strip()
            link = article.h2.a['href']
            byline = article.find('span', class_='c-byline__item')
            author = byline.text.strip()
            # Named date_text so the imported datetime.date is not shadowed.
            date_text = byline.next_sibling.strip()
            published = datetime.strptime(date_text, '%B %d, %Y').date()

            scraped.append({
                'id': idx,
                'url': link,
                'headline': headline,
                'author': author,
                'date': published,
            })
        self.articles = scraped

    def save_csv(self):
        """Write ``self.articles`` to ``<ddmmYYYY>_verge.csv`` in the working directory."""
        filename = datetime.now().strftime('%d%m%Y') + '_verge.csv'
        fieldnames = ['id', 'url', 'headline', 'author', 'date']
        # Explicit UTF-8: the platform default encoding may be unable to
        # represent non-ASCII characters in headlines or author names.
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(self.articles)

    def save_to_database(self):
        """Insert every article whose URL is not yet stored, then commit.

        The per-run ``id`` in each article dict is deliberately not written:
        it restarts at 0 on every scrape, so using it as the primary key
        collides with rows from earlier runs. SQLite assigns the key instead.
        """
        # The connection context manager commits on success and rolls back
        # if any statement raises, so a failure cannot leave a partial batch.
        with self.conn:
            for article in self.articles:
                self.cursor.execute(
                    'SELECT 1 FROM articles WHERE url=?', (article['url'],)
                )
                if self.cursor.fetchone() is not None:
                    continue

                published = article['date']
                # Store ISO text explicitly instead of relying on sqlite3's
                # implicit date adapter (deprecated since Python 3.12).
                if isinstance(published, date):
                    published = published.isoformat()

                self.cursor.execute(
                    'INSERT INTO articles (url, headline, author, date) VALUES (?, ?, ?, ?)',
                    (article['url'], article['headline'], article['author'], published),
                )

    def run(self):
        """Scrape, export to CSV and SQLite, and always close the connection."""
        try:
            self.scrape()
            self.save_csv()
            self.save_to_database()
        finally:
            # Release the database handle even when a step above fails.
            self.conn.close()

if __name__ == '__main__':
    # Script entry point: scrape The Verge front page, then write the
    # results to the dated CSV file and the SQLite database.
    scraper = VergeScraper(url='https://www.theverge.com/')
    scraper.run()
