In [8]:
# Extract weather data from the web using web scraping

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape weather observations for New York from timeanddate.com into
# `web_scraping_df`, one row per table row that has enough <td> cells.
URL = "https://www.timeanddate.com/weather/usa/new-york/historic"

# Output column name -> 0-based position of the <td> cell it is read from.
# Positions 0 and 4 are deliberately skipped (not used by this notebook).
CELL_INDEX = {
    'Temp': 1,
    'Weather': 2,
    'Wind': 3,
    'Humidity': 5,
    'Barometer': 6,
    'Visibility': 7,
}
# The highest index read is 7, so a usable row needs at least 8 cells.
# (The previous guard of 7 let 7-cell rows through and raised IndexError.)
MIN_CELLS = max(CELL_INDEX.values()) + 1

# A timeout keeps the cell from hanging forever; raise_for_status() stops us
# from silently parsing an HTTP error page as if it were weather data.
response = requests.get(URL, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# Find all table rows on the page (across every table)
rows = soup.find_all('tr')

# First <table> on the page, used only to print its headers for debugging.
# NOTE(review): these headers may belong to a summary table rather than the
# table whose rows are scraped below -- confirm and adjust the selector.
table = soup.find('table')

# Check if the table is found
if table is not None:
    # Extract table headers (column names)
    headers = [header.text.strip() for header in table.find_all('th')]

    # Print the headers for debugging
    print("Extracted Headers:", headers)
else:
    print("Table not found on the webpage.")


# List to store scraped data
web_scraped_data = []

# Loop through every row after the first and keep those with enough cells
for row in rows[1:]:  # Skip the header row
    columns = row.find_all('td')  # Find all table data cells

    if len(columns) < MIN_CELLS:
        # Header, summary or otherwise short rows carry no observation.
        continue

    # Extract and clean the text of each wanted cell
    web_scraped_data.append({
        name: columns[position].text.strip()
        for name, position in CELL_INDEX.items()
    })

# Always define the DataFrame (possibly empty, but with the expected columns)
# so later cells do not fail with NameError when nothing was scraped.
web_scraping_df = pd.DataFrame(web_scraped_data, columns=list(CELL_INDEX))
if web_scraped_data:
    print(web_scraping_df)
else:
    print("No valid data was scraped.")




Extracted Headers: ['', 'Temperature', 'Humidity', 'Pressure', 'High', 'Low', 'Average']
    Temp                Weather     Wind Humidity  Barometer Visibility
0   5 °C                 Clear.  11 km/h      58%  1010 mbar      16 km
1   5 °C                 Clear.   7 km/h      58%  1010 mbar      16 km
2   5 °C                 Clear.  11 km/h      60%  1010 mbar      16 km
3   6 °C                 Clear.  11 km/h      60%  1009 mbar      16 km
4   6 °C                 Clear.  13 km/h      60%  1009 mbar      16 km
5   6 °C        Passing clouds.  13 km/h      65%  1008 mbar      16 km
6   6 °C                 Clear.  15 km/h      62%  1007 mbar      16 km
7   6 °C                 Clear.      N/A      65%  1006 mbar      16 km
8   6 °C              Overcast.   9 km/h      65%  1005 mbar      16 km
9   6 °C                 Clear.  22 km/h      68%  1004 mbar      16 km
10  6 °C        Passing clouds.  19 km/h      74%  1003 mbar      16 km
11  6 °C        Passing clouds.  22 km/h      7

In [10]:
import os
from getpass import getpass

import psycopg2
from psycopg2 import sql

# Database connection parameters.
# The password is taken from the PGPASSWORD environment variable, falling back
# to an interactive prompt, so that no credential is stored in the notebook.
# NOTE: a literal password was previously saved in this cell; treat it as
# exposed and rotate it.
DB_PARAMS = {
    'dbname': 'weather',
    'user': 'postgres',
    'password': os.environ.get('PGPASSWORD') or getpass('PostgreSQL password: '),
    'host': 'localhost',  # Or your database host
    'port': '5432',       # Default PostgreSQL port
}

# SQL query to insert data into the weather_details table
insert_query = sql.SQL("""
    INSERT INTO weather_details (temp, weather, wind, humidity, barometer, visibility)
    VALUES (%s, %s, %s, %s, %s, %s)
""")

# DataFrame columns, in the same order as the placeholders in insert_query.
INSERT_COLUMNS = ['Temp', 'Weather', 'Wind', 'Humidity', 'Barometer', 'Visibility']

# Bind the names before the try block so the cleanup in `finally` never hits an
# unbound variable when connect() itself fails.
conn = None
cursor = None

try:
    # One plain tuple per DataFrame row, ordered to match the placeholders
    records = list(
        web_scraping_df[INSERT_COLUMNS].itertuples(index=False, name=None)
    )

    # Connect to the PostgreSQL database
    conn = psycopg2.connect(**DB_PARAMS)

    # `with conn` commits if the block succeeds and rolls back if it raises;
    # `with conn.cursor()` closes the cursor on exit.
    with conn, conn.cursor() as cursor:
        cursor.executemany(insert_query, records)

    print("Data successfully inserted into the database.")

except Exception as e:
    print(f"Error while inserting data: {e}")

finally:
    # `with conn` does not close the connection, so close it explicitly
    if conn is not None:
        conn.close()


Data successfully inserted into the database.


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the worldometers COVID-19 country table into `covid_df`, keeping only
# the columns listed in `desired_columns`.

# URL of the COVID-19 data webpage
URL = "https://www.worldometers.info/coronavirus/"

# Send a GET request to fetch the webpage content. The timeout prevents an
# indefinite hang; raise_for_status() avoids parsing an HTTP error page.
response = requests.get(URL, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# Define the columns of interest (must match the extracted header text)
desired_columns = [
    "Country,Other",
    "TotalCases",
    "TotalDeaths",
    "NewCases",
    "NewDeaths",
    "TotalRecovered",
    "ActiveCases",
    "Deaths/1M pop",
    "Population"
]

# Find the table containing the COVID-19 data
table = soup.find('table', id='main_table_countries_today')

# Defaults used when the table (or its body) is missing from the page
headers = []
rows = []

# List to store scraped data
covid_data = []

if table is None:
    # find() returns None when the page layout changes; report it instead of
    # failing with AttributeError on the next attribute access.
    print("Table not found on the webpage.")
else:
    # Extract table headers (column names)
    headers = [header.text.strip() for header in table.find_all('th')]

    # Debugging: Print extracted headers
    print("Extracted Headers:", headers)

    # Find all rows in the first <tbody> of the table (if there is one)
    table_body = table.find('tbody')
    if table_body is not None:
        rows = table_body.find_all('tr')

# Iterate through the rows and extract data
for row in rows:
    data = [col.text.strip() for col in row.find_all('td')]

    # Skip rows without cells and rows shorter than the header line
    if not data or len(data) < len(headers):
        continue

    # Pair each header with its cell; zip stops at the last header, so any
    # extra trailing cells are ignored.
    covid_data.append({
        header: value
        for header, value in zip(headers, data)
        if header in desired_columns
    })

# Convert the list of dictionaries into a Pandas DataFrame
covid_df = pd.DataFrame(covid_data)

# Keep only the desired columns (if they exist in the DataFrame), in the order
# given by desired_columns
available_columns = [col for col in desired_columns if col in covid_df.columns]
covid_df = covid_df[available_columns]

# Display the DataFrame
print(covid_df)
