In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd

# Scrape job title, location and company name from one Glassdoor search-results
# page, save them to "glassdoor_job_listings.csv" and keep them in `df`.

# Initialize WebDriver (ChromeDriverManager().install() supplies the chromedriver path)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# Collected rows; filled inside the try block so a partial scrape is still available
job_listings = []

try:
    # Open Glassdoor's "data analyst" job listings page for the United States
    driver.get("https://www.glassdoor.com/Job/united-states-data-analyst-jobs-SRCH_IL.0,13_IN1_KO14,26.htm")

    # Fixed pause to give the page time to render before querying elements
    time.sleep(5)

    # Titles, locations and companies are located as three independent lists.
    # NOTE(review): rows are paired purely by position, so a card that lacks a
    # location or company element would shift later rows — confirm against the page.
    job_titles = driver.find_elements(By.CLASS_NAME, "JobCard_jobTitle___7I6y")
    job_locations = driver.find_elements(By.CLASS_NAME, "JobCard_location__rCz3x")
    job_companies = driver.find_elements(By.CLASS_NAME, 'EmployerProfile_compactEmployerName__LE242')

    # The number of job listings is driven by the titles found
    num_jobs = len(job_titles)

    # Element text must be read while the browser session is still open
    for i, title_element in enumerate(job_titles):
        try:
            job_listings.append({
                "Job Title": title_element.text,
                # Fall back to "N/A" when fewer locations/companies than titles were found
                "Location": job_locations[i].text if i < len(job_locations) else "N/A",
                "Company": job_companies[i].text if i < len(job_companies) else "N/A"
            })
        except Exception as e:
            # Best effort: report the failing row and keep going with the rest
            print(f"Error scraping job {i}: {e}")
finally:
    # Always close the browser, even when navigation or scraping raises
    driver.quit()

# Convert the list of dictionaries to a pandas DataFrame
df = pd.DataFrame(job_listings)

# Save DataFrame to CSV
df.to_csv("glassdoor_job_listings.csv", index=False)

# Display the DataFrame
print(df)

                                         Job Title              Location  \
0                                     Data Analyst      Saint Joseph, MO   
1                            Business Data Analyst        Pennsauken, NJ   
2                            Capacity Data Analyst       San Antonio, TX   
3   Business Intelligence Analyst - Legacy of Hope        Birmingham, AL   
4                                     Data Analyst        Post Falls, ID   
5                            Power BI Data Analyst          Stafford, VA   
6                                  Sr Data Analyst     Prince George, VA   
7                           Data Analysts - Remote                Remote   
8                             Data Quality Analyst          Columbia, MO   
9                                  Product Analyst             Piqua, OH   
10                         Operations Data Analyst     Oklahoma City, OK   
11                                    Data Analyst          Cheyenne, WY   
12          

In [2]:
# Insert every scraped row of `df` into the PostgreSQL table `internships`.
import getpass
import os

import psycopg2

# Never keep the database password in the notebook: read it from the PGPASSWORD
# environment variable, or prompt for it when the variable is not set.
pg_password = os.environ.get("PGPASSWORD") or getpass.getpass("PostgreSQL password: ")

conn = psycopg2.connect(
    host="localhost",
    database="sql_analysis",
    user="postgres",
    password=pg_password,
    port=5432
)

# (title, company, location) tuples in the column order used by the INSERT below
rows = list(df[["Job Title", "Company", "Location"]].itertuples(index=False, name=None))

try:
    # `with conn` commits on success and rolls back if an insert raises;
    # `with conn.cursor()` closes the cursor. Neither closes the connection.
    with conn, conn.cursor() as cur:
        # Plain INSERT: re-running this cell appends the same rows again
        # (unless the table enforces uniqueness — not visible from here).
        cur.executemany(
            """
            INSERT INTO internships (title,company,location) VALUES (%s,%s,%s)
            """,
            rows,
        )
finally:
    # Release the connection whether or not the inserts succeeded
    conn.close()

print(df)

                                         Job Title              Location  \
0                                     Data Analyst      Saint Joseph, MO   
1                            Business Data Analyst        Pennsauken, NJ   
2                            Capacity Data Analyst       San Antonio, TX   
3   Business Intelligence Analyst - Legacy of Hope        Birmingham, AL   
4                                     Data Analyst        Post Falls, ID   
5                            Power BI Data Analyst          Stafford, VA   
6                                  Sr Data Analyst     Prince George, VA   
7                           Data Analysts - Remote                Remote   
8                             Data Quality Analyst          Columbia, MO   
9                                  Product Analyst             Piqua, OH   
10                         Operations Data Analyst     Oklahoma City, OK   
11                                    Data Analyst          Cheyenne, WY   
12          

In [3]:
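# NOTE: disabled connection smoke test, superseded by the query cell that follows.
# The password is redacted on purpose; supply it from the environment, not from source.
# from sqlalchemy import create_engine

# # Test connection
# try:
#     engine = create_engine("postgresql://postgres:<password>@localhost/sql_analysis")
#     conn = engine.connect()
#     print("Connection successful!")
#     conn.close()
# except Exception as e:
#     print(f"Error connecting to the database: {e}")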


In [4]:
# Query the `internships` table for titles containing "Data" and write the
# matches to 'filtered_internships_report.csv'.
import getpass
import os

import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL

# Build the connection URL from parts so the password is neither stored in the
# notebook nor needs manual URL-escaping. It comes from PGPASSWORD, or from an
# interactive prompt when the variable is not set.
db_url = URL.create(
    "postgresql+psycopg2",
    username="postgres",
    password=os.environ.get("PGPASSWORD") or getpass.getpass("PostgreSQL password: "),
    host="localhost",
    database="sql_analysis",
)

# The search pattern is passed as a bound parameter instead of being inlined
query = text("""
    SELECT title, company, location
    FROM internships
    WHERE title ILIKE :pattern
""")

engine = None
try:
    engine = create_engine(db_url)

    # The context manager closes the connection even if the query raises
    with engine.connect() as conn:
        print("Connection successful!")

        result_proxy = conn.execute(query, {"pattern": "%Data%"})

        # Fetch all rows and the column names, then build the DataFrame.
        # NOTE: this rebinds `df`, replacing the scraped frame from the first cell.
        results = result_proxy.fetchall()
        columns = list(result_proxy.keys())
        df = pd.DataFrame(results, columns=columns)

    # Only write the report when the query returned rows
    if not df.empty:
        df.to_csv('filtered_internships_report.csv', index=False)
        print("Report generated successfully!")
    else:
        print("No data found for the query.")

    # Print the DataFrame
    print(df)

except Exception as e:
    print(f"Error executing the query: {e}")

finally:
    # Dispose of the engine only if it was actually created
    if engine is not None:
        engine.dispose()


Connection successful!
Report generated successfully!
                                        title  \
0                                Data Analyst   
1                                Data Analyst   
2                       Business Data Analyst   
3                    Atmospheric Data Analyst   
4                                Data Analyst   
..                                        ...   
117                              Data Analyst   
118                  Procurement Data Analyst   
119         Junior Financial and Data Analyst   
120                              Data Analyst   
121  Alpha Data Operations Analyst, Associate   

                                            company          location  
0                 Missouri Western State University  Saint Joseph, MO  
1                                    PRC Industries   Spruce Pine, NC  
2                                  Pro Capital, LLC    Pennsauken, NJ  
3    Space Sciences & Engineering LLC, DBA PlanetiQ        Golden, CO

In [5]:
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders

def send_email_with_report(
    report_path,
    sender_email="ceratopsezpz5@gmail.com",
    recipient_email="soujanyachavan25@gmail.com",
    app_password=None,
    attachment_name="report.csv",
):
    """Email the file at ``report_path`` as an attachment through Gmail's SMTP server.

    Errors are reported with ``print`` rather than raised, so the function
    always returns ``None``.

    Args:
        report_path: Path of the file to attach; it is read in binary mode.
        sender_email: Address used for the ``From`` header and the SMTP login.
        recipient_email: Address used for the ``To`` header and as the envelope recipient.
        app_password: Password for the SMTP login. When omitted, the
            ``GMAIL_APP_PASSWORD`` environment variable is used; if neither is
            available nothing is sent.
        attachment_name: File name shown for the attachment in the email.

    Returns:
        None.
    """
    import os  # local import keeps this block self-contained

    # Email subject and body
    subject = "Daily Internship Report"
    body = "Please find attached the latest report on summer internships."

    # Create the email headers
    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = recipient_email
    msg['Subject'] = subject

    # Attach the body of the email
    msg.attach(MIMEText(body, 'plain'))

    # Read and attach the report; `with` closes the file handle in every case
    try:
        with open(report_path, 'rb') as attachment:
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(attachment.read())
        encoders.encode_base64(part)
        part.add_header('Content-Disposition', 'attachment', filename=attachment_name)
        msg.attach(part)
    except Exception as e:
        print(f"Error attaching the file: {e}")
        return

    # The credential is supplied by the caller or the environment, never by source.
    # Any password that was previously committed in this notebook should be revoked.
    password = app_password or os.environ.get("GMAIL_APP_PASSWORD")
    if not password:
        print("Error sending email: no app password provided "
              "(pass app_password or set GMAIL_APP_PASSWORD)")
        return

    # Send the email
    try:
        # The context manager ends the SMTP session even if login or send fails
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
            server.starttls()  # Start TLS encryption before sending credentials
            server.login(sender_email, password)
            server.sendmail(sender_email, recipient_email, msg.as_string())
        print("Email sent successfully!")
    except Exception as e:
        print(f"Error sending email: {e}")

# Send the CSV written by the filtered-report query cell
report_csv_path = 'filtered_internships_report.csv'
send_email_with_report(report_csv_path)


Email sent successfully!


In [6]:
``

SyntaxError: invalid syntax (1016415176.py, line 1)