<a href="https://colab.research.google.com/github/Anbarasu2410/LinkedIn-Job-Market-Intelligence-Platform/blob/main/RealtimeLinkedin.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install requests beautifulsoup4 pandas
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import re
from datetime import datetime
from google.colab import files

print("✅ Setup completed!")

✅ Setup completed!


In [2]:
class FresherJobsInternshipsScraper:
    def __init__(self):
        self.jobs_data = []

    def scrape_fresher_opportunities(self):
        """
        Scrape both jobs and internships for freshers
        """

        roles = [
            'frontend web developer',
            'reactjs developer',
            'data analyst',
            'mis analyst',
            'powerbi developer',
            'full stack web developer',
            'web developer',
            'software developer'
        ]


        fresher_keywords = ['fresher', 'intern', 'internship', 'trainee']

        locations = ['Remote', 'Hybrid', 'Bangalore', 'Chennai', 'Madurai', 'Kochi', 'Coimbatore']

        print("🚀 Searching for fresher jobs and internships...")

        for location in locations:
            for role in roles:
                for keyword in fresher_keywords:
                    search_query = f"{role} {keyword}"
                    print(f"🔍 {search_query} in {location}")
                    self._scrape_opportunities(search_query, location, keyword)
                    time.sleep(1)

        return pd.DataFrame(self.jobs_data)

    def _scrape_opportunities(self, search_query, location, opportunity_type):
        """Scrape opportunities for specific search"""
        try:
            url = "https://www.linkedin.com/jobs-guest/jobs/api/seeMoreJobPostings/search"
            params = {
                'keywords': search_query,
                'location': location,
                'f_TPR': 'r86400',  # Last 24 hours
                'start': 0
            }

            response = requests.get(url, params=params, timeout=10)
            if response.status_code == 200:
                soup = BeautifulSoup(response.content, 'html.parser')
                job_cards = soup.find_all('div', class_='base-card')

                for card in job_cards:
                    job_info = self._extract_job_info(card, search_query, location, opportunity_type)
                    if job_info and self._is_it_company(job_info['Company']):
                        self.jobs_data.append(job_info)

        except Exception as e:
            print(f"❌ Error: {e}")

    def _extract_job_info(self, card, search_query, location, opportunity_type):
        """Extract job/internship information"""
        try:

            title_elem = card.find('h3', class_='base-search-card__title')
            title = title_elem.text.strip() if title_elem else ''


            company_elem = card.find('h4', class_='base-search-card__subtitle')
            company = company_elem.text.strip() if company_elem else ''


            link_elem = card.find('a', class_='base-card__full-link')
            job_url = link_elem.get('href') if link_elem else ''


            applicants_elem = card.find('span', class_='job-search-card__applicant-count')
            applicants_text = applicants_elem.text.strip() if applicants_elem else '0 applicants'


            applicant_count = self._parse_applicants(applicants_text)


            final_opp_type = self._determine_opportunity_type(title, opportunity_type)

            return {
                'Title': title,
                'Company': company,
                'Location': location,
                'Job URL': job_url,
                'Applicants': applicant_count,
                'Opportunity Type': final_opp_type,
                'Posted Date': datetime.now().strftime('%Y-%m-%d'),
                'Search Query': search_query
            }

        except Exception:
            return None

    def _determine_opportunity_type(self, title, initial_type):
        """Determine if it's job or internship based on title"""
        title_lower = title.lower()

        if any(word in title_lower for word in ['intern', 'internship']):
            return 'Internship'
        elif any(word in title_lower for word in ['fresher', 'trainee', 'junior']):
            return 'Fresher Job'
        else:
            return 'Job'

    def _parse_applicants(self, applicants_text):
        """Parse applicant count from text"""
        try:
            if 'over' in applicants_text.lower():
                numbers = re.findall(r'\d+', applicants_text)
                return int(numbers[0]) if numbers else None
            elif 'applicants' in applicants_text.lower():
                numbers = re.findall(r'\d+', applicants_text)
                return int(numbers[0]) if numbers else 0
            return 0
        except:
            return 0

    def _is_it_company(self, company_name):
        """Filter only IT companies (no institutes)"""
        if not company_name or company_name == 'N/A':
            return False

        institute_keywords = [
            'institute', 'college', 'university', 'academy', 'school',
            'training', 'education', 'coaching', 'classes', 'tution'
        ]

        company_lower = company_name.lower()


        if any(keyword in company_lower for keyword in institute_keywords):
            return False

        return True

scraper = FresherJobsInternshipsScraper()

In [3]:
# Run the scraper for both jobs and internships
print("🎯 Searching for fresher jobs and internships...")
opportunities_df = scraper.scrape_fresher_opportunities()

print(f"✅ Found {len(opportunities_df)} opportunities!")

🎯 Searching for fresher jobs and internships...
🚀 Searching for fresher jobs and internships...
🔍 frontend web developer fresher in Remote
🔍 frontend web developer intern in Remote
🔍 frontend web developer internship in Remote
🔍 frontend web developer trainee in Remote
🔍 reactjs developer fresher in Remote
🔍 reactjs developer intern in Remote
🔍 reactjs developer internship in Remote
🔍 reactjs developer trainee in Remote
🔍 data analyst fresher in Remote
🔍 data analyst intern in Remote
🔍 data analyst internship in Remote
🔍 data analyst trainee in Remote
🔍 mis analyst fresher in Remote
🔍 mis analyst intern in Remote
🔍 mis analyst internship in Remote
🔍 mis analyst trainee in Remote
🔍 powerbi developer fresher in Remote
🔍 powerbi developer intern in Remote
🔍 powerbi developer internship in Remote
🔍 powerbi developer trainee in Remote
🔍 full stack web developer fresher in Remote
🔍 full stack web developer intern in Remote
🔍 full stack web developer internship in Remote
🔍 full stack web deve

In [4]:
# Display results
if len(opportunities_df) > 0:
    print("📋 FRESHER JOBS & INTERNSHIPS FOUND:")
    print("=" * 80)


    display(opportunities_df[['Title', 'Company', 'Location', 'Opportunity Type', 'Applicants']])

    # Summary
    print(f"\n📊 SUMMARY:")
    print(f"• Total Opportunities: {len(opportunities_df)}")
    print(f"• Companies: {opportunities_df['Company'].nunique()}")
    print(f"• Jobs: {len(opportunities_df[opportunities_df['Opportunity Type'] == 'Job'])}")
    print(f"• Fresher Jobs: {len(opportunities_df[opportunities_df['Opportunity Type'] == 'Fresher Job'])}")
    print(f"• Internships: {len(opportunities_df[opportunities_df['Opportunity Type'] == 'Internship'])}")

    # Show low competition opportunities
    low_competition = opportunities_df[opportunities_df['Applicants'] < 10]
    if len(low_competition) > 0:
        print(f"• Opportunities with <10 applicants: {len(low_competition)}")
        print("\n🎯 LOW COMPETITION OPPORTUNITIES (<10 applicants):")
        display(low_competition[['Title', 'Company', 'Location', 'Opportunity Type', 'Applicants']])
else:
    print("❌ No opportunities found today")

📋 FRESHER JOBS & INTERNSHIPS FOUND:


Unnamed: 0,Title,Company,Location,Opportunity Type,Applicants
0,Angular Developer,People Prime Worldwide,Remote,Job,0
1,Node Js Developer,People Prime Worldwide,Remote,Job,0
2,Front-End Developer Intern,WEBBOOST SOLUTION IT SERVICES,Remote,Internship,0
3,front-end developer intern,Geniuses Factory,Remote,Internship,0
4,UI Frontend Developer Internship,TripFactory.com,Remote,Internship,0
...,...,...,...,...,...
585,Full-Stack Engineer,Uplers,Coimbatore,Job,0
586,Full Stack Developer Intern,OneDot Communications,Coimbatore,Internship,0
587,Full Stack Developer Intern,Neuronest AI Pvt Ltd,Coimbatore,Internship,0
588,Django + AI Developer Intern (0-1 year of expe...,Codevantage Inc,Coimbatore,Internship,0



📊 SUMMARY:
• Total Opportunities: 590
• Companies: 222
• Jobs: 339
• Fresher Jobs: 33
• Internships: 218
• Opportunities with <10 applicants: 590

🎯 LOW COMPETITION OPPORTUNITIES (<10 applicants):


Unnamed: 0,Title,Company,Location,Opportunity Type,Applicants
0,Angular Developer,People Prime Worldwide,Remote,Job,0
1,Node Js Developer,People Prime Worldwide,Remote,Job,0
2,Front-End Developer Intern,WEBBOOST SOLUTION IT SERVICES,Remote,Internship,0
3,front-end developer intern,Geniuses Factory,Remote,Internship,0
4,UI Frontend Developer Internship,TripFactory.com,Remote,Internship,0
...,...,...,...,...,...
585,Full-Stack Engineer,Uplers,Coimbatore,Job,0
586,Full Stack Developer Intern,OneDot Communications,Coimbatore,Internship,0
587,Full Stack Developer Intern,Neuronest AI Pvt Ltd,Coimbatore,Internship,0
588,Django + AI Developer Intern (0-1 year of expe...,Codevantage Inc,Coimbatore,Internship,0


In [5]:
# Save to Google Sheets format
def save_opportunities_to_sheets(opportunities_df):
    """Save opportunities data for Google Sheets"""

    if len(opportunities_df) == 0:
        print("No opportunities to save")
        return

    # Create clean dataframe
    sheets_df = opportunities_df[[
        'Title',
        'Company',
        'Location',
        'Opportunity Type',
        'Applicants',
        'Job URL',
        'Posted Date'
    ]].copy()

    # Sort by opportunity type and applicants
    sheets_df = sheets_df.sort_values(['Opportunity Type', 'Applicants'])

    # Save as CSV
    filename = f"fresher_opportunities_{datetime.now().strftime('%Y%m%d')}.csv"
    sheets_df.to_csv(filename, index=False)

    print(f"💾 Saved {len(sheets_df)} opportunities to {filename}")

    # Download the file
    files.download(filename)

    return sheets_df

# Save and download
if len(opportunities_df) > 0:
    final_data = save_opportunities_to_sheets(opportunities_df)
    print("✅ File downloaded! Import this CSV into Google Sheets")

💾 Saved 590 opportunities to fresher_opportunities_20251007.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ File downloaded! Import this CSV into Google Sheets


In [6]:
# Simple one-click function for daily use
def get_daily_opportunities():
    """
    One function to get all fresher jobs and internships daily
    """
    print("🚀 GETTING TODAY'S FRESHER OPPORTUNITIES...")
    print("🎯 Includes: Jobs, Fresher Jobs, Internships")
    print("💼 Roles: Frontend, ReactJS, Data Analyst, MIS, PowerBI, Full Stack, Web Dev")
    print("📍 Locations: Remote, Hybrid, Bangalore, Chennai, Madurai, Kochi, Coimbatore")
    print("⏰ Posted: Last 24 hours")

    # Get opportunities
    opportunities = scraper.scrape_fresher_opportunities()

    if len(opportunities) > 0:
        # Save file with timestamp
        filename = f"IT_Fresher_Opportunities_{datetime.now().strftime('%Y%m%d_%H%M')}.csv"
        opportunities.to_csv(filename, index=False)

        # Download
        files.download(filename)

        # Summary
        jobs_count = len(opportunities[opportunities['Opportunity Type'] == 'Job'])
        fresher_jobs_count = len(opportunities[opportunities['Opportunity Type'] == 'Fresher Job'])
        internships_count = len(opportunities[opportunities['Opportunity Type'] == 'Internship'])
        low_comp_count = len(opportunities[opportunities['Applicants'] < 10])

        print(f"\n🎉 SUCCESS!")
        print(f"📥 Downloaded: {filename}")
        print(f"📊 Total Opportunities: {len(opportunities)}")
        print(f"💼 Jobs: {jobs_count} | 👶 Fresher Jobs: {fresher_jobs_count} | 🎓 Internships: {internships_count}")
        print(f"👥 Low Competition (<10 applicants): {low_comp_count}")

        print(f"\n🏢 COMPANIES HIRING TODAY:")
        print("-" * 40)
        for company in sorted(opportunities['Company'].unique()):
            company_opps = opportunities[opportunities['Company'] == company]
            job_types = company_opps['Opportunity Type'].value_counts().to_dict()
            type_str = ", ".join([f"{count} {typ}" for typ, count in job_types.items()])
            print(f"• {company} ({type_str})")

    else:
        print("❌ No opportunities found today")

    return opportunities

# 🎯 RUN THIS CELL TO GET ALL OPPORTUNITIES
daily_opportunities = get_daily_opportunities()

🚀 GETTING TODAY'S FRESHER OPPORTUNITIES...
🎯 Includes: Jobs, Fresher Jobs, Internships
💼 Roles: Frontend, ReactJS, Data Analyst, MIS, PowerBI, Full Stack, Web Dev
📍 Locations: Remote, Hybrid, Bangalore, Chennai, Madurai, Kochi, Coimbatore
⏰ Posted: Last 24 hours
🚀 Searching for fresher jobs and internships...
🔍 frontend web developer fresher in Remote
🔍 frontend web developer intern in Remote
🔍 frontend web developer internship in Remote
🔍 frontend web developer trainee in Remote
🔍 reactjs developer fresher in Remote
🔍 reactjs developer intern in Remote
🔍 reactjs developer internship in Remote
🔍 reactjs developer trainee in Remote
🔍 data analyst fresher in Remote
🔍 data analyst intern in Remote
🔍 data analyst internship in Remote
🔍 data analyst trainee in Remote
🔍 mis analyst fresher in Remote
🔍 mis analyst intern in Remote
🔍 mis analyst internship in Remote
🔍 mis analyst trainee in Remote
🔍 powerbi developer fresher in Remote
🔍 powerbi developer intern in Remote
🔍 powerbi developer in

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


🎉 SUCCESS!
📥 Downloaded: IT_Fresher_Opportunities_20251007_0748.csv
📊 Total Opportunities: 1181
💼 Jobs: 681 | 👶 Fresher Jobs: 66 | 🎓 Internships: 434
👥 Low Competition (<10 applicants): 1181

🏢 COMPANIES HIRING TODAY:
----------------------------------------
• AARATECH (12 Job)
• ALDI USA (4 Job)
• ATC (46 Job)
• AXA Investment Managers (2 Internship)
• AXM365 (2 Internship)
• Accenture Poland (2 Internship)
• Accenture in India (12 Job)
• Accenture services Pvt Ltd (20 Job)
• ActioNet, Inc. (2 Job)
• Adept Global (4 Job)
• Aegon (5 Internship)
• Akaike Technologies (4 Job)
• Alkye (2 Fresher Job)
• Allime Tech Solutions (1 Job)
• Altom Transport (2 Internship)
• Amazon (16 Job)
• Amazon Web Services (AWS) (14 Job)
• Art Technology and Software (2 Job)
• Assetcare Services Private Limited (2 Job)
• Avantor (2 Job)
• BAE Systems, Inc. (4 Internship)
• BICE VIDA (2 Job)
• BLENIX CHAIN TECHNOLOGY (2 Job)
• BNP Paribas (2 Job)
• BairesDev (28 Job)
• BangDB (2 Job)
• BeaconFire Inc. (6 Fre

In [7]:
# Quick daily update function
def quick_daily_update():
    """
    Run this every day for quick update
    """
    print(f"📅 DAILY UPDATE - {datetime.now().strftime('%Y-%m-%d')}")
    print("Getting today's fresher jobs and internships...")

    opportunities = scraper.scrape_fresher_opportunities()

    if len(opportunities) > 0:
        # Save simple file
        filename = f"Today_Opportunities_{datetime.now().strftime('%m%d')}.csv"
        opportunities.to_csv(filename, index=False)
        files.download(filename)

        print(f"✅ Today's file downloaded: {filename}")
        print(f"📊 Found: {len(opportunities)} opportunities")

        # Quick summary
        for opp_type in ['Job', 'Fresher Job', 'Internship']:
            count = len(opportunities[opportunities['Opportunity Type'] == opp_type])
            if count > 0:
                print(f"   • {opp_type}: {count}")

    return opportunities

# Uncomment to run quick daily update
# today = quick_daily_update()