In [3]:
import requests
import pandas as pd
import sqlite3
import random
from datetime import datetime, timedelta

# RapidAPI key sent in the "X-RapidAPI-Key" header by extract_linkedin_data().
# While this is empty, extract_linkedin_data() skips the HTTP request and
# generates simulated job postings instead.
RAPID_API_KEY = ""
# Path of the SQLite database file that load_data() connects to.
DB_NAME = "linkedin_jobs.db"

def _simulate_job_postings(count=20):
    """Return a DataFrame of ``count`` randomly generated job postings.

    Columns: job_id, job_title, company_name, location, posted_date
    (``YYYY-MM-DD``, between today and 10 days ago) and salary_raw.
    """
    titles = ['Data Analyst', 'Junior Data Analyst', 'Senior Business Analyst',
              'Data Engineer', 'Analytics Consultant', 'BI Developer', 'Python Developer']
    companies = ['TechFlow', 'DataCorp', 'InnovateX', 'Omega Solutions', 'StartupHub']
    cities = ['Bangalore', 'Mumbai', 'Remote', 'Hyderabad', 'Pune']
    salaries = ['$60k - $80k', 'Not Disclosed', '12 LPA', '15,000/mo', 'Competitive']

    # One reference date for the whole batch, so a run that straddles midnight
    # cannot produce dates relative to two different "todays".
    today = datetime.now()

    # Keys are evaluated in order, so the sequence of random draws
    # (titles, companies, dates, salaries) is fixed for a seeded generator.
    return pd.DataFrame({
        'job_id': range(1001, 1001 + count),
        'job_title': [random.choice(titles) for _ in range(count)],
        'company_name': [random.choice(companies) for _ in range(count)],
        # Cycle through the cities so the column length always matches
        # ``count`` instead of relying on a hard-coded repetition factor.
        'location': [cities[i % len(cities)] for i in range(count)],
        'posted_date': [
            (today - timedelta(days=random.randint(0, 10))).strftime('%Y-%m-%d')
            for _ in range(count)
        ],
        'salary_raw': [random.choice(salaries) for _ in range(count)],
    })


def extract_linkedin_data(query="Data Analyst", location="India", timeout=10):
    """Fetch job postings from the RapidAPI LinkedIn endpoint, or simulate them.

    When ``RAPID_API_KEY`` is set, one POST request is made for page 1 of the
    search. On HTTP 200 the decoded JSON is wrapped in a DataFrame and returned
    as-is. On any other status code, on any exception, or when no key is
    configured, 20 simulated postings are returned instead.

    Args:
        query: Search terms sent to the API (only logged in simulation mode).
        location: Location filter sent to the API (only logged in simulation
            mode).
        timeout: Seconds to wait for the API before giving up and falling back
            to simulation. Without it a stalled connection would block forever.

    Returns:
        pandas.DataFrame of job postings.

    NOTE(review): the API response is not validated here; confirm it carries
    the columns that transform_data() reads (job_title, location, salary_raw).
    """
    print(f"[{datetime.now()}] 🚀 Starting Extraction for '{query}' in '{location}'...")

    if RAPID_API_KEY:
        url = "https://linkedin-jobs-search.p.rapidapi.com/"
        payload = {"search_terms": query, "location": location, "page": "1"}
        headers = {
            "content-type": "application/json",
            "X-RapidAPI-Key": RAPID_API_KEY,
            "X-RapidAPI-Host": "linkedin-jobs-search.p.rapidapi.com"
        }
        try:
            response = requests.post(url, json=payload, headers=headers, timeout=timeout)

            if response.status_code == 200:
                print(" -> API Connection Successful!")
                return pd.DataFrame(response.json())

            print(f" -> API Error {response.status_code}. Switching to Simulation...")
        except Exception as e:
            # Deliberately broad: any network, decoding or DataFrame-construction
            # failure should degrade to simulated data rather than abort the run.
            print(f" -> Connection failed ({e}). Switching to Simulation...")
    else:
        print(" -> No API Key detected. Running in SIMULATION MODE...")

    df = _simulate_job_postings()
    print(f" -> Extracted {len(df)} records successfully.")
    return df

def transform_data(df):
    """Clean job postings and derive seniority and salary-disclosure columns.

    The frame is modified in place and also returned:
      * ``job_title`` is title-cased and ``location`` is stripped of
        surrounding whitespace.
      * ``seniority_level`` is 'Senior', 'Entry Level' or 'Mid Level', decided
        by whole-word keywords in the title; a missing title yields 'Unknown'.
      * ``salary_disclosed`` is False when ``salary_raw`` is missing or mentions
        "disclosed"/"competitive" (case-insensitive), otherwise True.

    Args:
        df: DataFrame with ``job_title``, ``location`` and ``salary_raw``
            columns.

    Returns:
        The same DataFrame object with the two derived columns added.
    """
    import re  # local import: only this function needs regular expressions

    print(f"[{datetime.now()}] 🧹 Starting Transformation...")

    df['job_title'] = df['job_title'].str.title()
    df['location'] = df['location'].str.strip()

    # Whole-word matching: plain substring tests classified "Internal Auditor"
    # or "International Sales" as interns, and any title containing the
    # letters "sr" as senior.
    senior_words = re.compile(r'\b(?:senior|sr|lead|leader)\b')
    entry_words = re.compile(r'\b(?:junior|jr|intern|interns|internship)\b')

    def categorize_level(title):
        # Missing titles arrive as None/NaN, which have no .lower().
        if not isinstance(title, str):
            return 'Unknown'
        lowered = title.lower()
        if senior_words.search(lowered):
            return 'Senior'
        if entry_words.search(lowered):
            return 'Entry Level'
        return 'Mid Level'

    df['seniority_level'] = df['job_title'].apply(categorize_level)

    def is_salary_disclosed(raw):
        # str(None) / str(nan) contain neither keyword, so a missing salary
        # used to be reported as disclosed; treat it as undisclosed instead.
        if pd.api.types.is_scalar(raw) and pd.isna(raw):
            return False
        lowered = str(raw).lower()
        return 'disclosed' not in lowered and 'competitive' not in lowered

    df['salary_disclosed'] = df['salary_raw'].apply(is_salary_disclosed)

    print(" -> Data Cleaned. Added 'seniority_level' column.")
    return df

def load_data(df):
    """Write the DataFrame to the ``linkedin_jobs_daily`` table in ``DB_NAME``.

    The table is replaced on every call (``if_exists='replace'``) and the
    DataFrame index is not stored. Failures are reported on stdout and
    swallowed, so the caller continues either way.

    Args:
        df: DataFrame to persist.
    """
    print(f"[{datetime.now()}] 💾 Loading into Database ({DB_NAME})...")

    try:
        conn = sqlite3.connect(DB_NAME)
        try:
            df.to_sql('linkedin_jobs_daily', conn, if_exists='replace', index=False)
        finally:
            # Close even when to_sql raises; previously a failed write left
            # the connection (and its lock on the database file) open.
            conn.close()
        print(" -> Success! Pipeline Complete.")
    except Exception as e:
        # Best-effort load: report the problem instead of aborting the run.
        print(f" -> Database Error: {e}")

if __name__ == "__main__":
    # Run the three pipeline stages in order: extract -> transform -> load.
    raw_df = extract_linkedin_data(query="Data Analyst", location="India")
    clean_df = transform_data(raw_df)
    load_data(clean_df)

    # Preview the first five cleaned rows as a Markdown table. This shows the
    # in-memory frame; it is not read back from the database.
    print("\n--- FINAL DB VIEW (Top 5 Rows) ---")
    preview = clean_df.loc[:, ['job_title', 'location', 'seniority_level', 'salary_raw']].head()
    print(preview.to_markdown(index=False))

[2025-11-26 18:07:04.118973] 🚀 Starting Extraction for 'Data Analyst' in 'India'...
 -> No API Key detected. Running in SIMULATION MODE...
 -> Extracted 20 records successfully.
[2025-11-26 18:07:04.119963] 🧹 Starting Transformation...
 -> Data Cleaned. Added 'seniority_level' column.
[2025-11-26 18:07:04.122288] 💾 Loading into Database (linkedin_jobs.db)...
 -> Success! Pipeline Complete.

--- FINAL DB VIEW (Top 5 Rows) ---
| job_title               | location   | seniority_level   | salary_raw   |
|:------------------------|:-----------|:------------------|:-------------|
| Analytics Consultant    | Bangalore  | Mid Level         | 12 LPA       |
| Senior Business Analyst | Mumbai     | Senior            | Competitive  |
| Analytics Consultant    | Remote     | Mid Level         | $60k - $80k  |
| Junior Data Analyst     | Hyderabad  | Entry Level       | 12 LPA       |
| Senior Business Analyst | Pune       | Senior            | Competitive  |
