# Resume_Analyzer Coding

#### PyPDF2 for handling PDF files, extracting text, and reading page content.
#### pandas for managing and manipulating tabular data (e.g., converting results into structured formats like Excel).
#### google-generativeai for interacting with Google's Generative AI APIs, such as Gemini for analyzing resume content and generating structured responses.
#### tqdm for adding progress bars for visual feedback when processing multiple files.
#### google-api-python-client for interacting with Google APIs, such as listing and downloading files from Google Drive.
#### google-auth for handling authentication with Google APIs using a service account.


In [3]:
!pip install PyPDF2   
!pip install pandas
!pip install google-generativeai
!pip install --upgrade google-generativeai
!pip install tqdm
!pip install google-api-python-client
!pip install google-auth
!pip install google-api-python-client
!pip install PyPDF2 pandas google-generativeai tqdm google-api-python-client google-auth

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


### Standard Python Libraries

In [5]:
# For file and directory operations
import os  

# For parsing and working with JSON data
import json 

 # For type annotations
from typing import Dict, List, Any 

### External Libraries

In [7]:
 # For reading and extracting text from PDF files
import PyPDF2 

# For handling dataframes and saving data to Excel
import pandas as pd  

# For interacting with Google's Generative AI
import google.generativeai as genai  

# For logging messages and errors during execution
import logging  

# For introducing delays in API retries
from time import sleep  

# For regular expressions to extract specific data patterns
import re  

# For concurrent processing of multiple files
from concurrent.futures import ThreadPoolExecutor  

# For displaying progress bars during long-running operations
from tqdm import tqdm 

### Google Drive API Libraries

In [9]:
# For interacting with Google APIs (e.g., Google Drive API)
from googleapiclient.discovery import build  

# For authenticating with Google APIs using service accounts
from google.oauth2.service_account import Credentials 

# For downloading files from Google Drive
from googleapiclient.http import MediaIoBaseDownload  

### Initialize logging to track execution and debug issues

In [11]:
# Configure the root logger once for the whole notebook: INFO level and above,
# each record prefixed with a timestamp and its severity level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

### ResumeAnalyzer Class

### Workflow

### 1. Initialization:
#### Set up Generative AI.
#### Authenticate with Google Drive API.

### 2. Google Drive Integration:
#### List and download resumes from Google Drive.

### 3. Resume Analysis:
#### Extract text from PDFs.
#### Use Generative AI to analyze resumes.

### 4. Batch Processing:
#### Process resumes in batches for scalability and efficiency.

### 5. Excel Report Generation:
#### Save analysis results and detailed insights into an Excel file.

In [14]:
class ResumeAnalyzer:
    def __init__(self, api_key: str, credentials_file: str = None, credentials_dict: Dict = None):
        """
        Initialize ResumeAnalyzer with an API key and optional Google Drive credentials.

        Args:
            api_key (str): Gemini API key
            credentials_file (str, optional): Path to Google Drive service account credentials file
            credentials_dict (Dict, optional): Google Drive service account credentials as dictionary

        Workflow:
        - Set up the Generative AI API (Gemini) using the provided API key.
        - Initialize the Generative AI model.
        - Configure Google Drive API authentication:
          - If `credentials_dict` is provided, authenticate using the dictionary.
          - Otherwise, if `credentials_file` is provided, authenticate using the file.
          - If neither is provided, Drive integration stays disabled
            (`self.drive_service` is None) and only local processing is available.
        """
        # Always define the Drive attributes so that an instance created without
        # credentials has a well-defined state instead of missing attributes.
        self.creds = None
        self.drive_service = None

        # Step 1: Set up the Generative AI API with the provided API key
        self.setup_genai(api_key)

        # Step 2: Initialize the Generative AI model
        self.setup_model()

        # Step 3: Configure Google Drive API authentication.
        # The dictionary takes precedence over the file when both are given.
        if credentials_dict:
            self.setup_drive_api_from_dict(credentials_dict)
        elif credentials_file:
            self.setup_drive_api_from_file(credentials_file)
        else:
            logging.info("No Google Drive credentials provided; Drive integration is disabled.")
            

    # Function to set up GenAI with the provided API key
    # This function configures the GenAI service using the given API key.
    def setup_genai(self, api_key: str) -> None:
        """Configure the google.generativeai client with the given API key.

        Args:
            api_key (str): Gemini API key forwarded to `genai.configure`.
        """
        genai.configure(api_key=api_key)


    # Setup Gemini 1.5-Flash Model for Optimized Processing
    # This method initializes the 'gemini-1.5-flash' model for faster and more efficient 
    # performance in tasks requiring generative AI.
    def setup_model(self) -> None:
        """Create the Gemini model handle used for resume analysis.

        Stores a `genai.GenerativeModel` for the 'gemini-1.5-flash' model
        on `self.model`.
        """
        self.model = genai.GenerativeModel('gemini-1.5-flash')

    
    # Method to Set Up Google Drive API from a Credentials Dictionary
    def setup_drive_api_from_dict(self, credentials_dict: Dict) -> None:
        """Build the Google Drive v3 client from in-memory service account info.

        Args:
            credentials_dict (Dict): Service account credentials as a dictionary.

        Raises:
            Exception: Whatever the credential loader or client builder raised;
                the error is logged and then propagated unchanged.
        """
        drive_scopes = ['https://www.googleapis.com/auth/drive']
        try:
            # Keep the credentials on the instance before building the client
            self.creds = Credentials.from_service_account_info(
                credentials_dict, scopes=drive_scopes
            )
            self.drive_service = build('drive', 'v3', credentials=self.creds)
        except Exception as err:
            # Record the failure, then let the caller decide how to react
            logging.error(f"Error setting up Drive API from dict: {str(err)}")
            raise

    
    # Method to Set Up Google Drive API from a Credentials File
    def setup_drive_api_from_file(self, credentials_file: str) -> None:
        """Build the Google Drive v3 client from a service account key file.

        Args:
            credentials_file (str): Path to the service account credentials file.

        Raises:
            Exception: Whatever the credential loader or client builder raised;
                the error is logged and then propagated unchanged.
        """
        drive_scopes = ['https://www.googleapis.com/auth/drive']
        try:
            # Keep the credentials on the instance before building the client
            self.creds = Credentials.from_service_account_file(
                credentials_file, scopes=drive_scopes
            )
            self.drive_service = build('drive', 'v3', credentials=self.creds)
        except Exception as err:
            # Record the failure, then let the caller decide how to react
            logging.error(f"Error setting up Drive API from file: {str(err)}")
            raise
        

    # Method to List All PDF Files in a Google Drive Folder
    def list_files_in_drive_folder(self, folder_id: str) -> List[Dict[str, str]]:
        """List all PDF files in a Google Drive folder.
         Args:
        folder_id (str): The ID of the folder in Google Drive to search for PDF files.

        Returns:
        List[Dict[str, str]]: A list of dictionaries, each containing the ID and name of a PDF file.

        Logs:
        Logs the number of PDF files found in the specified folder.
        """

        # Define query to search for PDF files in the specified folder, ensuring they are not trashed
        query = f"'{folder_id}' in parents and mimeType='application/pdf' and trashed = false"

        # Execute the query to list files in the folder
        results = self.drive_service.files().list(q=query, fields="files(id, name)").execute()

        # Extract the list of files from the results
        files = results.get('files', [])

         # Log the number of files found
        logging.info(f"Found {len(files)} files in the folder.")

        # Return the list of found files
        return files

    

    def download_file_from_drive(self, file_id: str, output_path: str) -> None:
        """Stream one Google Drive file to a local path.

        Args:
            file_id (str): The ID of the file to be downloaded from Google Drive.
            output_path (str): The local path where the downloaded file will be saved.

        Logs:
            Logs the download progress as a percentage after every chunk.
        """
        # Media request for the raw file content
        media_request = self.drive_service.files().get_media(fileId=file_id)

        # Write the content chunk by chunk into the destination file
        with open(output_path, 'wb') as target:
            chunk_loader = MediaIoBaseDownload(target, media_request)
            finished = False
            while not finished:
                progress, finished = chunk_loader.next_chunk()
                logging.info(f"Download progress: {int(progress.progress() * 100)}%")

    
    # Method to Download All Resumes from a Google Drive Folder to a Local Folder
    def download_resumes_from_drive(self, folder_id: str, output_folder: str) -> List[str]:
        """Download all resumes from a Google Drive folder to a local folder.
        
        Args:
        folder_id (str): The ID of the folder in Google Drive containing the resume files.
        output_folder (str): The local directory where the resumes will be saved.

        Returns:
            List[str]: A list of file paths to the downloaded resumes.

        Logs:
            Logs the progress of downloading each resume.
        """

        # Retrieve the list of PDF files in the specified folder from Google Drive
        files = self.list_files_in_drive_folder(folder_id)

        # Initialize an empty list to store paths of downloaded files
        downloaded_files = []

        # Iterate over each file in the folder
        for file in files:
             # Get the file ID and name
            file_id = file['id']
            file_name = file['name']

            # Construct the full local path to save the downloaded file
            output_path = os.path.join(output_folder, file_name)
            
            # Download the file from Google Drive and save it to the local path
            self.download_file_from_drive(file_id, output_path)

            # Append the local path of the downloaded file to the list
            downloaded_files.append(output_path)

        # Return the list of downloaded file paths
        return downloaded_files


    # Method to Extract Text from a PDF File
    def extract_text_from_pdf(self, pdf_path: str) -> str:
        """
        Extract the text content from a PDF file.
    
        Args:
        pdf_path (str): The local path to the PDF file from which text will be extracted.

        Returns:
        str: The extracted text from the PDF, with excess whitespace and non-ASCII characters removed.

        Logs:
        Logs any error that occurs during the extraction process.
        """
        try:
            text = ""
            # Open the PDF file in binary read mode
            with open(pdf_path, "rb") as file:
                reader = PyPDF2.PdfReader(file)

                # Iterate through all pages in the PDF
                for page in reader.pages:
                    # Extract text from each page
                    page_text = page.extract_text()

                    # Clean up the extracted text: remove extra whitespace and non-ASCII characters
                    page_text = re.sub(r'\s+', ' ', page_text)
                    page_text = re.sub(r'[^\x00-\x7F]+', '', page_text)

                    # Append the cleaned text from each page to the result
                    text += page_text + "\n"

            # Return the cleaned text from the entire PDF
            return text.strip()

        except Exception as e:
            # Log any error encountered during text extraction
            logging.error(f"Error extracting text from PDF {pdf_path}: {str(e)}")
            return ""

    
    # Method to Clean and Extract JSON Content from a Response String
    def clean_json_response(self, response_text: str) -> str:
        """
        Clean and extract JSON content from a given response string.
    
        Args:
            response_text (str): The response text containing the JSON data to be cleaned and extracted.

        Returns:
            str: A cleaned version of the extracted JSON string, with unnecessary characters removed.
    
        Logs:
            Logs any errors encountered during the extraction and cleaning process.
        """
        try:
            # Use regular expression to find the JSON content in the response
            json_content = re.search(r'\{.*\}', response_text, re.DOTALL)

            # If JSON content is found, clean and format it
            if json_content:
                json_str = json_content.group()

                 # Remove any ` ```json ` and ` ``` ` markers
                json_str = re.sub(r'```json|```', '', json_str)

                # Replace multiple spaces with a single space to clean up the formatting
                json_str = re.sub(r'\s+', ' ', json_str)

                # Return the cleaned JSON string
                return json_str.strip()
            return ""    # Return the cleaned JSON string
            
        except Exception as e:
            # Log any error encountered during the cleaning process
            logging.error(f"Error cleaning JSON response: {str(e)}")
            return ""

    def analyze_resume(self, text: str) -> Dict[str, Any]:
        # Few-shot examples for better context
        few_shot_examples = """
        Example 1:
        Input: "John Doe, BS Computer Science, Stanford University, 3.8 GPA..."
        Output: {"name": "John Doe", "university": "Stanford University", "course": "BS Computer Science"...}
        
        Example 2:
        Input: "Jane Smith, Machine Learning Engineer with 3 years experience..."
        Output: {"name": "Jane Smith", "ai_ml_score": 3, "gen_ai_score": 2...}
        """
        
        prompt = f"""
        {few_shot_examples}
        
        Analyze this resume carefully and extract information according to these specific rules:

        Scoring Rules:
        1. Gen AI Experience Score (1-3):
           - Score 1: Basic exposure or theoretical knowledge
           - Score 2: Hands-on projects or internships with Gen AI
           - Score 3: Advanced work (e.g., Agentic RAG, Evals, LLM fine-tuning)
           
        2. AI/ML Experience Score (1-3):
           - Score 1: Basic ML/AI knowledge or coursework
           - Score 2: Hands-on ML/AI projects or internships
           - Score 3: Advanced ML/AI work (e.g., research, complex implementations)

        Required JSON structure:
        {{
            "name": "full name",
            "contact_details": "all contact information as shown",
            "university": "university name",
            "year_of_study": "current year or graduation year",
            "course": "degree name",
            "discipline": "field of study",
            "cgpa_percentage": "exact CGPA or percentage",
            "key_skills": ["all technical and relevant skills"],
            "gen_ai_score": "1-3 based on rules above",
            "ai_ml_score": "1-3 based on rules above",
            "supporting_info": {{
                "certifications": ["list of certifications"],
                "internships": ["list of internships with details"],
                "projects": [
                    {{
                        "name": "project name",
                        "description": "brief description",
                        "technologies": ["technologies used"]
                    }}
                ]
            }},
            "additional_insights": {{
                "career_potential": "brief assessment of career trajectory",
                "technical_strength": "evaluation of technical capabilities",
                "experience_level": "overall experience assessment"
            }}
        }}

        Resume text to analyze:
        {text}
        """

        # Method to Generate and Analyze Content from a Model with Retry Logic
        try:
            max_retries = 3   # Set the maximum number of retry attempts
            for attempt in range(max_retries):
                try:
                    # Attempt to generate content using the model with the given prompt
                    response = self.model.generate_content(prompt)

                    # Check if the response is empty, raise an error if so
                    if not response.text:
                        raise ValueError("Empty response received")

                    # Clean and extract JSON content from the response text
                    json_str = self.clean_json_response(response.text)

                    # Check if the cleaned response is valid, raise an error if not
                    if not json_str:
                        raise ValueError("No valid JSON found in response")

                    # Parse the cleaned JSON string into a Python dictionary
                    data = json.loads(json_str)
                    
                    # Define required fields that must be present in the parsed data
                    required_fields = [
                        "name", "contact_details", "university", "year_of_study",
                        "course", "discipline", "cgpa_percentage", "key_skills",
                        "gen_ai_score", "ai_ml_score"
                    ]

                    # Check for missing required fields
                    missing_fields = [field for field in required_fields if field not in data]

                     # Raise an error if any required fields are missing
                    if missing_fields:
                        raise ValueError(f"Missing required fields: {missing_fields}")

                    # Return the parsed data if everything is valid
                    return data
                    
                except json.JSONDecodeError as je:
                    # Handle JSON parsing errors: log a warning and retry if attempts remain
                    logging.warning(f"JSON parse error on attempt {attempt + 1}: {str(je)}")
                    if attempt == max_retries - 1:
                        raise   # Raise the exception if it is the final retry
                    sleep(1)    # Shorter sleep between retries for faster feedback
                    
                except Exception as e:
                    # Handle other exceptions: log and retry if attempts remain
                    if attempt == max_retries - 1:
                        raise     # Raise the exception if it is the final retry
                    sleep(1)      # Short sleep for retry logic
                    
        except Exception as e:
            # Log any error that occurs during the overall process and return an empty dictionary
            logging.error(f"Error analyzing resume: {str(e)}")
            return {}
            
            
    # Method to Process a Batch of Resumes Concurrently
    def process_resume_batch(self, pdf_paths: List[str]) -> List[Dict[str, Any]]:
        """Analyze several resumes in parallel worker threads.

        Args:
            pdf_paths (List[str]): A list of file paths to the PDF resumes to be processed.

        Returns:
            List[Dict[str, Any]]: The non-empty analysis results, in submission order.

        Logs:
            Logs errors encountered while processing individual resumes.
        """
        collected = []

        # At most five resumes are analyzed at the same time
        with ThreadPoolExecutor(max_workers=5) as pool:
            # Submit everything up front, remembering which path each future belongs to
            path_by_future = {}
            for resume_path in pdf_paths:
                path_by_future[pool.submit(self.process_single_resume, resume_path)] = resume_path

            # Walk the futures in submission order; the progress bar advances per resume
            for pending in tqdm(path_by_future, desc="Processing resumes"):
                try:
                    outcome = pending.result()   # blocks until this resume is done
                except Exception as err:
                    logging.error(f"Error processing {path_by_future[pending]}: {str(err)}")
                    continue

                # Empty results mean the resume could not be analyzed; skip them
                if outcome:
                    collected.append(outcome)

        return collected

    
    # Method to Process a Single Resume
    def process_single_resume(self, pdf_path: str) -> Dict[str, Any]:
        """Process a single resume
        Args:
            pdf_path (str): The file path to the PDF resume to be processed.

        Returns:
            Dict[str, Any]: A dictionary containing the analysis results or an empty dictionary if processing fails.

        Logs:Logs any errors encountered during the resume processing.
        """
        try:
            # Extract text from the given PDF file
            resume_text = self.extract_text_from_pdf(pdf_path)

            # If no text was extracted from the PDF, return an empty dictionary
            if not resume_text:
                return {}

            # Analyze the extracted resume text and obtain the results
            result = self.analyze_resume(resume_text)

            # If analysis returns valid data, add the source file name to the result
            if result:
                result['source_file'] = os.path.basename(pdf_path)
                
            # Return the analysis result
            return result
            
        except Exception as e:
            # Log any error encountered during the resume processing
            logging.error(f"Error processing {pdf_path}: {str(e)}")

            # Return an empty dictionary in case of an error
            return {}


    # Method to Create Detailed Sheets in an Excel File
    def create_detailed_sheets(self, writer: pd.ExcelWriter, results: List[Dict[str, Any]]) -> None:
        """Create additional sheets with detailed information\
        Args:
            writer (pd.ExcelWriter): The Excel writer object to write the data to the file.
            results (List[Dict[str, Any]]): A list of dictionaries containing processed resume data.
    
        Logs:
            The method processes and writes detailed data to separate sheets for projects and skills analysis.
        """
        # Projects sheet: Extract and collect project data from the results
        projects_data = []
        for result in results:
            # Check if 'supporting_info' and 'projects' are present in the result
            if 'supporting_info' in result and 'projects' in result['supporting_info']:
                for project in result['supporting_info']['projects']:
                    # Add the candidate's name to each project entry
                    project['candidate_name'] = result['name']
                    projects_data.append(project)

        # If there is project data, create a DataFrame and write it to the 'Projects' sheet
        if projects_data:
            pd.DataFrame(projects_data).to_excel(writer, sheet_name='Projects', index=False)

        # Skills Analysis sheet: Extract and collect skills-related data
        skills_data = []
        for result in results:
            # Check if 'key_skills' are present in the result
            if 'key_skills' in result:
                skills_data.append({
                    'name': result['name'],
                    'skills': ', '.join(result['key_skills']),
                    'ai_ml_score': result['ai_ml_score'],
                    'gen_ai_score': result['gen_ai_score']
                })

        # If there is skills data, create a DataFrame and write it to the 'Skills Analysis' sheet
        if skills_data:
            pd.DataFrame(skills_data).to_excel(writer, sheet_name='Skills Analysis', index=False)

    def process_resumes(self, pdf_folder: str, output_excel: str) -> None:
        """Process all resumes in the folder with batch processing"""
        # Step 1: Validate the PDF folder path
        if not os.path.exists(pdf_folder):
            raise ValueError(f"Folder not found: {pdf_folder}")
            
        ## Step 2: Get all PDF files from the specified folder
        pdf_files = [
            os.path.join(pdf_folder, f) 
            for f in os.listdir(pdf_folder) 
            if f.endswith('.pdf')
        ]
        
        # Step 3: Process the files in batches for optimal performance
        batch_size = 10
        all_results = []
        failed_files = []
        
        # Iterate through the PDF files in batches
        for i in range(0, len(pdf_files), batch_size):
            batch = pdf_files[i:i + batch_size]
            results = self.process_resume_batch(batch)
            
            # Step 4: Filter out and collect successful results
            all_results.extend([r for r in results if r])
            
            # Track failed files for reporting
            processed_files = {r.get('source_file') for r in results if r}
            failed_files.extend([
                (os.path.basename(f), "Processing failed")
                for f in batch
                if os.path.basename(f) not in processed_files
            ])

        # Step 5: Check if any files were successfully processed
        if not all_results:
            logging.warning("No resume data was successfully processed")
            return

        # Step 6: Create a DataFrame from the processed results
        try:
            # Create DataFrame with organized columns
            df = pd.json_normalize(
                all_results,
                sep='_',
                record_path=None,
                meta=[
                    'source_file', 'name', 'contact_details', 'university',
                    'year_of_study', 'course', 'discipline', 'cgpa_percentage',
                    'gen_ai_score', 'ai_ml_score'
                ]
            )
            
            ## Step 7: Format and save the results to an Excel file
            with pd.ExcelWriter(output_excel, engine='openpyxl') as writer:
                # Main analysis sheet
                df.to_excel(writer, sheet_name='Resume Analysis', index=False)
                
                # Additional sheets for detailed information if available
                if all_results:
                    self.create_detailed_sheets(writer, all_results)
                
                # # Step 8: Add a sheet for the failed files
                if failed_files:
                    pd.DataFrame(failed_files, columns=['Filename', 'Error']).to_excel(
                        writer, sheet_name='Failed Files', index=False
                    )

            # Log completion information
            logging.info(f"Processing complete. Output saved to {output_excel}")
            logging.info(f"Successfully processed: {len(all_results)} resumes")
            logging.info(f"Failed to process: {len(failed_files)} resumes")
            
        except Exception as e:
            # Handle any errors during Excel saving
            logging.error(f"Error saving to Excel: {str(e)}")

    def process_resumes_from_drive(self, folder_id: str, output_excel: str, local_temp_folder: str) -> None:
        """Download resumes from Google Drive, process them, and save results."""

        # Step 1: Ensure local temporary folder exists
        if not os.path.exists(local_temp_folder):
            os.makedirs(local_temp_folder)

        # Step 2: Download resumes from the specified Google Drive folder
        logging.info("Downloading resumes from Google Drive...")
        downloaded_files = self.download_resumes_from_drive(folder_id, local_temp_folder)
        
        # Step 3: Process the downloaded resumes locally
        logging.info("Processing downloaded resumes...")
        self.process_resumes(local_temp_folder, output_excel)

### Main Function

In [16]:
def main():
    """
    Main function to process resumes from Google Drive using the ResumeAnalyzer class.

    Secrets are read from the environment instead of being written into the
    source, so they do not end up in version control or shared notebooks:

    - GEMINI_API_KEY: Gemini (Google Generative AI) API key. Required.
    - GOOGLE_APPLICATION_CREDENTIALS: path to the Google Drive service account
      JSON key file. Required.
    - RESUME_DRIVE_FOLDER_ID: ID of the Drive folder with the resumes. Optional;
      defaults to the folder used so far.

    Raises:
        RuntimeError: If a required environment variable is missing or empty.
    """
    # SECURITY: an API key and a service account private key were previously
    # hard-coded here. Credentials that have appeared in source must be treated
    # as compromised: revoke them and issue new ones.

    # Step 1: Read the secrets from the environment
    api_key = os.environ.get("GEMINI_API_KEY")
    credentials_file = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")

    missing = [
        name
        for name, value in (
            ("GEMINI_API_KEY", api_key),
            ("GOOGLE_APPLICATION_CREDENTIALS", credentials_file),
        )
        if not value
    ]
    if missing:
        raise RuntimeError(
            f"Missing required environment variable(s): {', '.join(missing)}"
        )

    # Step 2: Define Google Drive Folder ID and Local Temporary Folder
    drive_folder_id = os.environ.get(
        "RESUME_DRIVE_FOLDER_ID", "1k8DS8dmm3fmUuwRIxv-g16SSI4GtB69P"
    )
    local_temp_folder = "local_resumes_folder"  # Temporary folder for downloaded files

    # Step 3: Specify the Output Excel File Name
    output_excel = "Resume_Analysis_Output.xlsx"

    # Step 4: Initialize ResumeAnalyzer
    # This sets up the Generative AI and Google Drive integrations
    analyzer = ResumeAnalyzer(api_key, credentials_file=credentials_file)

    # Step 5: Process Resumes from Google Drive
    # Downloads resumes from the Drive folder, processes them, and saves the analysis to an Excel file
    analyzer.process_resumes_from_drive(drive_folder_id, output_excel, local_temp_folder)

### Execute the Main Function

In [18]:
# Run the pipeline only when this file is executed as a script (or notebook cell),
# not when it is imported as a module.
if __name__ == "__main__":
    main()

2025-01-21 20:59:00,606 - INFO - file_cache is only supported with oauth2client<4.0.0
2025-01-21 20:59:00,669 - INFO - Downloading resumes from Google Drive...
2025-01-21 20:59:02,305 - INFO - Found 15 files in the folder.
2025-01-21 20:59:04,002 - INFO - Download progress: 100%
2025-01-21 20:59:06,221 - INFO - Download progress: 100%
2025-01-21 20:59:07,780 - INFO - Download progress: 100%
2025-01-21 20:59:09,357 - INFO - Download progress: 100%
2025-01-21 20:59:11,135 - INFO - Download progress: 100%
2025-01-21 20:59:12,564 - INFO - Download progress: 100%
2025-01-21 20:59:14,670 - INFO - Download progress: 100%
2025-01-21 20:59:16,006 - INFO - Download progress: 100%
2025-01-21 20:59:17,596 - INFO - Download progress: 100%
2025-01-21 20:59:19,216 - INFO - Download progress: 100%
2025-01-21 20:59:20,409 - INFO - Download progress: 100%
2025-01-21 20:59:21,857 - INFO - Download progress: 100%
2025-01-21 20:59:23,430 - INFO - Download progress: 100%
2025-01-21 20:59:35,846 - INFO - Dow