In [1]:
import json
from datetime import datetime
from dateutil.relativedelta import relativedelta
import re

def parse_date(date_str):
    """
    Parse a CV date string into a datetime object.

    Accepted formats are a full month name and year ('November 2023'), an
    abbreviated month name and year ('Oct 2021'), or a bare year ('2021').
    Month-and-year inputs resolve to the first day of that month and a bare
    year resolves to 1 January of that year.

    Surrounding whitespace is removed before any comparison, so ' Present '
    is recognised as an ongoing position instead of falling through to the
    "could not parse" warning.

    Args:
        date_str (str): Date string to parse. An empty/None/blank value or
            the word 'present' (any casing) means "now".

    Returns:
        datetime: The parsed date, or datetime.now() when the value denotes
        the present or cannot be parsed (a warning is printed in the latter
        case).
    """
    cleaned = date_str.strip() if date_str else ''
    if not cleaned or cleaned.lower() == 'present':
        return datetime.now()

    # Most specific pattern first: full month name, abbreviated month name,
    # then year only.
    for date_format in ("%B %Y", "%b %Y", "%Y"):
        try:
            return datetime.strptime(cleaned, date_format)
        except ValueError:
            continue

    # Best-effort fallback: keep going with the current date but say so.
    print(f"Warning: Could not parse date '{cleaned}', using current date")
    return datetime.now()

def calculate_experience_duration(start_date, end_date):
    """
    Calculate the duration between two CV date strings in years and months.

    Both strings are converted with parse_date() and the gap is measured
    with dateutil's relativedelta. Only the years and months components are
    used; any leftover days are ignored.

    Args:
        start_date (str): Start of the position, e.g. 'November 2023'.
        end_date (str): End of the position. An empty/None/blank value or
            'Present' (any casing, surrounding whitespace ignored) marks an
            ongoing position.

    Returns:
        dict: 'years' and 'months' (the relativedelta components),
        'total_months' (years * 12 + months), 'start_date' formatted as
        'Month YYYY', and 'end_date' formatted the same way or the literal
        'Present' for an ongoing position.
    """
    start = parse_date(start_date)
    end = parse_date(end_date)

    # Normalise before comparing so that ' Present ' is labelled 'Present'
    # rather than with the current month.
    normalized_end = end_date.strip().lower() if end_date else ''
    is_ongoing = normalized_end in ('', 'present')

    # Calculate the difference
    diff = relativedelta(end, start)

    return {
        'years': diff.years,
        'months': diff.months,
        'total_months': diff.years * 12 + diff.months,
        'start_date': start.strftime("%B %Y"),
        'end_date': 'Present' if is_ongoing else end.strftime("%B %Y")
    }

def calculate_total_professional_experience(cv_data):
    """
    Print and return a summary of the work experience listed in a CV.

    Every entry under cv_data['CV_data']['structured_data']['work_experience']
    is measured with calculate_experience_duration() and the per-position
    month counts are added together; positions whose date ranges overlap
    therefore each contribute their full length to the total.

    Returns a dict with 'total_years', 'total_months_remainder',
    'total_months', 'total_years_decimal' (rounded to two places) and
    'experience_breakdown' (one dict per position holding 'company',
    'title' and 'duration').
    """
    jobs = cv_data['CV_data']['structured_data']['work_experience']
    rule = "=" * 50

    print("Professional Experience Breakdown:")
    print(rule)

    breakdown = []
    month_tally = 0
    for position, entry in enumerate(jobs, start=1):
        employer = entry['company']
        role = entry['title']
        span = calculate_experience_duration(entry['start_date'], entry['end_date'])

        month_tally += span['total_months']
        breakdown.append({'company': employer, 'title': role, 'duration': span})

        print(f"{position}. {employer}")
        print(f"   Position: {role}")
        print(f"   Duration: {span['start_date']} - {span['end_date']}")
        print(f"   Length: {span['years']} years, {span['months']} months")
        print()

    # Split the running month count into whole years plus leftover months
    whole_years, leftover_months = divmod(month_tally, 12)
    decimal_years = month_tally / 12

    print("Total Professional Experience Summary:")
    print(rule)
    print(f"Total Experience: {whole_years} years, {leftover_months} months")
    print(f"Total Months: {month_tally} months")
    print(f"Total Years (decimal): {decimal_years:.2f} years")

    return {
        'total_years': whole_years,
        'total_months_remainder': leftover_months,
        'total_months': month_tally,
        'total_years_decimal': round(decimal_years, 2),
        'experience_breakdown': breakdown
    }

def load_cv_data(file_path=None, cv_data_dict=None):
    """
    Load CV data from a dictionary, a JSON file, or the built-in sample.

    Args:
        file_path (str, optional): Path to a JSON file holding the CV data.
            Read as UTF-8. Only consulted when cv_data_dict is None.
        cv_data_dict (dict, optional): CV data to use as-is. Any dictionary,
            including an empty one, is returned unchanged so that explicit
            input is never replaced by the sample CV.

    Returns:
        dict: cv_data_dict if given, otherwise the parsed file content,
        otherwise the hard-coded sample CV.

    Raises:
        OSError: If file_path cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    if cv_data_dict is not None:
        return cv_data_dict

    if file_path:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)

    # Sample data from the provided CV
    return {
        "CV_data": {
            "structured_data": {
                "work_experience": [
                    {
                        "company": "Really Great Tech",
                        "title": "Data Analytics/AI/ML Engineer",
                        "start_date": "November 2023",
                        "end_date": "October 2024"
                    },
                    {
                        "company": "Freelancer (ALX venturers)",
                        "title": "Data Scientist",
                        "start_date": "September 2024",
                        "end_date": "Present"
                    },
                    {
                        "company": "Ebit",
                        "title": "Data Scientist",
                        "start_date": "September 2021",
                        "end_date": "September 2023"
                    }
                ]
            }
        }
    }

# Main execution
if __name__ == "__main__":
    # Note: python-dateutil must be installed (pip install python-dateutil).
    # It is imported at the top of the file, so a missing package fails
    # there, before this block runs; an ImportError handler here could never
    # report it and is therefore not included.
    try:
        # Load the CV data (using the sample data from the document)
        cv_data = load_cv_data()

        # Calculate professional experience
        experience_summary = calculate_total_professional_experience(cv_data)

        print("\n" + "=" * 50)
        print("FINAL SUMMARY")
        print("=" * 50)
        # The sample data carries no name field, so the name is fixed here.
        print("Name: Aidoo Enoch Kwadwo")
        print(f"Total Professional Experience: {experience_summary['total_years']} years, {experience_summary['total_months_remainder']} months")
        print(f"Equivalent to: {experience_summary['total_years_decimal']} years")

    except Exception as e:
        # Top-level boundary of the demo: report the problem instead of a traceback.
        print(f"An error occurred: {e}")

Professional Experience Breakdown:
1. Really Great Tech
   Position: Data Analytics/AI/ML Engineer
   Duration: November 2023 - October 2024
   Length: 0 years, 11 months

2. Freelancer (ALX venturers)
   Position: Data Scientist
   Duration: September 2024 - Present
   Length: 1 years, 0 months

3. Ebit
   Position: Data Scientist
   Duration: September 2021 - September 2023
   Length: 2 years, 0 months

Total Professional Experience Summary:
Total Experience: 3 years, 11 months
Total Months: 47 months
Total Years (decimal): 3.92 years

FINAL SUMMARY
Name: Aidoo Enoch Kwadwo
Total Professional Experience: 3 years, 11 months
Equivalent to: 3.92 years


In [24]:
import json
from datetime import datetime
from dateutil.relativedelta import relativedelta
import re

class ProfessionalExperienceCalculator:
    """
    A class to calculate professional experience from CV work experience data.

    The CV is read as
    {'CV_data': {'structured_data': {'name': ..., 'work_experience': [...]}}},
    where each work_experience entry may carry 'company', 'title',
    'start_date' and 'end_date'. The durations of all positions are summed,
    so positions whose date ranges overlap each contribute their full length.
    """

    def __init__(self, cv_data=None, file_path=None):
        """
        Initialize the calculator with CV data.

        Args:
            cv_data (dict): Dictionary containing CV data
            file_path (str): Path to JSON file containing CV data
        """
        self.cv_data = self._load_cv_data(cv_data, file_path)
        # Filled in by calculate_experience(); None means "not calculated yet".
        self.experience_summary = None

    def _load_cv_data(self, cv_data_dict=None, file_path=None):
        """
        Load CV data from file or use provided dictionary.

        Args:
            cv_data_dict (dict): CV data dictionary; used as-is when non-empty
            file_path (str): Path to a JSON file, read as UTF-8; only
                consulted when no non-empty dictionary is given

        Returns:
            dict: The dictionary, the parsed file content, or an empty CV
            structure (no work experience) when neither is provided

        Raises:
            OSError: If file_path cannot be opened.
            json.JSONDecodeError: If the file does not contain valid JSON.
        """
        if cv_data_dict:
            return cv_data_dict

        if file_path:
            # JSON text is UTF-8; do not depend on the platform's locale encoding.
            with open(file_path, 'r', encoding='utf-8') as file:
                return json.load(file)

        # Return empty structure if no data provided
        return {
            "CV_data": {
                "structured_data": {
                    "work_experience": []
                }
            }
        }

    def _parse_date(self, date_str):
        """
        Parse date string in various formats to datetime object.

        Accepted formats are 'November 2023', 'Oct 2021' and a bare year such
        as '2021'. Surrounding whitespace is removed before any comparison,
        so ' Present ' is recognised as an ongoing position instead of
        triggering the "could not parse" warning.

        Args:
            date_str (str): Date string to parse. An empty/None/blank value
                or 'present' (any casing) means "now".

        Returns:
            datetime: Parsed datetime object, or datetime.now() when the
            value denotes the present or cannot be parsed (a warning is
            printed in the latter case)
        """
        cleaned = date_str.strip() if date_str else ''
        if not cleaned or cleaned.lower() == 'present':
            return datetime.now()

        # Most specific pattern first: full month name, abbreviated month
        # name, then year only.
        for date_format in ("%B %Y", "%b %Y", "%Y"):
            try:
                return datetime.strptime(cleaned, date_format)
            except ValueError:
                continue

        # Best-effort fallback: keep going with the current date but say so.
        print(f"Warning: Could not parse date '{cleaned}', using current date")
        return datetime.now()

    def _calculate_duration(self, start_date, end_date):
        """
        Calculate the duration between two dates in years and months.

        Only the years and months components of the relativedelta are used;
        any leftover days are ignored.

        Args:
            start_date (str): Start date string
            end_date (str): End date string; empty/None/blank or 'Present'
                (any casing, surrounding whitespace ignored) marks an
                ongoing position

        Returns:
            dict: 'years', 'months', 'total_months' (years * 12 + months),
            'start_date' as 'Month YYYY' and 'end_date' as 'Month YYYY' or
            the literal 'Present'
        """
        start = self._parse_date(start_date)
        end = self._parse_date(end_date)

        # Normalise before comparing so that ' Present ' is labelled
        # 'Present' rather than with the current month.
        normalized_end = end_date.strip().lower() if end_date else ''
        is_ongoing = normalized_end in ('', 'present')

        # Calculate the difference
        diff = relativedelta(end, start)

        return {
            'years': diff.years,
            'months': diff.months,
            'total_months': diff.years * 12 + diff.months,
            'start_date': start.strftime("%B %Y"),
            'end_date': 'Present' if is_ongoing else end.strftime("%B %Y")
        }

    def calculate_experience(self, verbose=True):
        """
        Calculate total professional experience from CV work experience data.

        The result is also stored on self.experience_summary.

        Args:
            verbose (bool): Whether to print detailed breakdown

        Returns:
            dict: Experience summary with 'total_years',
            'total_months_remainder', 'total_months', 'total_years_decimal',
            'experience_breakdown' and 'number_of_positions'
        """
        work_experiences = self.cv_data['CV_data']['structured_data']['work_experience']

        total_months = 0
        experience_details = []

        if verbose:
            print("Professional Experience Breakdown:")
            print("=" * 50)

        for i, job in enumerate(work_experiences, 1):
            company = job.get('company', 'Unknown Company')
            title = job.get('title', 'Unknown Position')
            # Missing dates come back as None, which _parse_date treats as "now".
            start_date = job.get('start_date')
            end_date = job.get('end_date')

            duration = self._calculate_duration(start_date, end_date)
            total_months += duration['total_months']

            experience_details.append({
                'company': company,
                'title': title,
                'duration': duration
            })

            if verbose:
                print(f"{i}. {company}")
                print(f"   Position: {title}")
                print(f"   Duration: {duration['start_date']} - {duration['end_date']}")
                print(f"   Length: {duration['years']} years, {duration['months']} months")
                print()

        # Convert total months to years and months
        total_years = total_months // 12
        remaining_months = total_months % 12

        self.experience_summary = {
            'total_years': total_years,
            'total_months_remainder': remaining_months,
            'total_months': total_months,
            'total_years_decimal': round(total_months / 12, 2),
            'experience_breakdown': experience_details,
            'number_of_positions': len(work_experiences)
        }

        if verbose:
            self.print_summary()

        return self.experience_summary

    def print_summary(self):
        """Print the experience summary, or a hint if nothing was calculated yet."""
        if not self.experience_summary:
            print("No experience calculated yet. Please run calculate_experience() first.")
            return

        print("Total Professional Experience Summary:")
        print("=" * 50)
        print(f"Number of Positions: {self.experience_summary['number_of_positions']}")
        print(f"Total Experience: {self.experience_summary['total_years']} years, {self.experience_summary['total_months_remainder']} months")
        print(f"Total Months: {self.experience_summary['total_months']} months")
        print(f"Total Years (decimal): {self.experience_summary['total_years_decimal']} years")

    def get_total_months(self):
        """
        Get total experience in months.

        Runs calculate_experience(verbose=False) first if no summary exists.

        Returns:
            int: Total months of experience
        """
        if not self.experience_summary:
            self.calculate_experience(verbose=False)
        return self.experience_summary['total_months']

    def get_total_years(self):
        """
        Get total experience in years (decimal).

        Runs calculate_experience(verbose=False) first if no summary exists.

        Returns:
            float: Total years of experience, rounded to two places
        """
        if not self.experience_summary:
            self.calculate_experience(verbose=False)
        return self.experience_summary['total_years_decimal']

    def get_experience_breakdown(self):
        """
        Get detailed breakdown of each position.

        Runs calculate_experience(verbose=False) first if no summary exists.

        Returns:
            list: List of experience details for each position
        """
        if not self.experience_summary:
            self.calculate_experience(verbose=False)
        return self.experience_summary['experience_breakdown']

    def update_cv_data(self, new_cv_data=None, file_path=None):
        """
        Update the CV data and reset experience summary.

        Args:
            new_cv_data (dict): New CV data dictionary
            file_path (str): Path to new JSON file
        """
        self.cv_data = self._load_cv_data(new_cv_data, file_path)
        # Drop the cached summary so the next getter recalculates it.
        self.experience_summary = None

    def get_name(self):
        """
        Get the person's name from CV data.

        Returns:
            str: Person's name or 'Unknown' if not found
        """
        return self.cv_data.get('CV_data', {}).get('structured_data', {}).get('name', 'Unknown')


# Example usage and demonstration
if __name__ == "__main__":
    # python-dateutil is imported at the top of the file, so a missing
    # package fails there, before this block runs; an ImportError handler
    # here could never report it and is therefore not included.
    try:
        # Sample CV data from the document
        sample_cv_data = {
            "CV_data": {
                "structured_data": {
                    "name": "Aidoo Enoch Kwadwo",
                    "work_experience": [
                        {
                            "company": "Really Great Tech",
                            "title": "Data Analytics/AI/ML Engineer",
                            "start_date": "November 2023",
                            "end_date": "October 2024"
                        },
                        {
                            "company": "Freelancer (ALX venturers)",
                            "title": "Data Scientist",
                            "start_date": "September 2024",
                            "end_date": "Present"
                        },
                        {
                            "company": "Ebit",
                            "title": "Data Scientist",
                            "start_date": "September 2021",
                            "end_date": "September 2023"
                        }
                    ]
                }
            }
        }

        # Create calculator instance
        calculator = ProfessionalExperienceCalculator(cv_data=sample_cv_data)

        # Calculate experience with detailed output
        experience_summary = calculator.calculate_experience(verbose=True)

        print("\n" + "=" * 50)
        print("USING CLASS METHODS")
        print("=" * 50)
        print(f"Name: {calculator.get_name()}")
        print(f"Total Experience (decimal years): {calculator.get_total_years()}")
        print(f"Total Experience (months): {calculator.get_total_months()}")

        # Get individual position details
        print(f"\nNumber of positions: {len(calculator.get_experience_breakdown())}")

        # Example of using class methods without verbose output
        print("\n" + "=" * 50)
        print("SILENT CALCULATION EXAMPLE")
        print("=" * 50)

        # Create new instance and calculate silently
        silent_calculator = ProfessionalExperienceCalculator(cv_data=sample_cv_data)
        total_years = silent_calculator.get_total_years()  # This will calculate automatically
        print(f"Total experience: {total_years} years (calculated silently)")

    except Exception as e:
        # Top-level boundary of the demo: report the problem instead of a traceback.
        print(f"An error occurred: {e}")

Professional Experience Breakdown:
1. Really Great Tech
   Position: Data Analytics/AI/ML Engineer
   Duration: November 2023 - October 2024
   Length: 0 years, 11 months

2. Freelancer (ALX venturers)
   Position: Data Scientist
   Duration: September 2024 - Present
   Length: 1 years, 0 months

3. Ebit
   Position: Data Scientist
   Duration: September 2021 - September 2023
   Length: 2 years, 0 months

Total Professional Experience Summary:
Number of Positions: 3
Total Experience: 3 years, 11 months
Total Months: 47 months
Total Years (decimal): 3.92 years

USING CLASS METHODS
Name: Aidoo Enoch Kwadwo
Total Experience (decimal years): 3.92
Total Experience (months): 47

Number of positions: 3

SILENT CALCULATION EXAMPLE
Total experience: 3.92 years (calculated silently)


In [25]:
import json
from datetime import datetime
from dateutil.relativedelta import relativedelta
import re

def parse_date(date_str):
    """
    Parse date string in various formats to datetime object.
    Handles formats like 'November 2023', 'Oct 2021', '2021', 'Present', etc.

    Whitespace around the value is removed before it is inspected, so
    ' Present ' counts as an ongoing position rather than an unparseable
    date. Month-and-year inputs resolve to the first day of the month; a
    bare year resolves to 1 January.

    Args:
        date_str (str): Date string. Empty/None/blank or 'present' (any
            casing) means "now".

    Returns:
        datetime: The parsed date, or datetime.now() for a present/missing
        value or an unparseable one (the latter also prints a warning).
    """
    text = date_str.strip() if date_str else ''
    if not text or text.lower() == 'present':
        return datetime.now()

    # Tried in order: full month name, abbreviated month name, year only.
    for pattern in ("%B %Y", "%b %Y", "%Y"):
        try:
            return datetime.strptime(text, pattern)
        except ValueError:
            continue

    # Best-effort fallback so one bad entry does not abort the whole report.
    print(f"Warning: Could not parse date '{text}', using current date")
    return datetime.now()

def merge_intervals(intervals):
    """
    Merge overlapping intervals.

    Intervals are ordered by start and folded together whenever the next
    start is not later than the end of the interval merged so far, so
    intervals that merely touch are merged as well. The input is left
    untouched; a sorted copy is used instead of sorting in place.

    Args:
        intervals: iterable of (start_datetime, end_datetime) pairs

    Returns:
        list: Non-overlapping (start, end) tuples ordered by start; an empty
        list for empty input
    """
    # sorted() copies, so the caller's list keeps its original order.
    ordered = sorted(intervals, key=lambda x: x[0])
    if not ordered:
        return []

    merged = [ordered[0]]

    for current_start, current_end in ordered[1:]:
        last_start, last_end = merged[-1]
        if current_start <= last_end:  # overlap
            # Keep the later end: the current interval may lie entirely
            # inside the previous one.
            merged[-1] = (last_start, max(last_end, current_end))
        else:
            merged.append((current_start, current_end))

    return merged

def calculate_total_professional_experience(cv_data):
    """
    Calculate total professional experience (without double-counting overlaps).

    Each position is printed with its own length, then all date ranges are
    merged with merge_intervals() so that time covered by more than one
    position is counted once in the total.

    Args:
        cv_data (dict): CV data; positions are read from
            cv_data['CV_data']['structured_data']['work_experience'], each
            with 'company', 'title', 'start_date' and 'end_date'. An
            'end_date' that is None/empty/blank or 'Present' (any casing)
            marks an ongoing position.

    Returns:
        dict: 'total_years', 'total_months_remainder', 'total_months' and
        'total_years_decimal' (rounded to two places)

    Raises:
        KeyError: If the expected structure or a position field is missing.
    """
    work_experiences = cv_data['CV_data']['structured_data']['work_experience']
    intervals = []

    print("Professional Experience Breakdown:")
    print("=" * 50)

    for i, job in enumerate(work_experiences, 1):
        company = job['company']
        title = job['title']
        start_date = parse_date(job['start_date'])
        raw_end_date = job['end_date']
        end_date = parse_date(raw_end_date)

        # parse_date accepts a None/empty end date as "now"; the label must
        # cope with the same values instead of calling .lower() on None.
        normalized_end = raw_end_date.strip().lower() if raw_end_date else ''
        end_label = ('Present' if normalized_end in ('', 'present')
                     else end_date.strftime('%B %Y'))

        intervals.append((start_date, end_date))

        diff = relativedelta(end_date, start_date)
        print(f"{i}. {company}")
        print(f"   Position: {title}")
        print(f"   Duration: {start_date.strftime('%B %Y')} - {end_label}")
        print(f"   Length: {diff.years} years, {diff.months} months")
        print()

    # Merge overlapping intervals
    merged_intervals = merge_intervals(intervals)

    # Calculate total months from merged intervals
    total_months = 0
    for start, end in merged_intervals:
        diff = relativedelta(end, start)
        total_months += diff.years * 12 + diff.months

    total_years = total_months // 12
    remaining_months = total_months % 12

    print("Total Professional Experience Summary (No Overlaps):")
    print("=" * 50)
    print(f"Total Experience: {total_years} years, {remaining_months} months")
    print(f"Total Months: {total_months} months")
    print(f"Total Years (decimal): {total_months / 12:.2f} years")

    return {
        'total_years': total_years,
        'total_months_remainder': remaining_months,
        'total_months': total_months,
        'total_years_decimal': round(total_months / 12, 2),
    }

def load_cv_data(file_path=None, cv_data_dict=None):
    """
    Load CV data from file or use provided dictionary.

    Args:
        file_path (str, optional): Path to a JSON file with the CV data,
            read as UTF-8. Ignored when cv_data_dict is given.
        cv_data_dict (dict, optional): CV data to return unchanged. An empty
            dictionary is still treated as provided input and is not
            replaced by the sample CV.

    Returns:
        dict: The provided dictionary, else the parsed file, else the
        built-in sample CV.

    Raises:
        OSError: If file_path cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    if cv_data_dict is not None:
        return cv_data_dict

    if file_path:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)

    # Sample CV data
    return {
        "CV_data": {
            "structured_data": {
                "work_experience": [
                    {
                        "company": "Really Great Tech",
                        "title": "Data Analytics/AI/ML Engineer",
                        "start_date": "November 2023",
                        "end_date": "October 2024"
                    },
                    {
                        "company": "Freelancer (ALX venturers)",
                        "title": "Data Scientist",
                        "start_date": "September 2024",
                        "end_date": "Present"
                    },
                    {
                        "company": "Ebit",
                        "title": "Data Scientist",
                        "start_date": "September 2021",
                        "end_date": "September 2023"
                    }
                ]
            }
        }
    }

# Main execution
if __name__ == "__main__":
    # python-dateutil is imported at the top of the file, so a missing
    # package fails there, before this block runs; an ImportError handler
    # here could never report it and is therefore not included.
    try:
        cv_data = load_cv_data()
        experience_summary = calculate_total_professional_experience(cv_data)

        print("\n" + "=" * 50)
        print("FINAL SUMMARY")
        print("=" * 50)
        # The sample data carries no name field, so the name is fixed here.
        print("Name: Aidoo Enoch Kwadwo")
        print(f"Total Professional Experience: {experience_summary['total_years']} years, {experience_summary['total_months_remainder']} months")
        print(f"Equivalent to: {experience_summary['total_years_decimal']} years")

    except Exception as e:
        # Top-level boundary of the demo: report the problem instead of a traceback.
        print(f"An error occurred: {e}")


Professional Experience Breakdown:
1. Really Great Tech
   Position: Data Analytics/AI/ML Engineer
   Duration: November 2023 - October 2024
   Length: 0 years, 11 months

2. Freelancer (ALX venturers)
   Position: Data Scientist
   Duration: September 2024 - Present
   Length: 1 years, 0 months

3. Ebit
   Position: Data Scientist
   Duration: September 2021 - September 2023
   Length: 2 years, 0 months

Total Professional Experience Summary (No Overlaps):
Total Experience: 3 years, 10 months
Total Months: 46 months
Total Years (decimal): 3.83 years

FINAL SUMMARY
Name: Aidoo Enoch Kwadwo
Total Professional Experience: 3 years, 10 months
Equivalent to: 3.83 years


In [21]:
import json
from datetime import datetime
from dateutil.relativedelta import relativedelta
import re

class ProfessionalExperienceCalculator:
    """
    A class to calculate professional experience from CV work experience data,
    handling overlaps and providing structured outputs.

    Work experience is looked up under cv_data['CV_data']['structured_data']
    or, when that layer is absent, directly under cv_data['CV_data']. Date
    ranges of all positions are merged before totalling, so time covered by
    more than one position is counted once.
    """

    def __init__(self, file_path=None, cv_data_dict=None):
        """
        Initialize the calculator with CV data.

        Args:
            file_path (str, optional): Path to JSON file containing CV data
            cv_data_dict (dict, optional): Dictionary containing CV data
        """
        self.cv_data = self._load_cv_data(file_path, cv_data_dict)
        # Filled in by calculate_experience(); None means "not calculated yet".
        self.experience_summary = None

    def _load_cv_data(self, file_path=None, cv_data_dict=None):
        """
        Load CV data from file or use provided dictionary.

        A provided dictionary always wins, even an empty one, so explicit
        input is never replaced by the sample CV. A file that is missing or
        holds invalid JSON is reported and the sample CV is used instead.

        Args:
            file_path (str, optional): Path to JSON file, read as UTF-8
            cv_data_dict (dict, optional): CV data dictionary

        Returns:
            dict: CV data
        """
        if cv_data_dict is not None:
            return cv_data_dict

        if file_path:
            try:
                # JSON text is UTF-8; do not depend on the locale encoding.
                with open(file_path, 'r', encoding='utf-8') as file:
                    return json.load(file)
            except FileNotFoundError:
                print(f"Error: File '{file_path}' not found. Using sample data.")
            except json.JSONDecodeError:
                print(f"Error: Invalid JSON in '{file_path}'. Using sample data.")

        # Sample CV data
        return {
            "CV_data": {
                "structured_data": {
                    "work_experience": [
                        {
                            "company": "Really Great Tech",
                            "title": "Data Analytics/AI/ML Engineer",
                            "start_date": "November 2023",
                            "end_date": "October 2024"
                        },
                        {
                            "company": "Freelancer (ALX venturers)",
                            "title": "Data Scientist",
                            "start_date": "September 2024",
                            "end_date": "Present"
                        },
                        {
                            "company": "Ebit",
                            "title": "Data Scientist",
                            "start_date": "September 2021",
                            "end_date": "September 2023"
                        }
                    ]
                }
            }
        }

    def _parse_date(self, date_str):
        """
        Parse date string in various formats to datetime object.
        Handles formats like 'November 2023', 'Oct 2021', '2021', 'Present', etc.
        Falls back to current date if parsing fails or input is invalid.

        Surrounding whitespace is removed before any comparison, so
        ' Present ' is recognised as an ongoing position instead of
        triggering the "could not parse" warning.

        Args:
            date_str (str): Date string to parse. Empty/None/blank or
                'present' (any casing) means "now".

        Returns:
            datetime: Parsed datetime object
        """
        cleaned = date_str.strip() if date_str else ''
        if not cleaned or cleaned.lower() == 'present':
            return datetime.now()

        # Most specific pattern first: full month name, abbreviated month
        # name, then year only.
        for date_format in ("%B %Y", "%b %Y", "%Y"):
            try:
                return datetime.strptime(cleaned, date_format)
            except ValueError:
                continue

        print(f"Warning: Could not parse date '{cleaned}', using current date")
        return datetime.now()

    def _merge_intervals(self, intervals):
        """
        Merge overlapping intervals.

        Intervals that merely touch (next start equals previous end) are
        merged too. The input is not modified; a sorted copy is used.

        Args:
            intervals: iterable of (start_datetime, end_datetime)

        Returns:
            list: Merged intervals ordered by start
        """
        # sorted() copies, so the caller's list keeps its original order.
        ordered = sorted(intervals, key=lambda x: x[0])
        if not ordered:
            return []

        merged = [ordered[0]]

        for current_start, current_end in ordered[1:]:
            last_start, last_end = merged[-1]
            if current_start <= last_end:  # overlap
                # Keep the later end: the current interval may lie entirely
                # inside the previous one.
                merged[-1] = (last_start, max(last_end, current_end))
            else:
                merged.append((current_start, current_end))

        return merged

    def calculate_experience(self, verbose=True):
        """
        Calculate total professional experience (without double-counting overlaps).

        Positions without a start date are skipped with a warning. A missing
        end date is treated as an ongoing position and labelled 'Present'.
        The result is also stored on self.experience_summary.

        Args:
            verbose (bool): Whether to print detailed breakdown (default: True)

        Returns:
            dict: Experience summary with 'total_years',
            'total_months_remainder', 'total_months', 'total_years_decimal'
            and 'experience_breakdown'
        """
        work_experiences = []

        # The data from the file is the root, so we access "CV_data" from it.
        cv_data_node = self.cv_data.get('CV_data', {})

        # Check for 'structured_data' layer, or access directly
        if 'structured_data' in cv_data_node:
            work_experiences = cv_data_node['structured_data'].get('work_experience', [])
        else:
            work_experiences = cv_data_node.get('work_experience', [])

        if not work_experiences:
            print("Warning: 'work_experience' list is empty or not found. Returning zero experience.")
            self.experience_summary = {
                'total_years': 0, 'total_months_remainder': 0, 'total_months': 0,
                'total_years_decimal': 0.0, 'experience_breakdown': []
            }
            return self.experience_summary

        intervals = []
        experience_breakdown = []

        if verbose:
            print("Professional Experience Breakdown:")
            print("=" * 50)

        for i, job in enumerate(work_experiences, 1):
            company = job.get('company', 'Unknown Company')
            title = job.get('title', 'Unknown Position')
            start_date_str = job.get('start_date')
            end_date_str = job.get('end_date')

            if not start_date_str:
                print(f"Warning: Skipping job {i} due to missing start_date")
                continue

            start_date = self._parse_date(start_date_str)
            end_date = self._parse_date(end_date_str)

            # A missing/blank end date is parsed as "now", so label it
            # 'Present' rather than with the current month's name.
            normalized_end = end_date_str.strip().lower() if end_date_str else ''
            is_ongoing = normalized_end in ('', 'present')

            intervals.append((start_date, end_date))

            diff = relativedelta(end_date, start_date)
            job_details = {
                'company': company,
                'title': title,
                'start_date': start_date.strftime('%B %Y'),
                'end_date': 'Present' if is_ongoing else end_date.strftime('%B %Y'),
                'years': diff.years,
                'months': diff.months,
                'total_months': diff.years * 12 + diff.months
            }
            experience_breakdown.append(job_details)

            if verbose:
                print(f"{i}. {company}")
                print(f"   Position: {title}")
                print(f"   Duration: {job_details['start_date']} - {job_details['end_date']}")
                print(f"   Length: {diff.years} years, {diff.months} months")
                print()

        # Merge overlapping intervals
        merged_intervals = self._merge_intervals(intervals)

        # Calculate total months from merged intervals
        total_months = 0
        for start, end in merged_intervals:
            diff = relativedelta(end, start)
            total_months += diff.years * 12 + diff.months

        total_years = total_months // 12
        remaining_months = total_months % 12

        self.experience_summary = {
            'total_years': total_years,
            'total_months_remainder': remaining_months,
            'total_months': total_months,
            'total_years_decimal': round(total_months / 12, 2),
            'experience_breakdown': experience_breakdown
        }

        if verbose:
            self.print_summary()

        return self.experience_summary

    def print_summary(self):
        """Print the experience summary, or a hint if nothing was calculated yet."""
        if not self.experience_summary:
            print("No experience calculated yet. Please run calculate_experience() first.")
            return

        print("Total Professional Experience Summary (No Overlaps):")
        print("=" * 50)
        print(f"Total Experience: {self.experience_summary['total_years']} years, {self.experience_summary['total_months_remainder']} months")
        print(f"Total Months: {self.experience_summary['total_months']} months")
        print(f"Total Years (decimal): {self.experience_summary['total_years_decimal']} years")

    def get_total_months(self):
        """
        Get total experience in months.

        Runs calculate_experience(verbose=False) first if no summary exists.

        Returns:
            int: Total months of experience
        """
        if not self.experience_summary:
            self.calculate_experience(verbose=False)
        return self.experience_summary['total_months']

    def get_total_years(self):
        """
        Get total experience in years (decimal).

        Runs calculate_experience(verbose=False) first if no summary exists.

        Returns:
            float: Total years of experience, rounded to two places
        """
        if not self.experience_summary:
            self.calculate_experience(verbose=False)
        return self.experience_summary['total_years_decimal']

    def get_experience_breakdown(self):
        """
        Get detailed breakdown of each position.

        Runs calculate_experience(verbose=False) first if no summary exists.

        Returns:
            list: List of experience details for each position
        """
        if not self.experience_summary:
            self.calculate_experience(verbose=False)
        return self.experience_summary['experience_breakdown']

    def update_cv_data(self, file_path=None, cv_data_dict=None):
        """
        Update the CV data and reset experience summary.

        Args:
            file_path (str, optional): Path to new JSON file
            cv_data_dict (dict, optional): New CV data dictionary
        """
        self.cv_data = self._load_cv_data(file_path, cv_data_dict)
        # Drop the cached summary so the next getter recalculates it.
        self.experience_summary = None

    def get_name(self):
        """
        Get the person's name from CV data.

        The name is read from the 'structured_data' layer when present,
        otherwise directly from 'CV_data'.

        Returns:
            str: Person's name or 'Unknown' if not found
        """
        cv_data_node = self.cv_data.get('CV_data', {})
        if 'structured_data' in cv_data_node:
            return cv_data_node['structured_data'].get('name', 'Unknown')
        return cv_data_node.get('name', 'Unknown')

# Main execution and demonstration
if __name__ == "__main__":
    def _print_banner(title):
        """Print a blank line, then the title framed by two 50-character rules."""
        print("\n" + "=" * 50)
        print(title)
        print("=" * 50)

    try:
        # No arguments: the calculator falls back to whatever data it loads by default.
        calculator = ProfessionalExperienceCalculator()

        # verbose=True prints the per-position breakdown and the summary.
        experience_summary = calculator.calculate_experience(verbose=True)

        _print_banner("FINAL SUMMARY")
        print(f"Name: {calculator.get_name()}")
        print(f"Total Professional Experience: {experience_summary['total_years']} years, {experience_summary['total_months_remainder']} months")
        print(f"Equivalent to: {experience_summary['total_years_decimal']} years")

        # The getters below reuse the cached summary and print nothing themselves.
        _print_banner("SILENT CALCULATION EXAMPLE")
        print(f"Total experience (decimal years): {calculator.get_total_years()}")
        print(f"Total experience (months): {calculator.get_total_months()}")
        print(f"Number of positions: {len(calculator.get_experience_breakdown())}")

    except ImportError:
        print("Error: python-dateutil package is required.")
        print("Please install it using: pip install python-dateutil")
    except Exception as e:
        print(f"An error occurred: {e}")


Professional Experience Breakdown:
1. Really Great Tech
   Position: Data Analytics/AI/ML Engineer
   Duration: November 2023 - October 2024
   Length: 0 years, 11 months

2. Freelancer (ALX venturers)
   Position: Data Scientist
   Duration: September 2024 - Present
   Length: 1 years, 1 months

3. Ebit
   Position: Data Scientist
   Duration: September 2021 - September 2023
   Length: 2 years, 0 months

Total Professional Experience Summary (No Overlaps):
Total Experience: 3 years, 11 months
Total Months: 47 months
Total Years (decimal): 3.92 years

FINAL SUMMARY
Name: Unknown
Total Professional Experience: 3 years, 11 months
Equivalent to: 3.92 years

SILENT CALCULATION EXAMPLE
Total experience (decimal years): 3.92
Total experience (months): 47
Number of positions: 3


In [22]:
# Demo cell: compute total professional experience from an already-extracted CV JSON file.
# ProfessionalExperienceCalculator must be defined in this session. If the class was
# saved to its own module instead, import it first, e.g.:
# from professional_experience_calculator import ProfessionalExperienceCalculator

try:
    # Initialize the calculator with your JSON file
    calculator = ProfessionalExperienceCalculator(file_path="./extracted_files/CV_Image.json")
    
    # Calculate experience (set verbose=False to suppress detailed output)
    experience_summary = calculator.calculate_experience(verbose=True)
    
    # Extract total professional experience.
    # These getters read the cached summary and only recompute when it is empty.
    total_months = calculator.get_total_months()
    total_years = calculator.get_total_years()
    
    print("\nExtracted Professional Experience:")
    print(f"Total Months: {total_months} months")
    print(f"Total Years (decimal): {total_years} years")
    
except Exception as e:
    # Catch-all so a missing or malformed JSON file prints a message instead of a traceback.
    print(f"An error occurred: {e}")


Extracted Professional Experience:
Total Months: 0 months
Total Years (decimal): 0.0 years


In [11]:
from prof_years_extractor import ProfessionalExperienceCalculator

# Demo cell: read only the decimal-years total for one extracted CV JSON file.
try:
    # Initialize with a JSON file
    calculator = ProfessionalExperienceCalculator(file_path="./extracted_files/CV_Image.json")
    
    # Get total years of experience.
    # get_total_years() runs the calculation silently when no summary is cached yet.
    total_years = calculator.get_total_years()
    print(f"Total Professional Experience: {total_years} years")
    

    # Optional follow-up (disabled): swap in another CV dict and recompute.
    # `cv_data` is not defined in this cell and would have to be supplied first.
    # calculator.update_cv_data(cv_data_dict=cv_data)
    # print(f"Updated Total Years: {calculator.get_total_years()} years")

except Exception as e:
    # Catch-all so a missing or malformed JSON file prints a message instead of a traceback.
    print(f"Error: {e}")

Total Professional Experience: 0.0 years


**Testing the Extraction and the Professional Experience Calculator**

In [1]:
import os
import json
from docstrange_extractor import CVExtractor
from prof_years_extractor import ProfessionalExperienceCalculator

def clean_cv_data(cv_data):
    """
    Return a cleaned copy of extracted CV data, walking nested dicts and lists.

    Inside dictionaries, a value of None becomes:
    - [] when the key names a list-like section (work_experience, skills, ...)
    - "" for every other key

    Lists are rebuilt element by element; anything that is neither a dict nor
    a list (including a None that is a list element or the top-level input)
    is returned unchanged.
    """
    if isinstance(cv_data, list):
        return [clean_cv_data(entry) for entry in cv_data]

    if not isinstance(cv_data, dict):
        return cv_data

    # Keys whose missing value should be an empty list rather than "".
    list_like_fields = (
        "work_experience", "education", "skills", "soft_skills",
        "certifications", "projects", "languages", "hobbies",
    )

    result = {}
    for field, raw in cv_data.items():
        if raw is not None:
            result[field] = clean_cv_data(raw)
        elif field in list_like_fields:
            result[field] = []
        else:
            result[field] = ""
    return result

def extract_and_save_cv(cv_file_path, output_dir):
    """
    Extract CV data, calculate years of experience, and save to JSON.

    The extracted content is cleaned with ``clean_cv_data`` and wrapped as
    ``{"CV_data": ...}``. The calculator's decimal-years total is stored
    under ``CV_data["years_of_experience"]``; if the calculation fails, 0.0
    is stored instead. The output file is named after the input file with a
    ``.json`` extension.

    Args:
        cv_file_path (str): Path to the input CV file (e.g., PDF, DOCX)
        output_dir (str): Directory to save the output JSON file

    Returns:
        str: Path to the saved JSON file, or None if extraction fails
        (the extractor raised, or it did not return a dictionary)
    """
    extractor = CVExtractor()
    try:
        content = extractor.extract(cv_file_path)
    except Exception as e:
        print(f"❌ Error extracting {cv_file_path}: {e}")
        return None

    # Clean data before processing
    cleaned_content = clean_cv_data(content)

    # The experience total is stored as a key on the extracted data, so that
    # data must be a dictionary. Any other result (None, raw text, a list)
    # is reported as a failed extraction rather than crashing on assignment.
    if not isinstance(cleaned_content, dict):
        print(
            f"❌ Error extracting {cv_file_path}: "
            f"expected a dictionary, got {type(cleaned_content).__name__}"
        )
        return None

    # Calculate years of experience; fall back to 0.0 on any calculator error.
    output_dict = {"CV_data": cleaned_content}
    try:
        calculator = ProfessionalExperienceCalculator(cv_data_dict=output_dict)
        years_of_experience = calculator.get_total_years()
    except Exception as e:
        print(f"⚠️ Error calculating years of experience for {cv_file_path}: {e}")
        years_of_experience = 0.0
    output_dict["CV_data"]["years_of_experience"] = years_of_experience

    # Save to JSON
    os.makedirs(output_dir, exist_ok=True)
    base_name = os.path.splitext(os.path.basename(cv_file_path))[0]
    output_path = os.path.join(output_dir, f"{base_name}.json")

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(output_dict, f, indent=2, ensure_ascii=False)

    print(f"✅ Extracted and cleaned CV saved to: {output_path}")
    return output_path

def batch_extract_cvs(input_dir, output_dir="extracted_files"):
    """
    Run extract_and_save_cv on every .pdf / .docx file found in a directory.

    Only the directory's direct entries are considered, and the extension
    match is case-insensitive. A message is printed and nothing is processed
    when the directory does not exist or contains no matching file.

    Args:
        input_dir (str): Directory containing CV files
        output_dir (str): Directory to save JSON outputs
    """
    if not os.path.isdir(input_dir):
        print(f"❌ Input directory not found: {input_dir}")
        return

    cv_suffixes = ('.pdf', '.docx')
    cv_names = [
        entry for entry in os.listdir(input_dir)
        if entry.lower().endswith(cv_suffixes)
    ]
    if len(cv_names) == 0:
        print(f"⚠️ No CV files (.pdf or .docx) found in {input_dir}")
        return

    for cv_name in cv_names:
        extract_and_save_cv(os.path.join(input_dir, cv_name), output_dir)

# Example usage: extract a single CV and write its JSON next to the other outputs.
# The input and output paths are hard-coded relative to the current working directory.
if __name__ == "__main__":
    extract_and_save_cv("./CVs/Power_BI_Developer.pdf", "./extracted_files/")

Failed to parse JSON content: Expecting value: line 2 column 1 (char 1)


✅ Extracted and cleaned CV saved to: ./extracted_files/Power_BI_Developer.json
