In [6]:
import json
import pandas as pd
import os
import glob

def process_resume(resume_data):
    """Flatten one parsed resume into a single-level dict (one CSV row).

    Parameters
    ----------
    resume_data : dict
        Parsed resume JSON. Keys read here: "Name", "Email", "Phone",
        "LinkedIn", "Github", "Education" (mapping with Degree/Major/Year/CGPA),
        "UserExperience" (mapping), "Achievements" (mapping), "Projects"
        (list of mappings) and "Skills" (mapping holding "Hard Skills" and
        "Soft Skills" mappings).

    Returns
    -------
    dict
        Fixed contact/education columns, then one "Experience_<role>" column
        per experience entry, one "Achievement_<key>" column per achievement,
        one "Project_<n>_<key>" column per project field (n starts at 1), and
        finally "HardSkills" / "SoftSkills" as comma-separated
        "<skill> (<value>)" strings.

    Notes
    -----
    Any section that is absent or null is treated as empty, so an incomplete
    resume yields blank cells instead of raising and aborting a batch run.
    """
    # `or` also covers an explicit JSON null, which `.get(key, default)` does not.
    education = resume_data.get("Education") or {}
    experience = resume_data.get("UserExperience") or {}
    achievements = resume_data.get("Achievements") or {}
    projects = resume_data.get("Projects") or []
    skills = resume_data.get("Skills") or {}

    flattened_data = {
        "Name": resume_data.get("Name", ""),
        "Email": resume_data.get("Email", ""),
        "Phone": resume_data.get("Phone", ""),
        "LinkedIn": resume_data.get("LinkedIn", ""),
        "Github": resume_data.get("Github", ""),
        "Degree": education.get("Degree", ""),
        "Major": education.get("Major", ""),
        "Year": education.get("Year", ""),
        "CGPA": education.get("CGPA", ""),
    }

    # Insertion order determines the CSV column order, so the dynamic columns
    # are appended in the same sequence as before: experience, achievements,
    # projects, then the two skill summaries.
    for role, percentage in experience.items():
        flattened_data[f"Experience_{role}"] = percentage
    for key, value in achievements.items():
        flattened_data[f"Achievement_{key}"] = value
    for index, project in enumerate(projects, start=1):
        for key, value in project.items():
            flattened_data[f"Project_{index}_{key}"] = value

    for column, section in (("HardSkills", "Hard Skills"), ("SoftSkills", "Soft Skills")):
        entries = skills.get(section) or {}
        flattened_data[column] = ", ".join(
            f"{skill} ({percentage})" for skill, percentage in entries.items()
        )

    return flattened_data


def convert_resumes_to_csv(directory_path, output_csv):
    """Convert every ``*.txt`` resume JSON file in a directory into one CSV.

    Parameters
    ----------
    directory_path : str
        Directory searched (non-recursively) for ``*.txt`` files, each
        expected to contain one JSON document.
    output_csv : str
        Path of the CSV file to write, one row per successfully parsed resume.

    Returns
    -------
    None
        The result is written to ``output_csv``; status messages are printed.
        No file is written when no resume could be parsed.

    Notes
    -----
    Files that are not valid JSON, or not valid UTF-8 text, are reported and
    skipped so that a single bad file does not abort the whole run.
    """
    all_resumes = []
    # Escape the directory so characters such as "[" in a folder name are not
    # interpreted as glob wildcards; sort because glob order is arbitrary and
    # the CSV row order should be reproducible.
    pattern = os.path.join(glob.escape(directory_path), "*.txt")
    txt_files = sorted(glob.glob(pattern))

    for file_path in txt_files:
        try:
            # JSON is UTF-8 by specification. "utf-8-sig" reads plain UTF-8 and
            # also strips the BOM some Windows editors prepend, instead of
            # relying on the platform's locale encoding.
            with open(file_path, 'r', encoding='utf-8-sig') as f:
                resume_data = json.load(f)
        except json.JSONDecodeError:
            print(f"Error decoding JSON from file: {file_path}")
            continue
        except UnicodeDecodeError:
            print(f"Error decoding text (expected UTF-8) from file: {file_path}")
            continue

        all_resumes.append(process_resume(resume_data))

    if all_resumes:
        df = pd.DataFrame(all_resumes)
        df.to_csv(output_csv, index=False)
        print(f"Data has been converted to CSV format and saved as '{output_csv}'")
    else:
        print("No valid resume data found.")


# Folder scanned for resume files; the converter above picks up every "*.txt"
# file in it and parses each one as JSON.
# NOTE(review): absolute, machine-specific path - change it before running on
# another machine.
directory_path = "C:/Users/hsahn/OneDrive/Desktop/resume data (json)"  # For Windows



# Destination CSV for the flattened resumes (an existing file at this path is
# overwritten by DataFrame.to_csv).
output_csv ="C:/Users/hsahn/OneDrive/Desktop/all_resumes_data2.csv"

# Run the conversion; progress and error messages are printed below the cell.
convert_resumes_to_csv(directory_path, output_csv)


Data has been converted to CSV format and saved as 'C:/Users/hsahn/OneDrive/Desktop/all_resumes_data2.csv'


In [16]:
import json
import pandas as pd
import os
import glob


import json

import json

def process_resume(resume_data):
    """Flatten one parsed resume into a fixed set of CSV columns.

    Parameters
    ----------
    resume_data : dict
        Parsed resume JSON. Keys read here: "Name", "Contact" (mapping with
        Email/Mobile/LinkedIn/GitHub), "Education" (mapping with
        Degree/Major/Year/CGPA), "UserExperience" (list of mappings with
        Role/Company/Duration/Description), "Projects" (list of mappings with
        Title/Duration/Description), "Achievements", and "Skills" (mapping
        holding "Hard Skills" and "Soft Skills" mappings).

    Returns
    -------
    dict
        Scalar contact/education fields plus the columns "Experiences",
        "Projects", "Achievements", "HardSkills" and "SoftSkills", each holding
        a JSON-encoded string so that every resume produces the same columns.

    Notes
    -----
    Missing or null sections, and missing fields inside an experience or
    project entry, become empty values instead of raising ``KeyError``, so one
    incomplete resume does not abort a batch conversion.
    """
    # `or` also covers an explicit JSON null, which `.get(key, default)` does not.
    contact = resume_data.get("Contact") or {}
    education = resume_data.get("Education") or {}
    skills = resume_data.get("Skills") or {}

    flattened_data = {
        "Name": resume_data.get("Name", ""),
        # Emails are lower-cased; a null email is treated as empty.
        "Email": (contact.get("Email") or "").lower(),
        "Phone": contact.get("Mobile", ""),
        "LinkedIn": contact.get("LinkedIn", ""),
        "Github": contact.get("GitHub", ""),
        "Degree": education.get("Degree", ""),
        "Major": education.get("Major", ""),
        "Year": education.get("Year", ""),
        "CGPA": education.get("CGPA", ""),
    }

    experiences = [
        {
            "role": exp.get("Role", ""),
            "company": exp.get("Company", ""),
            "duration": exp.get("Duration", ""),
            "description": exp.get("Description", ""),
        }
        for exp in resume_data.get("UserExperience") or []
    ]
    flattened_data["Experiences"] = json.dumps(experiences)

    projects = [
        {
            "title": project.get("Title", ""),
            "duration": project.get("Duration", ""),
            "description": project.get("Description", ""),
        }
        for project in resume_data.get("Projects") or []
    ]
    flattened_data["Projects"] = json.dumps(projects)

    # Achievements are passed through as-is; only absent/null becomes [].
    achievements = resume_data.get("Achievements")
    if achievements is None:
        achievements = []
    flattened_data["Achievements"] = json.dumps(achievements)

    for column, section in (("HardSkills", "Hard Skills"), ("SoftSkills", "Soft Skills")):
        entries = skills.get(section) or {}
        flattened_data[column] = json.dumps(
            [{"skill": skill, "percentage": percentage} for skill, percentage in entries.items()]
        )

    return flattened_data



def convert_resumes_to_csv(directory_path, output_csv):
    """Convert every ``*.txt`` resume JSON file in a directory into one CSV.

    Parameters
    ----------
    directory_path : str
        Directory searched (non-recursively) for ``*.txt`` files, each
        expected to contain one JSON document.
    output_csv : str
        Path of the CSV file to write, one row per successfully parsed resume.

    Returns
    -------
    None
        The result is written to ``output_csv``; status messages are printed.
        No file is written when no resume could be parsed.

    Notes
    -----
    Files that are not valid JSON, or not valid UTF-8 text, are reported and
    skipped so that a single bad file does not abort the whole run.
    """
    all_resumes = []
    # Escape the directory so characters such as "[" in a folder name are not
    # interpreted as glob wildcards; sort because glob order is arbitrary and
    # the CSV row order should be reproducible.
    pattern = os.path.join(glob.escape(directory_path), "*.txt")
    txt_files = sorted(glob.glob(pattern))

    for file_path in txt_files:
        try:
            # JSON is UTF-8 by specification. "utf-8-sig" reads plain UTF-8 and
            # also strips the BOM some Windows editors prepend, instead of
            # relying on the platform's locale encoding.
            with open(file_path, 'r', encoding='utf-8-sig') as f:
                resume_data = json.load(f)
        except json.JSONDecodeError:
            print(f"Error decoding JSON from file: {file_path}")
            continue
        except UnicodeDecodeError:
            print(f"Error decoding text (expected UTF-8) from file: {file_path}")
            continue

        all_resumes.append(process_resume(resume_data))

    if all_resumes:
        df = pd.DataFrame(all_resumes)
        df.to_csv(output_csv, index=False)
        print(f"Data has been converted to CSV format and saved as '{output_csv}'")
    else:
        print("No valid resume data found.")


# Folder scanned for resume files; the converter above picks up every "*.txt"
# file in it and parses each one as JSON.
# NOTE(review): absolute, machine-specific path - change it before running on
# another machine.
directory_path = "C:/Users/hsahn/OneDrive/Desktop/resume data (json)"  

# Destination CSV for the flattened resumes (an existing file at this path is
# overwritten by DataFrame.to_csv).
output_csv ="C:/Users/hsahn/OneDrive/Desktop/all_resumes_data.csv"

# Run the conversion; progress and error messages are printed below the cell.
convert_resumes_to_csv(directory_path, output_csv)


Data has been converted to CSV format and saved as 'C:/Users/hsahn/OneDrive/Desktop/all_resumes_data.csv'
