In [15]:
import os
import json
import pandas as pd

In [16]:

def process_job_search(folder_path):
    """Load every ``*.json`` search-result file in a folder into one DataFrame.

    Each file is parsed as a JSON object. Its ``jobs_results`` list supplies
    the rows (one per job posting) and ``search_parameters["q"]`` supplies the
    value of the ``searching keyword`` column for all rows from that file.

    Parameters
    ----------
    folder_path : str or os.PathLike
        Directory to scan. Only direct children whose name ends in ``.json``
        are read; everything else is ignored.

    Returns
    -------
    pandas.DataFrame
        One row per job posting, holding the posting's own fields plus
        ``searching keyword``, ``Qualifications``, ``Responsibilities``,
        ``salary`` and ``schedule_type``. The nested/raw columns
        ``job_highlights``, ``related_links``, ``detected_extensions``,
        ``extensions`` and ``search_metadata_id`` are removed when present.
        An empty DataFrame is returned when no postings are found.

    Raises
    ------
    OSError
        If ``folder_path`` cannot be listed or a file cannot be opened.
    json.JSONDecodeError
        If a ``.json`` file does not contain valid JSON.
    """
    columns_to_drop = [
        'job_highlights',
        'related_links',
        'detected_extensions',
        'extensions',
        'search_metadata_id',
    ]
    records = []

    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order (and any later keep='first' de-duplication) reproducible.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith('.json'):
            continue

        file_path = os.path.join(folder_path, file_name)
        # Pin the encoding instead of relying on the platform default, which
        # may mis-decode non-ASCII text such as "–" or "’".
        with open(file_path, 'r', encoding='utf-8') as file:
            json_data = json.load(file)

        # `or` also covers keys that are present but hold JSON null.
        keyword = (json_data.get("search_parameters") or {}).get("q", "")
        for job_result in json_data.get("jobs_results") or []:
            records.append(_flatten_job_result(job_result, keyword))

    df = pd.DataFrame(records)

    # errors='ignore': not every result set carries all of these columns, and
    # an empty folder yields a frame with no columns at all.
    df = df.drop(columns=columns_to_drop, errors='ignore')

    if 'via' in df.columns:
        # Strip only the leading "via " prefix; an unanchored replace would
        # also eat "via " inside a name (e.g. "Olivia Jobs").
        df['via'] = df['via'].str.replace(r'^via ', '', regex=True)

    return df


def _flatten_job_result(job_result, keyword):
    """Return a flat copy of one job posting, ready to become a DataFrame row.

    Adds ``searching keyword``, ``Qualifications``, ``Responsibilities``,
    ``salary`` and ``schedule_type`` and replaces newlines in ``description``
    with spaces. Missing or null source fields become empty strings.
    """
    row = dict(job_result)  # shallow copy: leave the parsed JSON untouched
    row["searching keyword"] = keyword

    # Pull the two highlight sections of interest out of job_highlights.
    # If a section title appears more than once, the last one wins.
    qualifications = ""
    responsibilities = ""
    for highlight in row.get("job_highlights") or []:
        title = highlight.get("title", "")
        items = highlight.get("items") or []
        if title == "Qualifications":
            qualifications = " ".join(items)
        elif title == "Responsibilities":
            responsibilities = " ".join(items)
    row["Qualifications"] = qualifications
    row["Responsibilities"] = responsibilities

    # Flatten the description onto a single line.
    row["description"] = (row.get("description") or "").replace("\n", " ")

    # Promote selected detected_extensions entries to their own columns.
    detected_extensions = row.get("detected_extensions") or {}
    row["salary"] = detected_extensions.get("salary", "")
    row["schedule_type"] = detected_extensions.get("schedule_type", "")

    return row

In [17]:
# Parse the DC-location search results into a DataFrame.
folder_path = '../Data Set/2023-04-14-job-search/2023-04-14-job-search-location-DC'
df_DC = process_job_search(folder_path=folder_path)

In [18]:
# Preview the first five DC rows.
df_DC.head(n=5)

Unnamed: 0,title,company_name,location,via,description,job_id,searching keyword,Qualifications,Responsibilities,salary,schedule_type
0,Ethereum Blockchain Developer (Remote),Ex Populus,Anywhere,Built In,Company Overview: Ex Populus is a cutting-edge...,eyJqb2JfdGl0bGUiOiJFdGhlcmV1bSBCbG9ja2NoYWluIE...,block chain,2-3 years of Software Development experience 1...,"Design, maintain and deploy smart contracts fo...",,Full-time
1,Blockchain Engineer,21.co,"New York, NY",Greenhouse,We are seeking a highly motivated and skilled ...,eyJqb2JfdGl0bGUiOiJCbG9ja2NoYWluIEVuZ2luZWVyIi...,block chain,Bachelor's or Master's degree in Computer Scie...,"As a Blockchain Engineer, you will be responsi...",,Full-time
2,Blockchain Course Instructor,Blockchain Institute of Technology,Anywhere,LinkedIn,"Are you a blockchain, cryptocurrency, NFT, Met...",eyJqb2JfdGl0bGUiOiJCbG9ja2NoYWluIENvdXJzZSBJbn...,block chain,"3+ years of experience in blockchain, cryptocu...",Our expert technical team will provide the sup...,,Contractor
3,Python based - Blockchain developer to join ex...,Upwork,Anywhere,Upwork,Need someone to join our existing team to spee...,eyJqb2JfdGl0bGUiOiJQeXRob24gYmFzZWQgLSBCbG9ja2...,block chain,"Candidates must be willing to sign, non-disclo...",Will discuss details with the selected candidates,10–30 an hour,Contractor
4,Blockchain DevOps Engineer (Remote),Telnyx,United States,Startup Jobs,"About Telnyx At Telnyx, we’re architecting an...",eyJqb2JfdGl0bGUiOiJCbG9ja2NoYWluIERldk9wcyBFbm...,block chain,You are a highly motivated and experienced Blo...,To build a best-in-class Filecoin (FIL) Mining...,,Full-time


In [19]:
# Parse the USA-location search results into a DataFrame.
folder_path = '../Data Set/2023-04-14-job-search/2023-04-14-job-search-location-USA'
df_USA = process_job_search(folder_path=folder_path)

In [20]:
# Preview the first five USA rows.
df_USA.head(n=5)

Unnamed: 0,title,company_name,location,via,description,job_id,searching keyword,Qualifications,Responsibilities,salary,schedule_type
0,Senior Blockchain Software Engineer,Truist,Anywhere,ZipRecruiter,The position is described below. If you want t...,eyJqb2JfdGl0bGUiOiJTZW5pb3IgQmxvY2tjaGFpbiBTb2...,block chain,Language Fluency: English (Required) The requi...,Deliver highly complex solutions with signific...,,Full-time
1,Get twitter accounts that are tied to blockcha...,Upwork,Anywhere,Upwork,I am looking for a list of blockchain addresse...,eyJqb2JfdGl0bGUiOiJHZXQgdHdpdHRlciBhY2NvdW50cy...,block chain,,,,Contractor
2,Blockchain Analyst,Allium,"New York, NY",Jobs,About the role • Sherlock & Enola Holmes level...,eyJqb2JfdGl0bGUiOiJCbG9ja2NoYWluIEFuYWx5c3QiLC...,block chain,Ability to parse and understand EVM (and/or So...,Tackling interesting problems - we are ingesti...,,Full-time
3,Full Stack Blockchain Engineer,geojam,"Los Angeles, CA",AngelList,"Who We Are Launched in 2020, Geojam is a soci...",eyJqb2JfdGl0bGUiOiJGdWxsIFN0YWNrIEJsb2NrY2hhaW...,block chain,We’re looking for a capable generalist enginee...,You will be designing and building our core so...,,Full-time
4,Blockchain Engineer - Apps Platform at Exodus ...,Exodus,"Livermore, CA","Livermore, CA - Geebo",Exodus is looking for a detail-orientated Bloc...,eyJqb2JfdGl0bGUiOiJCbG9ja2NoYWluIEVuZ2luZWVyIC...,block chain,,,20–28 an hour,Full-time


In [21]:
#combine the datasets
# Stack the DC and USA frames into one; ignore_index=True renumbers the rows
# 0..n-1 so the per-frame row labels do not repeat in the result.
combined_df = pd.concat([df_DC, df_USA], ignore_index=True)


In [22]:
# Display the combined DC + USA frame (before de-duplication).
combined_df

Unnamed: 0,title,company_name,location,via,description,job_id,searching keyword,Qualifications,Responsibilities,salary,schedule_type
0,Ethereum Blockchain Developer (Remote),Ex Populus,Anywhere,Built In,Company Overview: Ex Populus is a cutting-edge...,eyJqb2JfdGl0bGUiOiJFdGhlcmV1bSBCbG9ja2NoYWluIE...,block chain,2-3 years of Software Development experience 1...,"Design, maintain and deploy smart contracts fo...",,Full-time
1,Blockchain Engineer,21.co,"New York, NY",Greenhouse,We are seeking a highly motivated and skilled ...,eyJqb2JfdGl0bGUiOiJCbG9ja2NoYWluIEVuZ2luZWVyIi...,block chain,Bachelor's or Master's degree in Computer Scie...,"As a Blockchain Engineer, you will be responsi...",,Full-time
2,Blockchain Course Instructor,Blockchain Institute of Technology,Anywhere,LinkedIn,"Are you a blockchain, cryptocurrency, NFT, Met...",eyJqb2JfdGl0bGUiOiJCbG9ja2NoYWluIENvdXJzZSBJbn...,block chain,"3+ years of experience in blockchain, cryptocu...",Our expert technical team will provide the sup...,,Contractor
3,Python based - Blockchain developer to join ex...,Upwork,Anywhere,Upwork,Need someone to join our existing team to spee...,eyJqb2JfdGl0bGUiOiJQeXRob24gYmFzZWQgLSBCbG9ja2...,block chain,"Candidates must be willing to sign, non-disclo...",Will discuss details with the selected candidates,10–30 an hour,Contractor
4,Blockchain DevOps Engineer (Remote),Telnyx,United States,Startup Jobs,"About Telnyx At Telnyx, we’re architecting an...",eyJqb2JfdGl0bGUiOiJCbG9ja2NoYWluIERldk9wcyBFbm...,block chain,You are a highly motivated and experienced Blo...,To build a best-in-class Filecoin (FIL) Mining...,,Full-time
...,...,...,...,...,...,...,...,...,...,...,...
818,Machine Learning Engineer/ Data Scientist- NLP...,Supportiv,"Chicago, IL",KLFY Jobs,"First, The Data! • Over 1 million unique chat ...",eyJqb2JfdGl0bGUiOiJNYWNoaW5lIExlYXJuaW5nIEVuZ2...,natural language processing,[1 day] Take-home programming challenge [60 mi...,The role will serve as a core member within th...,,Internship
819,Post-Doc Researcher for Natural Language Proce...,Robert Bosch,"Sunnyvale, CA",Ladders,Company Description The Bosch Research and Te...,eyJqb2JfdGl0bGUiOiJQb3N0LURvYyBSZXNlYXJjaGVyIG...,natural language processing,Ph.D. in computer science or engineering 3+ ye...,Conduct research on Natural Language Processin...,100K–150K a year,Full-time
820,Technical Leader - Natural Language Processing...,Kitware,"Minneapolis, MN",ZipRecruiter,Team Description: Our computer vision team is...,eyJqb2JfdGl0bGUiOiJUZWNobmljYWwgTGVhZGVyIC0gTm...,natural language processing,PhD in Computer Science or related field with ...,Lead proposals for new funding from government...,,Full-time
821,"Lead Research Scientist, Natural Language Proc...",Snap Inc.,"Los Angeles, CA",Jooble,"Lead Research Scientist, Natural Language Proc...",eyJqb2JfdGl0bGUiOiJMZWFkIFJlc2VhcmNoIFNjaWVudG...,natural language processing,"Strong technical knowledge of statistics, mach...",Define and own a roadmap of large-scale resear...,,Full-time


In [23]:

# Remove rows that match an earlier row in every column except job_id,
# keeping the first occurrence. Surviving rows keep their original labels.
combined_df = combined_df.drop_duplicates(
    subset=combined_df.columns.difference(['job_id']),
    keep='first',
)

In [24]:
# Display the combined frame after duplicate rows were dropped.
combined_df

Unnamed: 0,title,company_name,location,via,description,job_id,searching keyword,Qualifications,Responsibilities,salary,schedule_type
0,Ethereum Blockchain Developer (Remote),Ex Populus,Anywhere,Built In,Company Overview: Ex Populus is a cutting-edge...,eyJqb2JfdGl0bGUiOiJFdGhlcmV1bSBCbG9ja2NoYWluIE...,block chain,2-3 years of Software Development experience 1...,"Design, maintain and deploy smart contracts fo...",,Full-time
1,Blockchain Engineer,21.co,"New York, NY",Greenhouse,We are seeking a highly motivated and skilled ...,eyJqb2JfdGl0bGUiOiJCbG9ja2NoYWluIEVuZ2luZWVyIi...,block chain,Bachelor's or Master's degree in Computer Scie...,"As a Blockchain Engineer, you will be responsi...",,Full-time
2,Blockchain Course Instructor,Blockchain Institute of Technology,Anywhere,LinkedIn,"Are you a blockchain, cryptocurrency, NFT, Met...",eyJqb2JfdGl0bGUiOiJCbG9ja2NoYWluIENvdXJzZSBJbn...,block chain,"3+ years of experience in blockchain, cryptocu...",Our expert technical team will provide the sup...,,Contractor
3,Python based - Blockchain developer to join ex...,Upwork,Anywhere,Upwork,Need someone to join our existing team to spee...,eyJqb2JfdGl0bGUiOiJQeXRob24gYmFzZWQgLSBCbG9ja2...,block chain,"Candidates must be willing to sign, non-disclo...",Will discuss details with the selected candidates,10–30 an hour,Contractor
4,Blockchain DevOps Engineer (Remote),Telnyx,United States,Startup Jobs,"About Telnyx At Telnyx, we’re architecting an...",eyJqb2JfdGl0bGUiOiJCbG9ja2NoYWluIERldk9wcyBFbm...,block chain,You are a highly motivated and experienced Blo...,To build a best-in-class Filecoin (FIL) Mining...,,Full-time
...,...,...,...,...,...,...,...,...,...,...,...
812,Blockchain Operations - Technical Analyst,Chainlink Labs,Anywhere,LinkedIn,As a Blockchain Operations - Technical Analyst...,eyJqb2JfdGl0bGUiOiJCbG9ja2NoYWluIE9wZXJhdGlvbn...,block chain,Have comfort and familiarity with complex engi...,As a Blockchain Operations - Technical Analyst...,,Contractor
815,Natural Language Processing Engineer,"Brain Technologies, Inc.","San Mateo, CA",Karkidi,Responsibilities: • Learning from user interac...,eyJqb2JfdGl0bGUiOiJOYXR1cmFsIExhbmd1YWdlIFByb2...,natural language processing,Job entails working with and requires Master’s...,Learning from user interactions with our NLP s...,160K–250K a year,Full-time
816,Machine Learning Engineer/ Data Scientist- NLP...,Supportiv,"San Jose, CA",WDHN Jobs,"First, The Data! • Over 1 million unique chat ...",eyJqb2JfdGl0bGUiOiJNYWNoaW5lIExlYXJuaW5nIEVuZ2...,natural language processing,[1 day] Take-home programming challenge [60 mi...,The role will serve as a core member within th...,,Internship
818,Machine Learning Engineer/ Data Scientist- NLP...,Supportiv,"Chicago, IL",KLFY Jobs,"First, The Data! • Over 1 million unique chat ...",eyJqb2JfdGl0bGUiOiJNYWNoaW5lIExlYXJuaW5nIEVuZ2...,natural language processing,[1 day] Take-home programming challenge [60 mi...,The role will serve as a core member within th...,,Internship


In [25]:
# Show every remaining row whose title is exactly this posting's title.
combined_df.loc[combined_df['title'].eq('Ethereum Blockchain Developer (Remote)')]

Unnamed: 0,title,company_name,location,via,description,job_id,searching keyword,Qualifications,Responsibilities,salary,schedule_type
0,Ethereum Blockchain Developer (Remote),Ex Populus,Anywhere,Built In,Company Overview: Ex Populus is a cutting-edge...,eyJqb2JfdGl0bGUiOiJFdGhlcmV1bSBCbG9ja2NoYWluIE...,block chain,2-3 years of Software Development experience 1...,"Design, maintain and deploy smart contracts fo...",,Full-time


In [26]:
# Export combined_df to CSV in the "Data Set" folder, without the index column.
combined_df.to_csv(path_or_buf='../Data Set/combined_df.csv', index=False)