In [13]:
import pandas as pd

In [14]:
# Load the raw records from the JSON file into the working DataFrame
data = pd.read_json(path_or_buf="freelancerresult.json")

In [15]:
# Display the shape of the DataFrame as a (rows, columns) tuple
data.shape

(9322, 9)

In [16]:
# Remove the leading hashtag from the 'projectId' column.
# Only a literal '#' at the start of the string is stripped, so an ID that
# arrives without the prefix keeps its first digit (a blind `.str[1:]` slice
# would silently truncate it, and would eat another character on every
# accidental re-run of this cell).
data['projectId'] = data['projectId'].str.replace(r'^#', '', regex=True)

In [17]:
# Split 'price_range' into numeric 'min_price' and 'max_price' columns.
# n=1 splits on the first '-' only, and reindex pins the result to exactly two
# columns (0 and 1). Without this, the two-column assignment raises a
# column-count mismatch whenever any value contains more than one '-' or when
# no value contains a '-' at all.
price_parts = data['price_range'].str.split('-', n=1, expand=True).reindex(columns=[0, 1])
# Anything that is not a clean number (including a missing upper bound)
# becomes NaN instead of raising.
data['min_price'] = pd.to_numeric(price_parts[0], errors='coerce')
data['max_price'] = pd.to_numeric(price_parts[1], errors='coerce')
# Convert 'projectId' to a numeric column as well; unparseable IDs become NaN.
data["projectId"] = pd.to_numeric(data["projectId"], errors='coerce')

In [18]:
# Derive 'currency' and 'rate_type' from the 'rate' column.
# The text before the first '/' is stored as the currency. Rows that have no
# second part after splitting on '/' are labelled "fixed"; all others "hourly".
rate_split = data['rate'].str.split('/', expand=True)
second_part_missing = rate_split[1].isna()
data["currency"] = rate_split[0]
data["rate_type"] = second_part_missing.map({True: "fixed", False: "hourly"})

In [19]:
# Split 'client_location' into 'client_state' and 'client_country' columns.
# 'location_split' holds the list of ', '-separated parts for each row; it is
# kept as a helper column because a later cell removes it by name.
data["location_split"] = data['client_location'].str.split(', ')
# The first part is the state/city, but only when the location has more than
# one part. A missing location (NaN rather than a list) yields None instead of
# raising TypeError from len().
data['client_state'] = data["location_split"].apply(
    lambda parts: parts[0] if isinstance(parts, list) and len(parts) > 1 else None
)
# The country is always the LAST part, so a three-part location such as
# "City, Region, Country" is not mislabelled with its middle part.
data['client_country'] = data["location_split"].apply(
    lambda parts: parts[-1] if isinstance(parts, list) and parts else None
)

In [20]:
# Keep only rows whose 'currency' is at most 3 characters long.
# .copy() detaches the filtered rows from the original frame, so the column
# assignment below writes to an independent DataFrame instead of triggering
# pandas' SettingWithCopyWarning.
data = data[data["currency"].str.len() <= 3].copy()
# Calculate and add 'avg_price' column (midpoint of 'min_price' and 'max_price')
data["avg_price"] = (data["min_price"] + data["max_price"]) / 2
# Remove rows where 'avg_price' is NaN, i.e. where either price bound is missing.
# .copy() again so that later cells can assign columns without the warning.
data = data[data["avg_price"].notna()].copy()

In [21]:
# Convert every tag in each row's 'tags' list to lowercase
def _lowercase_all(tags):
    """Return a new list holding the lowercase form of each string in `tags`."""
    return [tag.lower() for tag in tags]


data["tags"] = data["tags"].map(_lowercase_all)

In [22]:
# Columns to keep, in the desired output order.
# The raw/helper columns ("client_location", "location_split", "price_range",
# "rate") are simply not listed, so the selection below both removes them and
# reorders the rest. No separate in-place drop is needed, which also lets this
# cell be re-run without a KeyError for already-removed columns.
column_order = [
    'projectId',
    'job_title',
    'job_description',
    'tags',
    'client_state',
    'client_country',
    'client_average_rating',
    'client_review_count',
    'min_price',
    'max_price',
    'avg_price',
    'currency',
    'rate_type',
]

# Select and reorder the columns (produces a new DataFrame)
data = data[column_order]


In [23]:
# Write the cleaned DataFrame to a CSV file, leaving out the row index.
# NOTE: the list-valued 'tags' cells are written as their string representation.
data.to_csv(path_or_buf="freelancer_job_postings.csv", index=False)

In [24]:
# Display the cleaned DataFrame (the notebook shows the first and last rows with an ellipsis row between them)
data

Unnamed: 0,projectId,job_title,job_description,tags,client_state,client_country,client_average_rating,client_review_count,min_price,max_price,avg_price,currency,rate_type
0,37426471,development and implementation of a federated ...,please bid only if you are ready to do the wor...,"[algorithm, java, python, machine learning (ml...",Heilbronn,Germany,5.0,17,8.0,30.0,19.0,EUR,fixed
1,37421546,Data Entry -- 2,Project Title: Data Entry - Data Analysis in E...,"[excel, statistical analysis, statistics, spss...",Nagpur,India,0.0,0,750.0,1250.0,1000.0,INR,hourly
2,37400492,Data Scrap,I am looking for a freelancer who can help me ...,"[web scraping, data mining, data entry, excel,...",Eaubonne,France,5.0,1,30.0,250.0,140.0,EUR,fixed
3,37404568,Big Data Project,Store Sales Data Analysis: A Data Engineering ...,"[big data sales, data science, data mining, st...",Mundra,India,5.0,2,5000.0,5500.0,5250.0,INR,fixed
4,37397423,Virtual Assistant / Research Assistant,Job Description: I am seeking a Virtual Assist...,"[data entry, virtual assistant, web search, ex...",,United States,0.0,0,5.0,15.0,10.0,USD,hourly
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9317,31183091,Computer Vision / Feature Extraction from Aeri...,Looking for an experienced computer vision / f...,"[python, deep learning, computer vision, image...",Buenos Aires,Argentina,5.0,12,30.0,250.0,140.0,USD,fixed
9318,31184739,Database and software analysis,I need a freelancer who's proficient in micros...,[data processing],,United Kingdom,0.0,0,250.0,750.0,500.0,USD,fixed
9319,31185401,Business Analyst (Strategic Projects) -- 2,Role Purpose The role’s purpose is to superch...,"[python, business analysis, financial analysis...",Sydney,Australia,5.0,39,15.0,25.0,20.0,AUD,hourly
9320,31189015,AI developer needed,We are an italian startup seeking for a machin...,"[data analysis, machine learning (ml), python]",Modena,Italy,0.0,0,3000.0,5000.0,4000.0,EUR,fixed
