In [None]:
import pandas as pd

# Fields a posting must have to be kept; a row missing any one is discarded.
REQUIRED_FIELDS = ['max_salary', 'min_salary', 'job_id', 'company_id',
                   'formatted_experience_level']

# Columns that are removed from the cleaned output.
DISCARDED_COLUMNS = ['job_posting_url', 'description', 'application_url',
                     'posting_domain', 'sponsored', 'compensation_type',
                     'scraped', 'closed_time', 'skills_desc']

# Non-critical columns whose missing values are replaced with 0.
ZERO_DEFAULT_COLUMNS = ('remote_allowed', 'views', 'applies')

# Columns parsed as millisecond-resolution epoch values.
EPOCH_MS_COLUMNS = ('original_listed_time', 'listed_time', 'expiry')

# Load the raw postings and prune them in order: keep the first row for each
# job_id, discard rows lacking a required field, then remove unused columns.
df = (
    pd.read_csv('job_postings.csv')
    .drop_duplicates(subset=['job_id'])
    .dropna(subset=REQUIRED_FIELDS)
    .drop(columns=DISCARDED_COLUMNS)
)

# Missing values in these columns become 0.
for column in ZERO_DEFAULT_COLUMNS:
    df[column] = df[column].fillna(0)

# A missing median salary falls back to the midpoint of the salary range;
# min_salary and max_salary are both present on every remaining row.
salary_midpoint = (df['min_salary'] + df['max_salary']) / 2
df['med_salary'] = df['med_salary'].fillna(salary_midpoint)

# Turn the millisecond timestamps into datetime values.
for column in EPOCH_MS_COLUMNS:
    df[column] = pd.to_datetime(df[column], unit='ms')

# Write the cleaned table without the index column.
df.to_csv('clean.csv', index=False)