In [21]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
%matplotlib inline
# Show every column when a frame is displayed (None removes the column limit).
# Uses the public pd.set_option entry point rather than going through the
# package's self-referential `pd.pandas` attribute.
pd.set_option('display.max_columns', None)

In [22]:
# Load the salary CSV into the working frame used by every cell below.
raw_csv = 'data/Final23_1_salary.csv'
df = pd.read_csv(raw_csv)

In [23]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,work_year,experience_level,employment_type,job_title,salary,salary_currency,salary_in_usd,employee_residence,remote_ratio,company_location,company_size
0,2023,SE,FT,Principal Data Scientist,80000,EUR,85847,ES,100,ES,L
1,2023,MI,CT,ML Engineer,30000,USD,30000,US,100,US,S
2,2023,MI,CT,ML Engineer,25500,USD,25500,US,100,US,S
3,2023,SE,FT,Data Scientist,175000,USD,175000,CA,100,CA,M
4,2023,SE,FT,Data Scientist,120000,USD,120000,CA,100,CA,M


### Dropping the numerical columns 'work_year', 'salary' and 'remote_ratio'

In [24]:
# Discard work_year, salary and remote_ratio; all other columns are kept.
df = df.drop(columns=['work_year', 'salary', 'remote_ratio'])

### Handling categorical columns

In [27]:
# Columns stored with object dtype are treated as the categorical features.
categorical_features = df.columns[df.dtypes == 'O'].tolist()
summary_template = 'We have {} categorical features : {}'
print(summary_template.format(len(categorical_features), categorical_features))

We have 7 categorical features : ['experience_level', 'employment_type', 'job_title', 'salary_currency', 'employee_residence', 'company_location', 'company_size']


In [28]:
# Report how many distinct values each categorical feature takes.
# dropna=False keeps a missing value counted as its own level, matching
# what len(Series.unique()) reports.
for feature in categorical_features:
    n_levels = df[feature].nunique(dropna=False)
    print(feature, n_levels)

experience_level 4
employment_type 4
job_title 93
salary_currency 20
employee_residence 78
company_location 72
company_size 3


In [29]:
# Show the 20 most frequent values of every categorical feature.
# The feature name is printed on its own line: printing it together with the
# Series puts it on the same line as the first row of counts, which makes the
# output read as if the name were part of that row.
for feature in categorical_features:
    print('-' * 104)
    print(feature)
    print(df[feature].value_counts().head(20))

--------------------------------------------------------------------------------------------------------
experience_level SE    2516
MI     805
EN     320
EX     114
Name: experience_level, dtype: int64
--------------------------------------------------------------------------------------------------------
employment_type FT    3718
PT      17
CT      10
FL      10
Name: employment_type, dtype: int64
--------------------------------------------------------------------------------------------------------
job_title Data Engineer                 1040
Data Scientist                 840
Data Analyst                   612
Machine Learning Engineer      289
Analytics Engineer             103
Data Architect                 101
Research Scientist              82
Data Science Manager            58
Applied Scientist               58
Research Engineer               37
ML Engineer                     34
Data Manager                    29
Machine Learning Scientist      26
Data Science Consultant   

### Ordinal Encoding on Experience Level and Company size

In [8]:
# List the distinct company_size labels before defining their ordinal mapping.
df.loc[:, 'company_size'].unique()

array(['L', 'S', 'M'], dtype=object)

In [9]:
# List the distinct experience_level labels before defining their ordinal mapping.
df.loc[:, 'experience_level'].unique()

array(['SE', 'MI', 'EN', 'EX'], dtype=object)

In [10]:
# Integer ranks for the two ordered features; a larger rank means a later
# position in the order written out in each dict.
# NOTE(review): presumably S < M < L is company size and EN < MI < SE < EX is
# seniority -- confirm against the dataset description.
ordinal_mapping_company_size = {'S': 0, 'M': 1, 'L': 2}
ordinal_mapping_experience_level = {'EN': 0, 'MI': 1, 'SE': 2, 'EX': 3}

for source_column, mapping in (
    ('company_size', ordinal_mapping_company_size),
    ('experience_level', ordinal_mapping_experience_level),
):
    # Series.map turns any label missing from the dict into NaN without
    # warning, so check for unknown labels before writing the encoded column.
    # Values that are already missing are left alone and stay NaN.
    unmapped = set(df[source_column].dropna().unique()) - set(mapping)
    if unmapped:
        raise ValueError(
            'No ordinal rank defined for {} value(s): {}'.format(
                source_column, sorted(map(str, unmapped))
            )
        )
    df[f'{source_column}_ordinal'] = df[source_column].map(mapping)

In [11]:
# Preview the first five rows to check the two new *_ordinal columns.
df.head(n=5)

Unnamed: 0,experience_level,employment_type,job_title,salary_currency,salary_in_usd,employee_residence,company_location,company_size,company_size_ordinal,experience_level_ordinal
0,SE,FT,Principal Data Scientist,EUR,85847,ES,ES,L,2,2
1,MI,CT,ML Engineer,USD,30000,US,US,S,0,1
2,MI,CT,ML Engineer,USD,25500,US,US,S,0,1
3,SE,FT,Data Scientist,USD,175000,CA,CA,M,1,2
4,SE,FT,Data Scientist,USD,120000,CA,CA,M,1,2


### One-hot encoding the rest of the categorical columns

In [12]:
# One-hot encode the remaining categorical features. drop_first=True omits the
# first level of each feature, so a feature with k levels yields k-1 columns.
# dtype is pinned to uint8 so the indicator columns are 0/1 on every pandas
# version; from pandas 2.0 the default is bool, which would display and be
# saved as True/False.
nominal_columns = ['employment_type', 'company_location', 'employee_residence', 'job_title']
df = pd.get_dummies(df, columns=nominal_columns, drop_first=True, dtype='uint8')
df.head()

Unnamed: 0,experience_level,salary_currency,salary_in_usd,company_size,company_size_ordinal,experience_level_ordinal,employment_type_FL,employment_type_FT,employment_type_PT,company_location_AL,company_location_AM,company_location_AR,company_location_AS,company_location_AT,company_location_AU,company_location_BA,company_location_BE,company_location_BO,company_location_BR,company_location_BS,company_location_CA,company_location_CF,company_location_CH,company_location_CL,company_location_CN,company_location_CO,company_location_CR,company_location_CZ,company_location_DE,company_location_DK,company_location_DZ,company_location_EE,company_location_EG,company_location_ES,company_location_FI,company_location_FR,company_location_GB,company_location_GH,company_location_GR,company_location_HK,company_location_HN,company_location_HR,company_location_HU,company_location_ID,company_location_IE,company_location_IL,company_location_IN,company_location_IQ,company_location_IR,company_location_IT,company_location_JP,company_location_KE,company_location_LT,company_location_LU,company_location_LV,company_location_MA,company_location_MD,company_location_MK,company_location_MT,company_location_MX,company_location_MY,company_location_NG,company_location_NL,company_location_NZ,company_location_PH,company_location_PK,company_location_PL,company_location_PR,company_location_PT,company_location_RO,company_location_RU,company_location_SE,company_location_SG,company_location_SI,company_location_SK,company_location_TH,company_location_TR,company_location_UA,company_location_US,company_location_VN,employee_residence_AM,employee_residence_AR,employee_residence_AS,employee_residence_AT,employee_residence_AU,employee_residence_BA,employee_residence_BE,employee_residence_BG,employee_residence_BO,employee_residence_BR,employee_residence_CA,employee_residence_CF,employee_residence_CH,employee_residence_CL,employee_residence_CN,employee_residence_CO,employee_residence_CR,employee_residence_CY,employee_r
esidence_CZ,employee_residence_DE,employee_residence_DK,employee_residence_DO,employee_residence_DZ,employee_residence_EE,employee_residence_EG,employee_residence_ES,employee_residence_FI,employee_residence_FR,employee_residence_GB,employee_residence_GH,employee_residence_GR,employee_residence_HK,employee_residence_HN,employee_residence_HR,employee_residence_HU,employee_residence_ID,employee_residence_IE,employee_residence_IL,employee_residence_IN,employee_residence_IQ,employee_residence_IR,employee_residence_IT,employee_residence_JE,employee_residence_JP,employee_residence_KE,employee_residence_KW,employee_residence_LT,employee_residence_LU,employee_residence_LV,employee_residence_MA,employee_residence_MD,employee_residence_MK,employee_residence_MT,employee_residence_MX,employee_residence_MY,employee_residence_NG,employee_residence_NL,employee_residence_NZ,employee_residence_PH,employee_residence_PK,employee_residence_PL,employee_residence_PR,employee_residence_PT,employee_residence_RO,employee_residence_RS,employee_residence_RU,employee_residence_SE,employee_residence_SG,employee_residence_SI,employee_residence_SK,employee_residence_TH,employee_residence_TN,employee_residence_TR,employee_residence_UA,employee_residence_US,employee_residence_UZ,employee_residence_VN,job_title_AI Developer,job_title_AI Programmer,job_title_AI Scientist,job_title_Analytics Engineer,job_title_Applied Data Scientist,job_title_Applied Machine Learning Engineer,job_title_Applied Machine Learning Scientist,job_title_Applied Scientist,job_title_Autonomous Vehicle Technician,job_title_Azure Data Engineer,job_title_BI Analyst,job_title_BI Data Analyst,job_title_BI Data Engineer,job_title_BI Developer,job_title_Big Data Architect,job_title_Big Data Engineer,job_title_Business Data Analyst,job_title_Business Intelligence Engineer,job_title_Cloud Data Architect,job_title_Cloud Data Engineer,job_title_Cloud Database Engineer,job_title_Compliance Data Analyst,job_title_Computer Vision 
Engineer,job_title_Computer Vision Software Engineer,job_title_Data Analyst,job_title_Data Analytics Consultant,job_title_Data Analytics Engineer,job_title_Data Analytics Lead,job_title_Data Analytics Manager,job_title_Data Analytics Specialist,job_title_Data Architect,job_title_Data DevOps Engineer,job_title_Data Engineer,job_title_Data Infrastructure Engineer,job_title_Data Lead,job_title_Data Management Specialist,job_title_Data Manager,job_title_Data Modeler,job_title_Data Operations Analyst,job_title_Data Operations Engineer,job_title_Data Quality Analyst,job_title_Data Science Consultant,job_title_Data Science Engineer,job_title_Data Science Lead,job_title_Data Science Manager,job_title_Data Science Tech Lead,job_title_Data Scientist,job_title_Data Scientist Lead,job_title_Data Specialist,job_title_Data Strategist,job_title_Deep Learning Engineer,job_title_Deep Learning Researcher,job_title_Director of Data Science,job_title_ETL Developer,job_title_ETL Engineer,job_title_Finance Data Analyst,job_title_Financial Data Analyst,job_title_Head of Data,job_title_Head of Data Science,job_title_Head of Machine Learning,job_title_Insight Analyst,job_title_Lead Data Analyst,job_title_Lead Data Engineer,job_title_Lead Data Scientist,job_title_Lead Machine Learning Engineer,job_title_ML Engineer,job_title_MLOps Engineer,job_title_Machine Learning Developer,job_title_Machine Learning Engineer,job_title_Machine Learning Infrastructure Engineer,job_title_Machine Learning Manager,job_title_Machine Learning Research Engineer,job_title_Machine Learning Researcher,job_title_Machine Learning Scientist,job_title_Machine Learning Software Engineer,job_title_Manager Data Management,job_title_Marketing Data Analyst,job_title_Marketing Data Engineer,job_title_NLP Engineer,job_title_Power BI Developer,job_title_Principal Data Analyst,job_title_Principal Data Architect,job_title_Principal Data Engineer,job_title_Principal Data Scientist,job_title_Principal Machine Learning 
Engineer,job_title_Product Data Analyst,job_title_Product Data Scientist,job_title_Research Engineer,job_title_Research Scientist,job_title_Software Data Engineer,job_title_Staff Data Analyst,job_title_Staff Data Scientist
0,SE,EUR,85847,L,2,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,MI,USD,30000,S,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,MI,USD,25500,S,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,SE,USD,175000,M,1,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,SE,USD,120000,M,1,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Dropping the unnecessary columns

In [13]:
# experience_level and company_size are now represented by their *_ordinal
# columns, so the raw text versions are removed; salary_currency is removed too.
# NOTE(review): presumably salary_currency is redundant because the salary is
# already expressed in USD -- confirm.
df = df.drop(columns=['salary_currency', 'experience_level', 'company_size'])

In [14]:
# Preview the first five rows of the encoded frame.
df.head(n=5)

Unnamed: 0,salary_in_usd,company_size_ordinal,experience_level_ordinal,employment_type_FL,employment_type_FT,employment_type_PT,company_location_AL,company_location_AM,company_location_AR,company_location_AS,company_location_AT,company_location_AU,company_location_BA,company_location_BE,company_location_BO,company_location_BR,company_location_BS,company_location_CA,company_location_CF,company_location_CH,company_location_CL,company_location_CN,company_location_CO,company_location_CR,company_location_CZ,company_location_DE,company_location_DK,company_location_DZ,company_location_EE,company_location_EG,company_location_ES,company_location_FI,company_location_FR,company_location_GB,company_location_GH,company_location_GR,company_location_HK,company_location_HN,company_location_HR,company_location_HU,company_location_ID,company_location_IE,company_location_IL,company_location_IN,company_location_IQ,company_location_IR,company_location_IT,company_location_JP,company_location_KE,company_location_LT,company_location_LU,company_location_LV,company_location_MA,company_location_MD,company_location_MK,company_location_MT,company_location_MX,company_location_MY,company_location_NG,company_location_NL,company_location_NZ,company_location_PH,company_location_PK,company_location_PL,company_location_PR,company_location_PT,company_location_RO,company_location_RU,company_location_SE,company_location_SG,company_location_SI,company_location_SK,company_location_TH,company_location_TR,company_location_UA,company_location_US,company_location_VN,employee_residence_AM,employee_residence_AR,employee_residence_AS,employee_residence_AT,employee_residence_AU,employee_residence_BA,employee_residence_BE,employee_residence_BG,employee_residence_BO,employee_residence_BR,employee_residence_CA,employee_residence_CF,employee_residence_CH,employee_residence_CL,employee_residence_CN,employee_residence_CO,employee_residence_CR,employee_residence_CY,employee_residence_CZ,employee_residence_DE,employee_res
idence_DK,employee_residence_DO,employee_residence_DZ,employee_residence_EE,employee_residence_EG,employee_residence_ES,employee_residence_FI,employee_residence_FR,employee_residence_GB,employee_residence_GH,employee_residence_GR,employee_residence_HK,employee_residence_HN,employee_residence_HR,employee_residence_HU,employee_residence_ID,employee_residence_IE,employee_residence_IL,employee_residence_IN,employee_residence_IQ,employee_residence_IR,employee_residence_IT,employee_residence_JE,employee_residence_JP,employee_residence_KE,employee_residence_KW,employee_residence_LT,employee_residence_LU,employee_residence_LV,employee_residence_MA,employee_residence_MD,employee_residence_MK,employee_residence_MT,employee_residence_MX,employee_residence_MY,employee_residence_NG,employee_residence_NL,employee_residence_NZ,employee_residence_PH,employee_residence_PK,employee_residence_PL,employee_residence_PR,employee_residence_PT,employee_residence_RO,employee_residence_RS,employee_residence_RU,employee_residence_SE,employee_residence_SG,employee_residence_SI,employee_residence_SK,employee_residence_TH,employee_residence_TN,employee_residence_TR,employee_residence_UA,employee_residence_US,employee_residence_UZ,employee_residence_VN,job_title_AI Developer,job_title_AI Programmer,job_title_AI Scientist,job_title_Analytics Engineer,job_title_Applied Data Scientist,job_title_Applied Machine Learning Engineer,job_title_Applied Machine Learning Scientist,job_title_Applied Scientist,job_title_Autonomous Vehicle Technician,job_title_Azure Data Engineer,job_title_BI Analyst,job_title_BI Data Analyst,job_title_BI Data Engineer,job_title_BI Developer,job_title_Big Data Architect,job_title_Big Data Engineer,job_title_Business Data Analyst,job_title_Business Intelligence Engineer,job_title_Cloud Data Architect,job_title_Cloud Data Engineer,job_title_Cloud Database Engineer,job_title_Compliance Data Analyst,job_title_Computer Vision Engineer,job_title_Computer Vision Software 
Engineer,job_title_Data Analyst,job_title_Data Analytics Consultant,job_title_Data Analytics Engineer,job_title_Data Analytics Lead,job_title_Data Analytics Manager,job_title_Data Analytics Specialist,job_title_Data Architect,job_title_Data DevOps Engineer,job_title_Data Engineer,job_title_Data Infrastructure Engineer,job_title_Data Lead,job_title_Data Management Specialist,job_title_Data Manager,job_title_Data Modeler,job_title_Data Operations Analyst,job_title_Data Operations Engineer,job_title_Data Quality Analyst,job_title_Data Science Consultant,job_title_Data Science Engineer,job_title_Data Science Lead,job_title_Data Science Manager,job_title_Data Science Tech Lead,job_title_Data Scientist,job_title_Data Scientist Lead,job_title_Data Specialist,job_title_Data Strategist,job_title_Deep Learning Engineer,job_title_Deep Learning Researcher,job_title_Director of Data Science,job_title_ETL Developer,job_title_ETL Engineer,job_title_Finance Data Analyst,job_title_Financial Data Analyst,job_title_Head of Data,job_title_Head of Data Science,job_title_Head of Machine Learning,job_title_Insight Analyst,job_title_Lead Data Analyst,job_title_Lead Data Engineer,job_title_Lead Data Scientist,job_title_Lead Machine Learning Engineer,job_title_ML Engineer,job_title_MLOps Engineer,job_title_Machine Learning Developer,job_title_Machine Learning Engineer,job_title_Machine Learning Infrastructure Engineer,job_title_Machine Learning Manager,job_title_Machine Learning Research Engineer,job_title_Machine Learning Researcher,job_title_Machine Learning Scientist,job_title_Machine Learning Software Engineer,job_title_Manager Data Management,job_title_Marketing Data Analyst,job_title_Marketing Data Engineer,job_title_NLP Engineer,job_title_Power BI Developer,job_title_Principal Data Analyst,job_title_Principal Data Architect,job_title_Principal Data Engineer,job_title_Principal Data Scientist,job_title_Principal Machine Learning Engineer,job_title_Product Data 
Analyst,job_title_Product Data Scientist,job_title_Research Engineer,job_title_Research Scientist,job_title_Software Data Engineer,job_title_Staff Data Analyst,job_title_Staff Data Scientist
0,85847,2,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,30000,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,25500,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,175000,1,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,120000,1,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [15]:
# (number of rows, number of columns) of the encoded frame.
(len(df.index), len(df.columns))

(3755, 246)

In [17]:
# Write the encoded frame to disk; index=False leaves the row index out of the file.
output_csv = 'data/final_df.csv'
df.to_csv(output_csv, index=False)