## Выгрузка данных о валютах и внедрение их в таблицу

In [1]:
# загружаем необходимые библиотеки
import pandas as pd
import numpy as np

In [121]:
# Load the salaries dataset.
# A forward slash works on every OS; a backslash here would form the
# invalid escape sequence "\d" inside a regular string literal.
data = pd.read_csv('data/ds_salaries.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,work_year,experience_level,employment_type,job_title,salary,salary_currency,salary_in_usd,employee_residence,remote_ratio,company_location,company_size
0,0,2020,MI,FT,Data Scientist,70000,EUR,79833,DE,0,DE,L
1,1,2020,SE,FT,Machine Learning Scientist,260000,USD,260000,JP,0,JP,S
2,2,2020,SE,FT,Big Data Engineer,85000,GBP,109024,GB,50,GB,M
3,3,2020,MI,FT,Product Data Analyst,20000,USD,20000,HN,0,HN,S
4,4,2020,SE,FT,Machine Learning Engineer,150000,USD,150000,US,50,US,L


### Видим, что зарплаты выражены в разных валютах; для дальнейшего анализа их нужно привести к одной

In [122]:
# Distinct currency codes in which the salaries are recorded.
pd.unique(data['salary_currency'])

array(['EUR', 'USD', 'GBP', 'HUF', 'INR', 'JPY', 'CNY', 'MXN', 'CAD',
       'DKK', 'PLN', 'SGD', 'CLP', 'BRL', 'TRY', 'AUD', 'CHF'],
      dtype=object)

In [123]:
import yfinance as yf  # library used to download currency quotes

# Currencies to convert: every code that occurs in the data except USD,
# which is the target currency and therefore needs no quote.
# Deriving the list from the data keeps it in sync with the dataset.
tickers_list = [
    currency
    for currency in data['salary_currency'].dropna().unique()
    if currency != 'USD'
]

# Build the ticker format expected by yfinance (e.g. 'EURUSD=X').
tickers_for_yf = [x + 'USD=X' for x in tickers_list]

# Collect one USD multiplier per currency.
usd_rates = {}
for currency, ticker in zip(tickers_list, tickers_for_yf):
    quotes = yf.download(ticker, '2023-05-05', '2023-05-06')['Adj Close']
    # Keep the last quote of the requested window; an empty download is
    # recorded as NaN so the gap stays visible after the merge.
    usd_rates[currency] = float(quotes.iloc[-1]) if len(quotes) else float('nan')

# The dollar converts to itself one-to-one.
usd_rates['USD'] = 1.00

# Lookup table: one row per currency with its multiplier to USD.
tickers = pd.DataFrame({
    'salary_currency': list(usd_rates),
    'proportion': list(usd_rates.values()),
})
display(tickers)

# Attach the rate to every salary row. `validate` makes pandas raise if a
# currency appears more than once in the lookup table, which would
# otherwise silently duplicate salary rows.
data_m = data.merge(
    tickers,
    on='salary_currency',
    how='left',
    validate='many_to_one',
)

display(data_m)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Unnamed: 0,salary_currency,proportion
0,EUR,1.102293
1,GBP,1.258336
2,HUF,0.002956
3,INR,0.012237
4,JPY,0.007453
5,CNY,0.144808
6,MXN,0.055868
7,CAD,0.739481
8,DKK,0.147976
9,PLN,0.240367


Unnamed: 0.1,Unnamed: 0,work_year,experience_level,employment_type,job_title,salary,salary_currency,salary_in_usd,employee_residence,remote_ratio,company_location,company_size,proportion
0,0,2020,MI,FT,Data Scientist,70000,EUR,79833,DE,0,DE,L,1.102293
1,1,2020,SE,FT,Machine Learning Scientist,260000,USD,260000,JP,0,JP,S,1.000000
2,2,2020,SE,FT,Big Data Engineer,85000,GBP,109024,GB,50,GB,M,1.258336
3,3,2020,MI,FT,Product Data Analyst,20000,USD,20000,HN,0,HN,S,1.000000
4,4,2020,SE,FT,Machine Learning Engineer,150000,USD,150000,US,50,US,L,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
602,602,2022,SE,FT,Data Engineer,154000,USD,154000,US,100,US,M,1.000000
603,603,2022,SE,FT,Data Engineer,126000,USD,126000,US,100,US,M,1.000000
604,604,2022,SE,FT,Data Analyst,129000,USD,129000,US,0,US,M,1.000000
605,605,2022,SE,FT,Data Analyst,150000,USD,150000,US,100,US,M,1.000000


In [150]:
# Verify that every row received a conversion rate. The check raises an
# error because a bare comparison in the middle of a cell is evaluated
# and then discarded without ever being shown.
missing_rate = data_m['proportion'].isna()
if missing_rate.any():
    unmatched = data_m.loc[missing_rate, 'salary_currency'].unique().tolist()
    raise ValueError(f'No USD rate found for currencies: {unmatched}')

# Add a column with the salary converted to USD, rounded to two decimals.
data_m['salary_USD'] = (data_m['salary'] * data_m['proportion']).round(2)

display(data_m)

Unnamed: 0.1,Unnamed: 0,work_year,experience_level,employment_type,job_title,salary,salary_currency,salary_in_usd,employee_residence,remote_ratio,company_location,company_size,proportion,salary_USD
0,0,2020,MI,FT,Data Scientist,70000,EUR,79833,DE,0,DE,L,1.102293,77160.49
1,1,2020,SE,FT,Machine Learning Scientist,260000,USD,260000,JP,0,JP,S,1.000000,260000.00
2,2,2020,SE,FT,Big Data Engineer,85000,GBP,109024,GB,50,GB,M,1.258336,106958.60
3,3,2020,MI,FT,Product Data Analyst,20000,USD,20000,HN,0,HN,S,1.000000,20000.00
4,4,2020,SE,FT,Machine Learning Engineer,150000,USD,150000,US,50,US,L,1.000000,150000.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
602,602,2022,SE,FT,Data Engineer,154000,USD,154000,US,100,US,M,1.000000,154000.00
603,603,2022,SE,FT,Data Engineer,126000,USD,126000,US,100,US,M,1.000000,126000.00
604,604,2022,SE,FT,Data Analyst,129000,USD,129000,US,0,US,M,1.000000,129000.00
605,605,2022,SE,FT,Data Analyst,150000,USD,150000,US,100,US,M,1.000000,150000.00
