# Adjusting currencies 

## Extracting the prices from Barcelona json file

In [1]:
import json
import pandas as pd
import os

#Open the madrid coworkings csv
path = '/workspaces/Coworking/src/results/Barcelona/Barcelona_coworking_spaces.json'

with open(path, 'r', encoding='utf-8') as file:
    data = json.load(file)

df = pd.json_normalize(data)

df_barcelona_price= df[['price']]

In [2]:
import re

# Function to extract the price
def extract_price(text):
    if pd.isna(text):  # Handle missing values
        return None
    match = re.search(r'(\d+)', text)  # Extract the first number
    return int(match.group(1)) if match else None  # Convert to int if found

# Apply the function to the 'price' column
df_barcelona_price['price_cleaned'] = df_barcelona_price['price'].apply(extract_price)

# Display the results
print(df_barcelona_price[['price', 'price_cleaned']])

                                    price  price_cleaned
0    Private Office\n from € 1214\n/month         1214.0
1     Private Office\n from € 305\n/month          305.0
2                           1\n2\n3\n4\n5            1.0
3     Private Office\n from € 155\n/month          155.0
4     Private Office\n from € 345\n/month          345.0
..                                    ...            ...
182     Private Office \nPrice on request            NaN
183     Private Office \nPrice on request            NaN
184                1\n2\n3\n4\n5\n6\n7\n8            1.0
185     Private Office \nPrice on request            NaN
186     Private Office \nPrice on request            NaN

[187 rows x 2 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_barcelona_price['price_cleaned'] = df_barcelona_price['price'].apply(extract_price)


## Extract the price from Madrid json file.

In [3]:
import json
import pandas as pd
import os

#Open the madrid coworkings csv
path = '/workspaces/Coworking/src/results/Madrid/Madrid_coworking_spaces.json'

with open(path, 'r', encoding='utf-8') as file:
    data = json.load(file)

df = pd.json_normalize(data)

df_madrid_price= df[['price']]

In [4]:
import re

# Function to extract the price
def extract_price(text):
    if pd.isna(text):  # Handle missing values
        return None
    match = re.search(r'(\d+)', text)  # Extract the first number
    return int(match.group(1)) if match else None  # Convert to int if found

# Apply the function to the 'price' column
df_madrid_price['price_cleaned'] = df_madrid_price['price'].apply(extract_price)

# Display the results
print(df_madrid_price[['price', 'price_cleaned']])

                                   price  price_cleaned
0    Private Office\n from € 490\n/month          490.0
1    Private Office\n from € 189\n/month          189.0
2    Private Office\n from € 315\n/month          315.0
3    Private Office\n from € 980\n/month          980.0
4    Private Office\n from € 225\n/month          225.0
..                                   ...            ...
180    Private Office \nPrice on request            NaN
181    Private Office \nPrice on request            NaN
182    Private Office \nPrice on request            NaN
183    Private Office \nPrice on request            NaN
184                        1\n2\n3\n4\n5            1.0

[185 rows x 2 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_madrid_price['price_cleaned'] = df_madrid_price['price'].apply(extract_price)


## Merge both prices dataframes.

In [5]:
import pandas as pd

# Concatenate both dataframes
df_euro = pd.concat([df_madrid_price, df_barcelona_price], ignore_index=True)

# Optionally, you can reset the index if needed
df_euro.reset_index(drop=True, inplace=True)

df_euro.head()

Unnamed: 0,price,price_cleaned
0,Private Office\n from € 490\n/month,490.0
1,Private Office\n from € 189\n/month,189.0
2,Private Office\n from € 315\n/month,315.0
3,Private Office\n from € 980\n/month,980.0
4,Private Office\n from € 225\n/month,225.0


In [6]:
df_euro.drop(columns='price', inplace=True)

df_euro.head()

Unnamed: 0,price_cleaned
0,490.0
1,189.0
2,315.0
3,980.0
4,225.0


### Transform NaN values with the median.

In [7]:
df_euro.isna().sum()

price_cleaned    27
dtype: int64

In [8]:
median_price = df_euro['price_cleaned'].median()
df_euro['price_cleaned'].fillna(median_price, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_euro['price_cleaned'].fillna(median_price, inplace=True)


In [9]:
df_euro.isna().sum()

price_cleaned    0
dtype: int64

## Convert Euro in USD and normalize

### Using ExchangeRate-API

In [10]:
import pandas as pd
import requests

# Define your API endpoint and your API key
api_key = "7352f0026f58a51e76be2471"  # Replace with your API key
api_url = f"https://v6.exchangerate-api.com/v6/{api_key}/latest/EUR"

# Fetch the EUR to USD conversion rate from the API
def get_eur_to_usd_rate():
    response = requests.get(api_url)
    data = response.json()
    
    # Check if the response is valid
    if data['result'] == 'success':
        return data['conversion_rates']['USD']
    else:
        raise Exception("Error fetching exchange rate from API")

# Convert EUR to USD
def convert_eur_to_usd(eur_price, conversion_rate):
    return eur_price * conversion_rate

# Fetch the conversion rate (EUR to USD)
conversion_rate = get_eur_to_usd_rate()

# Apply the conversion to the 'price' column
df_euro['price_usd'] = df_euro['price_cleaned'].apply(lambda x: convert_eur_to_usd(x, conversion_rate))

# Print the updated DataFrame
print(df_euro)


     price_cleaned  price_usd
0            490.0   529.9350
1            189.0   204.4035
2            315.0   340.6725
3            980.0  1059.8700
4            225.0   243.3375
..             ...        ...
367          250.0   270.3750
368          250.0   270.3750
369            1.0     1.0815
370          250.0   270.3750
371          250.0   270.3750

[372 rows x 2 columns]


In [11]:
df_euro.drop(columns='price_cleaned', inplace=True)

df_euro.head()

Unnamed: 0,price_usd
0,529.935
1,204.4035
2,340.6725
3,1059.87
4,243.3375


### The PPP factor of Spain is 1(LCU/Euro)(Local Currency Unit)

## Convert Yens in USD and use the PPP factor.

In [12]:
import json
import pandas as pd
import os

#Open the madrid coworkings csv
path = '/workspaces/Coworking/src/results/Tokyo/tokyo_coworking_spaces.json'

with open(path, 'r', encoding='utf-8') as file:
    data = json.load(file)

df = pd.json_normalize(data)

df_tokyo_price= df[['price']]

In [13]:
import re

df_tokyo_price['price_cleaned'] = df_tokyo_price['price'].apply(extract_price)

# Display the results
print(df_tokyo_price[['price', 'price_cleaned']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tokyo_price['price_cleaned'] = df_tokyo_price['price'].apply(extract_price)


                                     price  price_cleaned
0    Private Office\n from ¥ 55500\n/month        55500.0
1    Private Office\n from ¥ 69900\n/month        69900.0
2    Private Office\n from ¥ 43000\n/month        43000.0
3    Private Office\n from ¥ 58500\n/month        58500.0
4    Private Office\n from ¥ 71900\n/month        71900.0
..                                     ...            ...
191      Private Office \nPrice on request            NaN
192                          1\n2\n3\n4\n5            1.0
193      Private Office \nPrice on request            NaN
194      Private Office \nPrice on request            NaN
195      Private Office \nPrice on request            NaN

[196 rows x 2 columns]


In [14]:
df_tokyo_price.isna().sum()

price             0
price_cleaned    25
dtype: int64

In [15]:
median_price = df_tokyo_price['price_cleaned'].median()
df_tokyo_price['price_cleaned'].fillna(median_price, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_tokyo_price['price_cleaned'].fillna(median_price, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tokyo_price['price_cleaned'].fillna(median_price, inplace=True)


In [16]:
df_tokyo_price.drop(columns='price', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tokyo_price.drop(columns='price', inplace=True)


In [17]:
import pandas as pd
import requests

# Define your API endpoint and your API key
api_key = "7352f0026f58a51e76be2471"  # Replace with your API key
api_url = f"https://v6.exchangerate-api.com/v6/{api_key}/latest/EUR"

# Fetch the EUR to USD conversion rate from the API
def get_eur_to_usd_rate():
    response = requests.get(api_url)
    data = response.json()
    
    # Check if the response is valid
    if data['result'] == 'success':
        return data['conversion_rates']['USD']
    else:
        raise Exception("Error fetching exchange rate from API")

# Convert EUR to USD
def convert_eur_to_usd(eur_price, conversion_rate):
    return eur_price * conversion_rate

# Fetch the conversion rate (EUR to USD)
conversion_rate = get_eur_to_usd_rate()

# Apply the conversion to the 'price' column
df_tokyo_price['price_usd'] = df_tokyo_price['price_cleaned'].apply(lambda x: convert_eur_to_usd(x, conversion_rate))

# Print the updated DataFrame
print(df_tokyo_price)


     price_cleaned   price_usd
0          55500.0  60023.2500
1          69900.0  75596.8500
2          43000.0  46504.5000
3          58500.0  63267.7500
4          71900.0  77759.8500
..             ...         ...
191        35000.0  37852.5000
192            1.0      1.0815
193        35000.0  37852.5000
194        35000.0  37852.5000
195        35000.0  37852.5000

[196 rows x 2 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tokyo_price['price_usd'] = df_tokyo_price['price_cleaned'].apply(lambda x: convert_eur_to_usd(x, conversion_rate))


In [18]:
df_tokyo_price.drop(columns='price_cleaned', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tokyo_price.drop(columns='price_cleaned', inplace=True)


### The PPP of Japan is 95(LSU/USD)

In [19]:
df_tokyo_price['price_usd'] = df_tokyo_price['price_usd']*95

df_tokyo_price.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tokyo_price['price_usd'] = df_tokyo_price['price_usd']*95


Unnamed: 0,price_usd
0,5702208.75
1,7181700.75
2,4417927.5
3,6010436.25
4,7387185.75


## Convert Real into USD and use the PPP factor.

In [20]:
import json
import pandas as pd
import os

#Open Sao Paulo json
path = '/workspaces/Coworking/src/results/Sao Paulo/sp_coworking_spaces.json'

with open(path, 'r', encoding='utf-8') as file:
    data = json.load(file)

df = pd.json_normalize(data)

df_brazil_price= df[['price']]

In [21]:
import re

# Function to extract the price
def extract_price(text):
    if pd.isna(text):  # Handle missing values
        return None
    match = re.search(r'(\d+)', text)  # Extract the first number
    return int(match.group(1)) if match else None  # Convert to int if found

# Apply the function to the 'price' column
df_brazil_price['price_cleaned'] = df_brazil_price['price'].apply(extract_price)

# Display the results
print(df_brazil_price[['price', 'price_cleaned']])

                                     price  price_cleaned
0    Private Office\n from R$ 1385\n/month         1385.0
1    Private Office\n from R$ 1015\n/month         1015.0
2    Private Office\n from R$ 1565\n/month         1565.0
3    Private Office\n from R$ 1389\n/month         1389.0
4    Private Office\n from R$ 1899\n/month         1899.0
..                                     ...            ...
133      Private Office \nPrice on request            NaN
134      Private Office \nPrice on request            NaN
135                       1\n2\n3\n4\n5\n6            1.0
136      Private Office \nPrice on request            NaN
137      Private Office \nPrice on request            NaN

[138 rows x 2 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_brazil_price['price_cleaned'] = df_brazil_price['price'].apply(extract_price)


In [22]:
df_brazil_price.isna().sum()

price             0
price_cleaned    32
dtype: int64

In [23]:
median_price = df_brazil_price['price_cleaned'].median()
df_brazil_price['price_cleaned'].fillna(median_price, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_brazil_price['price_cleaned'].fillna(median_price, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_brazil_price['price_cleaned'].fillna(median_price, inplace=True)


In [24]:
df_brazil_price = df_brazil_price[df_brazil_price['price_cleaned'] != 1]


In [25]:
df_brazil_price.drop(columns='price', inplace=True)

## Load New York prices.

In [26]:
import pandas as pd

#Open New York csv
path = '/workspaces/Coworking/src/results/New York/New_York_coworking_spaces.csv'

df_newyork_price = pd.read_csv(path)

In [34]:
import re

# Function to extract the price
def extract_price(text):
    if pd.isna(text):  # Handle missing values
        return None
    match = re.search(r'(\d+)', text)  # Extract the first number
    return int(match.group(1)) if match else None  # Convert to int if found

# Apply the function to create a new column 'price_cleaned'
df_newyork_price['price_cleaned'] = df_newyork_price['price'].apply(extract_price)

# Display the results
print(df_newyork_price[['price', 'price_cleaned']].head())

                                  price  price_cleaned
0                       Price not found            NaN
1                               POPULAR            NaN
2  Private Office\n from $ 3875\n/month         3875.0
3  Private Office\n from $ 1600\n/month         1600.0
4                       Price not found            NaN


In [36]:
df_newyork_price.isna().sum()

url                0
name               3
address          426
description        0
price              0
price_cleaned    151
dtype: int64

In [30]:
df_newyork_price.dropna()

Unnamed: 0,url,name,address,description,price


## Merge all the prices.

In [31]:
import pandas as pd

df_prices = pd.concat([df_euro, df_tokyo_price, df_brazil_price, df_newyork_price]).stack().reset_index(drop=True).to_frame(name='price')

df_prices.head()

Unnamed: 0,price
0,529.935
1,204.4035
2,340.6725
3,1059.87
4,243.3375


In [32]:
df_prices.shape

(2394, 1)