In [8]:
#conda install --upgrade bottleneck

In [1]:
import pandas as pd
import numpy as np
import re


1. Austria

In [2]:
as_offers = pd.read_csv("Austria.csv")
as_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Euro norm,Body type,Color,City,Power,Transmission,Contact
0,"56,890 EUR",Ford,Mustang,5.0,"25,800 km",2020,Petrol,6.0,Coupe,Red,Salzburg,335 kW (456 HP),Automatic,https://www.zweispurig.at/ford-mustang-gebrauc...
1,"59,940 EUR",Alfa Romeo,Tonale,1.3,10 km,2024,Hybrid,,Crossover,Red,Amstetten,208 kW (283 HP),Automatic,https://www.zweispurig.at/alfa-romeo-tonale-vo...
2,"74,900 EUR",Audi,Q5,2.0 40 TDI quattro,"12,600 km",2023,Diesel,6.0,Suv,Black,Eisenstadt,152 kW (207 HP),Automatic,https://www.dasweltauto.at/vehicle/20311634
3,"35,999 EUR",Bmw,X4,,"133,000 km",2017,Diesel,6.0,Suv,Black,Steyr,233 kW (317 HP),Automatic,https://www.zweispurig.at/bmw-x4-gebrauchtwage...
4,"24,990 EUR",Mitsubishi,Pajero,3.2 did,"149,900 km",2013,Diesel,5.0,Suv,White,Stockerau,149 kW (203 HP),Automatic,https://www.zweispurig.at/mitsubishi-pajero-ge...


In [3]:
# check the data shape
as_offers.shape

(260, 14)

In [4]:
# Check data type
as_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         260 non-null    object 
 1   Make          260 non-null    object 
 2   Model         260 non-null    object 
 3   Trim          247 non-null    object 
 4   Mi            260 non-null    object 
 5   Year          260 non-null    int64  
 6   Fuel type     260 non-null    object 
 7   Euro norm     197 non-null    float64
 8   Body type     260 non-null    object 
 9   Color         260 non-null    object 
 10  City          260 non-null    object 
 11  Power         260 non-null    object 
 12  Transmission  260 non-null    object 
 13  Contact       260 non-null    object 
dtypes: float64(1), int64(1), object(12)
memory usage: 28.6+ KB


Kilowatts are a metric unit used to measure power - in this case, the rate at which a car's engine turns the energy stored in fossil fuels into movement. Usually, the more kilowatts an engine produces, the faster a car will accelerate.

Power is measured in horsepower (hp) or in Watts (kW), the two units are directly proportional. The hp measure is a historical one based on old imperial units (like inches, feet and miles), the kW measure is from the metric (SI) system which is an attempt to bring some global conformity. In Europe, the metric system (kW) is by-and-large the norm, while in the US, the hp is the unit of choice.

In [5]:
# Price needs to be modified by removing the "EUR" from the string value and transform it into numeric value
# "Mi" also needs to remove the "km" in the end and "," in the middle and turns into numeric value
# We'll first split the column "Power" and only keep kW in numerical type


In [6]:
# Define a function for each of the change point I want to make
def clean_price(row):
    cleaned_row = int(str(row).replace(',', '').replace('EUR', ''))
    return cleaned_row

In [7]:
def power_kW_split(row):
    matches = re.findall(r'(\d+(\.\d+)?)\s*kW', str(row))
    
    # Check if there are any matches before attempting to access the first element
    if matches:
        kW_value = float(matches[0][0]) 
        return kW_value
    else:
        return None


In [8]:
def clean_mi(row):
    cleaned_row = int(str(row).replace(',', '').replace('km', ''))
    return cleaned_row

In [9]:
#calculate based on the year of each norm
def calculate_euro_norm(row):
    if row >= 2014:
        return 6
    elif row >= 2009:
        return 5
    elif row >=2005:
        return 4
    elif row >= 2000:
        return 3
    elif row >= 1996:
        return 2
    else:
        return 1

In [10]:
as_offers['Price(EUR)'] = as_offers['Price'].apply(clean_price)
as_offers['Mileage(km)'] = as_offers['Mi'].apply(clean_mi)
as_offers['kW'] = as_offers['Power'].apply(power_kW_split)
as_offers['Emission standard'] = as_offers['Year'].apply(calculate_euro_norm)

In [11]:
as_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Euro norm,Body type,Color,City,Power,Transmission,Contact,Price(EUR),Mileage(km),kW,Emission standard
0,"56,890 EUR",Ford,Mustang,5.0,"25,800 km",2020,Petrol,6.0,Coupe,Red,Salzburg,335 kW (456 HP),Automatic,https://www.zweispurig.at/ford-mustang-gebrauc...,56890,25800,335.0,6
1,"59,940 EUR",Alfa Romeo,Tonale,1.3,10 km,2024,Hybrid,,Crossover,Red,Amstetten,208 kW (283 HP),Automatic,https://www.zweispurig.at/alfa-romeo-tonale-vo...,59940,10,208.0,6
2,"74,900 EUR",Audi,Q5,2.0 40 TDI quattro,"12,600 km",2023,Diesel,6.0,Suv,Black,Eisenstadt,152 kW (207 HP),Automatic,https://www.dasweltauto.at/vehicle/20311634,74900,12600,152.0,6
3,"35,999 EUR",Bmw,X4,,"133,000 km",2017,Diesel,6.0,Suv,Black,Steyr,233 kW (317 HP),Automatic,https://www.zweispurig.at/bmw-x4-gebrauchtwage...,35999,133000,233.0,6
4,"24,990 EUR",Mitsubishi,Pajero,3.2 did,"149,900 km",2013,Diesel,5.0,Suv,White,Stockerau,149 kW (203 HP),Automatic,https://www.zweispurig.at/mitsubishi-pajero-ge...,24990,149900,149.0,5


In [12]:
# Now the columns of "Price", "Mi", 'Euro norm' and "Power" can be deleted
as_offers.drop(columns = ["Price", "Mi", "Power", 'Euro norm'], inplace = True)

In [13]:
as_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Price(EUR),Mileage(km),kW,Emission standard
0,Ford,Mustang,5.0,2020,Petrol,Coupe,Red,Salzburg,Automatic,https://www.zweispurig.at/ford-mustang-gebrauc...,56890,25800,335.0,6
1,Alfa Romeo,Tonale,1.3,2024,Hybrid,Crossover,Red,Amstetten,Automatic,https://www.zweispurig.at/alfa-romeo-tonale-vo...,59940,10,208.0,6
2,Audi,Q5,2.0 40 TDI quattro,2023,Diesel,Suv,Black,Eisenstadt,Automatic,https://www.dasweltauto.at/vehicle/20311634,74900,12600,152.0,6
3,Bmw,X4,,2017,Diesel,Suv,Black,Steyr,Automatic,https://www.zweispurig.at/bmw-x4-gebrauchtwage...,35999,133000,233.0,6
4,Mitsubishi,Pajero,3.2 did,2013,Diesel,Suv,White,Stockerau,Automatic,https://www.zweispurig.at/mitsubishi-pajero-ge...,24990,149900,149.0,5


In [14]:
as_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               260 non-null    object 
 1   Model              260 non-null    object 
 2   Trim               247 non-null    object 
 3   Year               260 non-null    int64  
 4   Fuel type          260 non-null    object 
 5   Body type          260 non-null    object 
 6   Color              260 non-null    object 
 7   City               260 non-null    object 
 8   Transmission       260 non-null    object 
 9   Contact            260 non-null    object 
 10  Price(EUR)         260 non-null    int64  
 11  Mileage(km)        260 non-null    int64  
 12  kW                 260 non-null    float64
 13  Emission standard  260 non-null    int64  
dtypes: float64(1), int64(4), object(9)
memory usage: 28.6+ KB


In [15]:
# As we can see there are some missing values
as_offers.isna().sum()

Make                  0
Model                 0
Trim                 13
Year                  0
Fuel type             0
Body type             0
Color                 0
City                  0
Transmission          0
Contact               0
Price(EUR)            0
Mileage(km)           0
kW                    0
Emission standard     0
dtype: int64

In [16]:
# Drop the missing value for "Trim"
as_offers.dropna(inplace=True)
as_offers.isna().sum()


Make                 0
Model                0
Trim                 0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Price(EUR)           0
Mileage(km)          0
kW                   0
Emission standard    0
dtype: int64

"Euro norm" refers to the European emission standards or regulations that set limits on the amount of pollutants that vehicles are allowed to emit. 
The Euro norms are a series of regulations that specify the acceptable limits for various pollutants, including nitrogen oxides (NOx), particulate matter (PM), carbon monoxide (CO), hydrocarbons (HC), and others. These standards have been progressively tightened over the years to encourage the development and adoption of cleaner and more environmentally friendly vehicle technologies.

For example, Euro 1, Euro 2, Euro 3, and so on, represent different stages of emission standards. As vehicles advance through these stages, they must comply with stricter emission limits. The Euro 6 standard, which was implemented in 2014 for passenger cars, is one of the latest and most stringent standards, setting strict limits on NOx and other pollutants.

In [17]:
as_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Price(EUR),Mileage(km),kW,Emission standard
0,Ford,Mustang,5.0,2020,Petrol,Coupe,Red,Salzburg,Automatic,https://www.zweispurig.at/ford-mustang-gebrauc...,56890,25800,335.0,6
1,Alfa Romeo,Tonale,1.3,2024,Hybrid,Crossover,Red,Amstetten,Automatic,https://www.zweispurig.at/alfa-romeo-tonale-vo...,59940,10,208.0,6
2,Audi,Q5,2.0 40 TDI quattro,2023,Diesel,Suv,Black,Eisenstadt,Automatic,https://www.dasweltauto.at/vehicle/20311634,74900,12600,152.0,6
4,Mitsubishi,Pajero,3.2 did,2013,Diesel,Suv,White,Stockerau,Automatic,https://www.zweispurig.at/mitsubishi-pajero-ge...,24990,149900,149.0,5
5,Lexus,Nx,300h,2021,Petrol,Suv,Black,Enns,Automatic,https://www.zweispurig.at/lexus-nx-gebrauchtwa...,40500,27726,146.0,6


In [18]:
# Let's check "Transmission"
as_offers['Transmission'].value_counts()


Transmission
Automatic         153
Manual             91
Semi Automatic      3
Name: count, dtype: int64

In [19]:
# create a new column with the country name "Austria"
as_offers['Country'] = "Austria" 

In [20]:
as_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Price(EUR),Mileage(km),kW,Emission standard,Country
0,Ford,Mustang,5.0,2020,Petrol,Coupe,Red,Salzburg,Automatic,https://www.zweispurig.at/ford-mustang-gebrauc...,56890,25800,335.0,6,Austria
1,Alfa Romeo,Tonale,1.3,2024,Hybrid,Crossover,Red,Amstetten,Automatic,https://www.zweispurig.at/alfa-romeo-tonale-vo...,59940,10,208.0,6,Austria
2,Audi,Q5,2.0 40 TDI quattro,2023,Diesel,Suv,Black,Eisenstadt,Automatic,https://www.dasweltauto.at/vehicle/20311634,74900,12600,152.0,6,Austria
4,Mitsubishi,Pajero,3.2 did,2013,Diesel,Suv,White,Stockerau,Automatic,https://www.zweispurig.at/mitsubishi-pajero-ge...,24990,149900,149.0,5,Austria
5,Lexus,Nx,300h,2021,Petrol,Suv,Black,Enns,Automatic,https://www.zweispurig.at/lexus-nx-gebrauchtwa...,40500,27726,146.0,6,Austria


In [21]:
# Save it to a new csv
as_offers.to_csv('cleaned_Austria_offers.csv', index=False)

2. Belgium

In [22]:
be_offers = pd.read_csv('Belgium.csv')
be_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Euro norm,Body type,Color,City,Power,Transmission,Contact
0,"23,990 EUR",Audi,A1,1.0 25 TFSI,"36,854 km",2022,Petrol,6.0,Mini,Gray,Genk,70 kW (95 HP),Automatic,https://www.vroom.be/fr/voitures-occasion/audi...
1,"25,990 EUR",Audi,A1,1.0 25 TFSI,"29,887 km",2022,Petrol,6.0,Mini,Gray,Genk,70 kW (95 HP),Automatic,https://www.vroom.be/fr/voitures-occasion/audi...
2,"19,990 EUR",Ford,Kuga,1.5,"60,820 km",2019,Petrol,9.0,Suv,Black,Rotselaar,111 kW (151 HP),Manual,https://www.autogids.be/detail-id--9464011--fo...
3,"38,995 EUR",Bmw,X6,,"86,905 km",2017,Diesel,6.0,Suv,Gray,Brugge,157 kW (214 HP),Automatic,https://www.vroom.be/fr/voitures-occasion/bmw-...
4,"23,725 EUR",Ford,Focus,1.0 st,"26,648 km",2021,Petrol,,Hatchback,Gray,Boortmeerbeek,93 kW (126 HP),Manual,https://www.vroom.be/fr/voitures-occasion/ford...


In [23]:
be_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         260 non-null    object 
 1   Make          260 non-null    object 
 2   Model         260 non-null    object 
 3   Trim          213 non-null    object 
 4   Mi            260 non-null    object 
 5   Year          260 non-null    int64  
 6   Fuel type     260 non-null    object 
 7   Euro norm     224 non-null    float64
 8   Body type     260 non-null    object 
 9   Color         260 non-null    object 
 10  City          260 non-null    object 
 11  Power         260 non-null    object 
 12  Transmission  260 non-null    object 
 13  Contact       260 non-null    object 
dtypes: float64(1), int64(1), object(12)
memory usage: 28.6+ KB


In [24]:
be_offers['Price(EUR)'] = be_offers['Price'].apply(clean_price)
be_offers['Mileage(km)'] = be_offers['Mi'].apply(clean_mi)
be_offers['kW'] = be_offers['Power'].apply(power_kW_split)
be_offers['Emission standard'] = be_offers['Year'].apply(calculate_euro_norm)

In [25]:
be_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Euro norm,Body type,Color,City,Power,Transmission,Contact,Price(EUR),Mileage(km),kW,Emission standard
0,"23,990 EUR",Audi,A1,1.0 25 TFSI,"36,854 km",2022,Petrol,6.0,Mini,Gray,Genk,70 kW (95 HP),Automatic,https://www.vroom.be/fr/voitures-occasion/audi...,23990,36854,70.0,6
1,"25,990 EUR",Audi,A1,1.0 25 TFSI,"29,887 km",2022,Petrol,6.0,Mini,Gray,Genk,70 kW (95 HP),Automatic,https://www.vroom.be/fr/voitures-occasion/audi...,25990,29887,70.0,6
2,"19,990 EUR",Ford,Kuga,1.5,"60,820 km",2019,Petrol,9.0,Suv,Black,Rotselaar,111 kW (151 HP),Manual,https://www.autogids.be/detail-id--9464011--fo...,19990,60820,111.0,6
3,"38,995 EUR",Bmw,X6,,"86,905 km",2017,Diesel,6.0,Suv,Gray,Brugge,157 kW (214 HP),Automatic,https://www.vroom.be/fr/voitures-occasion/bmw-...,38995,86905,157.0,6
4,"23,725 EUR",Ford,Focus,1.0 st,"26,648 km",2021,Petrol,,Hatchback,Gray,Boortmeerbeek,93 kW (126 HP),Manual,https://www.vroom.be/fr/voitures-occasion/ford...,23725,26648,93.0,6


In [26]:
be_offers.isna().sum()

Price                 0
Make                  0
Model                 0
Trim                 47
Mi                    0
Year                  0
Fuel type             0
Euro norm            36
Body type             0
Color                 0
City                  0
Power                 0
Transmission          0
Contact               0
Price(EUR)            0
Mileage(km)           0
kW                    0
Emission standard     0
dtype: int64

In [27]:
# Romove the original columns "Price", "Mi",'Euro norm',"Power"
be_offers.drop(columns=["Price", "Mi", "Euro norm", "Power"], inplace = True)

In [28]:
be_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               260 non-null    object 
 1   Model              260 non-null    object 
 2   Trim               213 non-null    object 
 3   Year               260 non-null    int64  
 4   Fuel type          260 non-null    object 
 5   Body type          260 non-null    object 
 6   Color              260 non-null    object 
 7   City               260 non-null    object 
 8   Transmission       260 non-null    object 
 9   Contact            260 non-null    object 
 10  Price(EUR)         260 non-null    int64  
 11  Mileage(km)        260 non-null    int64  
 12  kW                 260 non-null    float64
 13  Emission standard  260 non-null    int64  
dtypes: float64(1), int64(4), object(9)
memory usage: 28.6+ KB


In [29]:
be_offers.isna().sum()

Make                  0
Model                 0
Trim                 47
Year                  0
Fuel type             0
Body type             0
Color                 0
City                  0
Transmission          0
Contact               0
Price(EUR)            0
Mileage(km)           0
kW                    0
Emission standard     0
dtype: int64

In [30]:
# delete the missing values in "Trim" and "kW"
be_offers.dropna(subset= ['Trim'], inplace=True)

In [31]:
be_offers.isna().sum()

Make                 0
Model                0
Trim                 0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Price(EUR)           0
Mileage(km)          0
kW                   0
Emission standard    0
dtype: int64

In [32]:
be_offers['Transmission'].value_counts()

Transmission
Automatic    174
Manual        39
Name: count, dtype: int64

In [33]:
be_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 213 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               213 non-null    object 
 1   Model              213 non-null    object 
 2   Trim               213 non-null    object 
 3   Year               213 non-null    int64  
 4   Fuel type          213 non-null    object 
 5   Body type          213 non-null    object 
 6   Color              213 non-null    object 
 7   City               213 non-null    object 
 8   Transmission       213 non-null    object 
 9   Contact            213 non-null    object 
 10  Price(EUR)         213 non-null    int64  
 11  Mileage(km)        213 non-null    int64  
 12  kW                 213 non-null    float64
 13  Emission standard  213 non-null    int64  
dtypes: float64(1), int64(4), object(9)
memory usage: 25.0+ KB


In [34]:
#add another column "Country"
be_offers['Country'] = "Belgium"

In [35]:
be_offers.head(20)

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Price(EUR),Mileage(km),kW,Emission standard,Country
0,Audi,A1,1.0 25 TFSI,2022,Petrol,Mini,Gray,Genk,Automatic,https://www.vroom.be/fr/voitures-occasion/audi...,23990,36854,70.0,6,Belgium
1,Audi,A1,1.0 25 TFSI,2022,Petrol,Mini,Gray,Genk,Automatic,https://www.vroom.be/fr/voitures-occasion/audi...,25990,29887,70.0,6,Belgium
2,Ford,Kuga,1.5,2019,Petrol,Suv,Black,Rotselaar,Manual,https://www.autogids.be/detail-id--9464011--fo...,19990,60820,111.0,6,Belgium
4,Ford,Focus,1.0 st,2021,Petrol,Hatchback,Gray,Boortmeerbeek,Manual,https://www.vroom.be/fr/voitures-occasion/ford...,23725,26648,93.0,6,Belgium
5,Audi,A4,2.0,2021,Diesel,Wagon,Black,Genk,Automatic,https://www.autogids.be/detail-id--9555591--au...,38500,21947,121.0,6,Belgium
6,Mercedes Benz,Eqe,AMG43,2022,Electric,Sedan,White,Huy,Automatic,https://www.autogids.be/detail-id--9556842--me...,78990,19744,354.0,6,Belgium
7,Audi,Q5,2.0,2020,Diesel,Suv,Blue,Leuze En Hainaut,Automatic,https://www.autogids.be/detail-id--9533167--au...,37990,76764,101.0,6,Belgium
8,Nissan,X-Trail,1.3,2018,Petrol,Suv,White,Keerbergen,Manual,https://www.autogids.be/detail-id--9529074--ni...,20390,83257,121.0,6,Belgium
9,Volkswagen,Tiguan,1.5,2024,Petrol,Crossover,Black,Tournai,Automatic,https://www.autotrends.be/fr/occasions/voiture...,46990,9,111.0,6,Belgium
11,Audi,A5,2.0,2017,Diesel,Convertible,Gray,Sint Niklaas,Automatic,https://www.vroom.be/fr/voitures-occasion/audi...,29990,77150,141.0,6,Belgium


In [36]:
be_offers.to_csv('cleaned_Belgium_offers.csv', index=False)

3. France

In [37]:
fr_offers = pd.read_csv('France.csv')
fr_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"18,990 EUR",Renault,Kadjar,1.3 TCe,"53,407 km",2020,Petrol,Suv,Black,Le Blanc,104 kW (141 HP),Manual,https://occasion.autoplus.fr/voiture-occasion-...
1,"24,990 EUR",Hyundai,Kona,electric,"61,444 km",2021,Electric,Crossover,Blue,Tours,152 kW (207 HP),Automatic,https://www.auto-selection.com/voiture-occasio...
2,"27,900 EUR",Porsche,Cayenne,3.0 diesel,"142,000 km",2013,Diesel,Suv,Gray,Nice,182 kW (248 HP),Automatic,https://www.leparking.fr/voiture-occasion-deta...
3,"23,890 EUR",Seat,Ateca,1.5 tsi,"38,258 km",2020,Petrol,Suv,Brown,Beauvais,111 kW (151 HP),Automatic,https://fr.renew.auto/achat-vehicules-occasion...
4,"28,450 EUR",Toyota,Yaris,1.5,"6,780 km",2024,Hybrid,Sedan,Blue,Rivery,68 kW (92 HP),Automatic,https://occasion.autoplus.fr/voiture-occasion-...


In [38]:
fr_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Price         260 non-null    object
 1   Make          260 non-null    object
 2   Model         260 non-null    object
 3   Trim          239 non-null    object
 4   Mi            260 non-null    object
 5   Year          260 non-null    int64 
 6   Fuel type     260 non-null    object
 7   Body type     260 non-null    object
 8   Color         260 non-null    object
 9   City          260 non-null    object
 10  Power         259 non-null    object
 11  Transmission  260 non-null    object
 12  Contact       260 non-null    object
dtypes: int64(1), object(12)
memory usage: 26.5+ KB


In [39]:
# Create a new column "Euro norm"  "Price", "Mi" and "Power"
fr_offers['Emission standard'] = fr_offers['Year'].apply(calculate_euro_norm)
fr_offers['Price(EUR)'] = fr_offers['Price'].apply(clean_price)
fr_offers['Mileage(km)'] = fr_offers['Mi'].apply(clean_mi)
fr_offers['kW'] = fr_offers['Power'].apply(power_kW_split)

In [40]:
fr_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,"18,990 EUR",Renault,Kadjar,1.3 TCe,"53,407 km",2020,Petrol,Suv,Black,Le Blanc,104 kW (141 HP),Manual,https://occasion.autoplus.fr/voiture-occasion-...,6,18990,53407,104.0
1,"24,990 EUR",Hyundai,Kona,electric,"61,444 km",2021,Electric,Crossover,Blue,Tours,152 kW (207 HP),Automatic,https://www.auto-selection.com/voiture-occasio...,6,24990,61444,152.0
2,"27,900 EUR",Porsche,Cayenne,3.0 diesel,"142,000 km",2013,Diesel,Suv,Gray,Nice,182 kW (248 HP),Automatic,https://www.leparking.fr/voiture-occasion-deta...,5,27900,142000,182.0
3,"23,890 EUR",Seat,Ateca,1.5 tsi,"38,258 km",2020,Petrol,Suv,Brown,Beauvais,111 kW (151 HP),Automatic,https://fr.renew.auto/achat-vehicules-occasion...,6,23890,38258,111.0
4,"28,450 EUR",Toyota,Yaris,1.5,"6,780 km",2024,Hybrid,Sedan,Blue,Rivery,68 kW (92 HP),Automatic,https://occasion.autoplus.fr/voiture-occasion-...,6,28450,6780,68.0


In [41]:
# Now remove the original 3 columns
fr_offers.drop(columns=['Price', "Mi", 'Power'], inplace= True)

In [42]:
fr_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,Renault,Kadjar,1.3 TCe,2020,Petrol,Suv,Black,Le Blanc,Manual,https://occasion.autoplus.fr/voiture-occasion-...,6,18990,53407,104.0
1,Hyundai,Kona,electric,2021,Electric,Crossover,Blue,Tours,Automatic,https://www.auto-selection.com/voiture-occasio...,6,24990,61444,152.0
2,Porsche,Cayenne,3.0 diesel,2013,Diesel,Suv,Gray,Nice,Automatic,https://www.leparking.fr/voiture-occasion-deta...,5,27900,142000,182.0
3,Seat,Ateca,1.5 tsi,2020,Petrol,Suv,Brown,Beauvais,Automatic,https://fr.renew.auto/achat-vehicules-occasion...,6,23890,38258,111.0
4,Toyota,Yaris,1.5,2024,Hybrid,Sedan,Blue,Rivery,Automatic,https://occasion.autoplus.fr/voiture-occasion-...,6,28450,6780,68.0


In [43]:
# Check missing values
fr_offers.isna().sum()

Make                  0
Model                 0
Trim                 21
Year                  0
Fuel type             0
Body type             0
Color                 0
City                  0
Transmission          0
Contact               0
Emission standard     0
Price(EUR)            0
Mileage(km)           0
kW                    1
dtype: int64

In [44]:
fr_offers.dropna(subset = ['Trim', 'kW'], inplace=True)

In [45]:
fr_offers.loc[fr_offers['City'].isna(), 'City'] = "NA"

In [46]:
fr_offers.isna().sum()

Make                 0
Model                0
Trim                 0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Emission standard    0
Price(EUR)           0
Mileage(km)          0
kW                   0
dtype: int64

In [47]:
fr_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 239 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               239 non-null    object 
 1   Model              239 non-null    object 
 2   Trim               239 non-null    object 
 3   Year               239 non-null    int64  
 4   Fuel type          239 non-null    object 
 5   Body type          239 non-null    object 
 6   Color              239 non-null    object 
 7   City               239 non-null    object 
 8   Transmission       239 non-null    object 
 9   Contact            239 non-null    object 
 10  Emission standard  239 non-null    int64  
 11  Price(EUR)         239 non-null    int64  
 12  Mileage(km)        239 non-null    int64  
 13  kW                 239 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 28.0+ KB


In [48]:
# Create a new column "Country"
fr_offers['Country'] = "France"

In [49]:
fr_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW,Country
0,Renault,Kadjar,1.3 TCe,2020,Petrol,Suv,Black,Le Blanc,Manual,https://occasion.autoplus.fr/voiture-occasion-...,6,18990,53407,104.0,France
1,Hyundai,Kona,electric,2021,Electric,Crossover,Blue,Tours,Automatic,https://www.auto-selection.com/voiture-occasio...,6,24990,61444,152.0,France
2,Porsche,Cayenne,3.0 diesel,2013,Diesel,Suv,Gray,Nice,Automatic,https://www.leparking.fr/voiture-occasion-deta...,5,27900,142000,182.0,France
3,Seat,Ateca,1.5 tsi,2020,Petrol,Suv,Brown,Beauvais,Automatic,https://fr.renew.auto/achat-vehicules-occasion...,6,23890,38258,111.0,France
4,Toyota,Yaris,1.5,2024,Hybrid,Sedan,Blue,Rivery,Automatic,https://occasion.autoplus.fr/voiture-occasion-...,6,28450,6780,68.0,France


In [50]:
fr_offers.to_csv('cleaned_France_offers.csv', index =False)

4. Germany

In [51]:
gr_offers = pd.read_csv('Germany.csv')
gr_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Euro norm,Body type,Color,City,Power,Transmission,Contact
0,"35,890 EUR",Mazda,Cx-5,2.5,"11,732 km",2023,Petrol,6.0,Crossover,Black,Hamburg,144 kW (196 HP),Automatic,https://www.moll-automobile.de/fahrzeug/385546...
1,"22,950 EUR",Hyundai,I20,1.0,"5,772 km",2022,Petrol,6.0,Hatchback,White,Iserlohn,75 kW (102 HP),Automatic,https://gebrauchtwagen.hyundai.de/de/fahrzeugs...
2,"24,890 EUR",Audi,A1,1.0 30 TFSI,"42,490 km",2019,Petrol,6.0,Mini,Yellow,Rain,85 kW (116 HP),Manual,https://www.12gebrauchtwagen.de/c/16/141360039...
3,"39,130 EUR",Audi,Q2,1.5 35 TFSI,"2,500 km",2024,Petrol,6.0,Crossover,Black,Rain,110 kW (150 HP),Automatic,https://www.12gebrauchtwagen.de/c/34/139870861...
4,"46,980 EUR",Volvo,Xc40,1.5,"14,839 km",2023,Hybrid,,Crossover,Gray,Aachen,194 kW (264 HP),Automatic,https://www.moll-automobile.de/fahrzeug/385196...


In [52]:
# Now change the "Price", "Mi" 'Euro norm' and "Power"
gr_offers['Price(EUR)'] = gr_offers['Price'].apply(clean_price)
gr_offers['Mileage(km)'] = gr_offers['Mi'].apply(clean_mi)
gr_offers['kW'] = gr_offers['Power'].apply(power_kW_split)
gr_offers['Emission standard'] =gr_offers['Year'].apply(calculate_euro_norm)

In [53]:
gr_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Euro norm,Body type,Color,City,Power,Transmission,Contact,Price(EUR),Mileage(km),kW,Emission standard
0,"35,890 EUR",Mazda,Cx-5,2.5,"11,732 km",2023,Petrol,6.0,Crossover,Black,Hamburg,144 kW (196 HP),Automatic,https://www.moll-automobile.de/fahrzeug/385546...,35890,11732,144.0,6
1,"22,950 EUR",Hyundai,I20,1.0,"5,772 km",2022,Petrol,6.0,Hatchback,White,Iserlohn,75 kW (102 HP),Automatic,https://gebrauchtwagen.hyundai.de/de/fahrzeugs...,22950,5772,75.0,6
2,"24,890 EUR",Audi,A1,1.0 30 TFSI,"42,490 km",2019,Petrol,6.0,Mini,Yellow,Rain,85 kW (116 HP),Manual,https://www.12gebrauchtwagen.de/c/16/141360039...,24890,42490,85.0,6
3,"39,130 EUR",Audi,Q2,1.5 35 TFSI,"2,500 km",2024,Petrol,6.0,Crossover,Black,Rain,110 kW (150 HP),Automatic,https://www.12gebrauchtwagen.de/c/34/139870861...,39130,2500,110.0,6
4,"46,980 EUR",Volvo,Xc40,1.5,"14,839 km",2023,Hybrid,,Crossover,Gray,Aachen,194 kW (264 HP),Automatic,https://www.moll-automobile.de/fahrzeug/385196...,46980,14839,194.0,6


In [54]:
gr_offers.drop(columns=['Price', 'Mi', 'Power'], inplace=True)

In [55]:
gr_offers.drop(columns=['Euro norm'], inplace=True)

In [56]:
gr_offers.isna().sum()

Make                  0
Model                 0
Trim                 16
Year                  0
Fuel type             0
Body type             0
Color                 0
City                  0
Transmission          0
Contact               0
Price(EUR)            0
Mileage(km)           0
kW                    0
Emission standard     0
dtype: int64

In [57]:
gr_offers = gr_offers.dropna()

In [58]:
gr_offers.isna().sum()

Make                 0
Model                0
Trim                 0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Price(EUR)           0
Mileage(km)          0
kW                   0
Emission standard    0
dtype: int64

In [59]:
gr_offers['Transmission'].value_counts()

Transmission
Automatic    183
Manual        61
Name: count, dtype: int64

In [60]:
gr_offers['Country'] = "Germany"

In [61]:
gr_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Price(EUR),Mileage(km),kW,Emission standard,Country
0,Mazda,Cx-5,2.5,2023,Petrol,Crossover,Black,Hamburg,Automatic,https://www.moll-automobile.de/fahrzeug/385546...,35890,11732,144.0,6,Germany
1,Hyundai,I20,1.0,2022,Petrol,Hatchback,White,Iserlohn,Automatic,https://gebrauchtwagen.hyundai.de/de/fahrzeugs...,22950,5772,75.0,6,Germany
2,Audi,A1,1.0 30 TFSI,2019,Petrol,Mini,Yellow,Rain,Manual,https://www.12gebrauchtwagen.de/c/16/141360039...,24890,42490,85.0,6,Germany
3,Audi,Q2,1.5 35 TFSI,2024,Petrol,Crossover,Black,Rain,Automatic,https://www.12gebrauchtwagen.de/c/34/139870861...,39130,2500,110.0,6,Germany
4,Volvo,Xc40,1.5,2023,Hybrid,Crossover,Gray,Aachen,Automatic,https://www.moll-automobile.de/fahrzeug/385196...,46980,14839,194.0,6,Germany


In [62]:
gr_offers.to_csv('cleaned_Germany_offers.csv', index=False)

5. the Netherlands

In [63]:
nl_offers = pd.read_csv('Netherlands.csv')
nl_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Euro norm
0,"17,240 EUR",Nissan,Leaf,40 kWh,"92,555 km",2019,Electric,Hatchback,Silver,Tilburg,111 kW (151 HP),Automatic,https://www.autoscout24.nl/aanbod/nissan-leaf-...,
1,"21,740 EUR",Nissan,Leaf,e+ 62 kWh,"56,644 km",2019,Electric,Hatchback,Silver,Tilburg,162 kW (220 HP),Automatic,https://www.marktplaats.nl/v/auto-s/nissan/m20...,
2,"22,445 EUR",Bmw,X1,2.0 sdrive20i,"100,553 km",2015,Petrol,Suv,Silver,Middelburg,137 kW (186 HP),Automatic,https://www.zeelandnet.nl/prikbord/bmw-x1-sdri...,
3,"21,950 EUR",Nissan,Qashqai,1.2,"59,498 km",2018,Petrol,Crossover,Black,Haarlem,86 kW (117 HP),Automatic,https://usedcars.nissan.nl/nissan-voorraad/voe...,
4,"31,900 EUR",Nissan,Juke,1.0 DIG-T,"12,285 km",2023,Petrol,Crossover,White,Middelburg,85 kW (116 HP),Automatic,https://www.zeelandnet.nl/prikbord/nissan-juke...,


In [64]:
nl_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         260 non-null    object 
 1   Make          260 non-null    object 
 2   Model         260 non-null    object 
 3   Trim          257 non-null    object 
 4   Mi            260 non-null    object 
 5   Year          260 non-null    int64  
 6   Fuel type     260 non-null    object 
 7   Body type     260 non-null    object 
 8   Color         260 non-null    object 
 9   City          260 non-null    object 
 10  Power         260 non-null    object 
 11  Transmission  260 non-null    object 
 12  Contact       260 non-null    object 
 13  Euro norm     18 non-null     float64
dtypes: float64(1), int64(1), object(12)
memory usage: 28.6+ KB


In [65]:
nl_offers['Emission standard'] =nl_offers['Year'].apply(calculate_euro_norm)
nl_offers['Price(EUR)'] = nl_offers['Price'].apply(clean_price)
nl_offers['Mileage(km)'] = nl_offers['Mi'].apply(clean_mi)
nl_offers['kW'] = nl_offers['Power'].apply(power_kW_split)

In [66]:
nl_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Euro norm,Emission standard,Price(EUR),Mileage(km),kW
0,"17,240 EUR",Nissan,Leaf,40 kWh,"92,555 km",2019,Electric,Hatchback,Silver,Tilburg,111 kW (151 HP),Automatic,https://www.autoscout24.nl/aanbod/nissan-leaf-...,,6,17240,92555,111.0
1,"21,740 EUR",Nissan,Leaf,e+ 62 kWh,"56,644 km",2019,Electric,Hatchback,Silver,Tilburg,162 kW (220 HP),Automatic,https://www.marktplaats.nl/v/auto-s/nissan/m20...,,6,21740,56644,162.0
2,"22,445 EUR",Bmw,X1,2.0 sdrive20i,"100,553 km",2015,Petrol,Suv,Silver,Middelburg,137 kW (186 HP),Automatic,https://www.zeelandnet.nl/prikbord/bmw-x1-sdri...,,6,22445,100553,137.0
3,"21,950 EUR",Nissan,Qashqai,1.2,"59,498 km",2018,Petrol,Crossover,Black,Haarlem,86 kW (117 HP),Automatic,https://usedcars.nissan.nl/nissan-voorraad/voe...,,6,21950,59498,86.0
4,"31,900 EUR",Nissan,Juke,1.0 DIG-T,"12,285 km",2023,Petrol,Crossover,White,Middelburg,85 kW (116 HP),Automatic,https://www.zeelandnet.nl/prikbord/nissan-juke...,,6,31900,12285,85.0


In [67]:
nl_offers.drop(columns= ['Price', 'Mi', 'Euro norm', 'Power'], inplace= True)

In [68]:
nl_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,Nissan,Leaf,40 kWh,2019,Electric,Hatchback,Silver,Tilburg,Automatic,https://www.autoscout24.nl/aanbod/nissan-leaf-...,6,17240,92555,111.0
1,Nissan,Leaf,e+ 62 kWh,2019,Electric,Hatchback,Silver,Tilburg,Automatic,https://www.marktplaats.nl/v/auto-s/nissan/m20...,6,21740,56644,162.0
2,Bmw,X1,2.0 sdrive20i,2015,Petrol,Suv,Silver,Middelburg,Automatic,https://www.zeelandnet.nl/prikbord/bmw-x1-sdri...,6,22445,100553,137.0
3,Nissan,Qashqai,1.2,2018,Petrol,Crossover,Black,Haarlem,Automatic,https://usedcars.nissan.nl/nissan-voorraad/voe...,6,21950,59498,86.0
4,Nissan,Juke,1.0 DIG-T,2023,Petrol,Crossover,White,Middelburg,Automatic,https://www.zeelandnet.nl/prikbord/nissan-juke...,6,31900,12285,85.0


In [69]:
nl_offers.isna().sum()

Make                 0
Model                0
Trim                 3
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Emission standard    0
Price(EUR)           0
Mileage(km)          0
kW                   0
dtype: int64

In [70]:
nl_offers= nl_offers.dropna()

In [71]:
# Set display options to show more rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [72]:
nl_offers.isna().sum()

Make                 0
Model                0
Trim                 0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Emission standard    0
Price(EUR)           0
Mileage(km)          0
kW                   0
dtype: int64

In [73]:
nl_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 257 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               257 non-null    object 
 1   Model              257 non-null    object 
 2   Trim               257 non-null    object 
 3   Year               257 non-null    int64  
 4   Fuel type          257 non-null    object 
 5   Body type          257 non-null    object 
 6   Color              257 non-null    object 
 7   City               257 non-null    object 
 8   Transmission       257 non-null    object 
 9   Contact            257 non-null    object 
 10  Emission standard  257 non-null    int64  
 11  Price(EUR)         257 non-null    int64  
 12  Mileage(km)        257 non-null    int64  
 13  kW                 257 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 30.1+ KB


In [74]:
nl_offers['Country'] = "Netherlands"

In [75]:
nl_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW,Country
0,Nissan,Leaf,40 kWh,2019,Electric,Hatchback,Silver,Tilburg,Automatic,https://www.autoscout24.nl/aanbod/nissan-leaf-...,6,17240,92555,111.0,Netherlands
1,Nissan,Leaf,e+ 62 kWh,2019,Electric,Hatchback,Silver,Tilburg,Automatic,https://www.marktplaats.nl/v/auto-s/nissan/m20...,6,21740,56644,162.0,Netherlands
2,Bmw,X1,2.0 sdrive20i,2015,Petrol,Suv,Silver,Middelburg,Automatic,https://www.zeelandnet.nl/prikbord/bmw-x1-sdri...,6,22445,100553,137.0,Netherlands
3,Nissan,Qashqai,1.2,2018,Petrol,Crossover,Black,Haarlem,Automatic,https://usedcars.nissan.nl/nissan-voorraad/voe...,6,21950,59498,86.0,Netherlands
4,Nissan,Juke,1.0 DIG-T,2023,Petrol,Crossover,White,Middelburg,Automatic,https://www.zeelandnet.nl/prikbord/nissan-juke...,6,31900,12285,85.0,Netherlands


In [76]:
nl_offers.to_csv('cleaned_Netherlands_offers.csv', index=False)

6. Poland

In [79]:
pl_offers = pd.read_csv('Poland.csv')
pl_offers

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"55,141 EUR (59,900 USD)",Škoda,Octavia,1.6,"127,000 km",2018,Diesel,Sedan,Silver,Koło,84 kW (114 HP),Automatic,https://automotodeal.pl/osobowe/Skoda-octavia-...
1,"78,155 EUR (84,900 USD)",Volvo,V60,2.0 D3,"56,000 km",2017,Diesel,Wagon,White,Koło,110 kW (150 HP),Automatic,https://automotodeal.pl/v60/volvo-v60-volvo-v6...
2,"10,954 EUR (11,900 USD)",Honda,Accord,2.4,"152,000 km",2005,Petrol,Sedan,Silver,Łuków,139 kW (189 HP),Automatic,https://automotodeal.pl/vii-2002-2008/honda-ac...
3,"20,160 EUR (21,900 USD)",Chevrolet,Aveo,1.4,"178,024 km",2011,Petrol,Sedan,Gray,Świebodzin,73 kW (99 HP),Manual,https://automotodeal.pl/aveo/chevrolet-aveo-14...
4,"184,019 EUR (199,900 USD)",Audi,Q7,3.0,"78,759 km",2017,Diesel,Suv,Gray,Gdańsk,160 kW (218 HP),Automatic,https://automotodeal.pl/q7/audi-q7-psalon-vat-...
5,"59,744 EUR (64,900 USD)",Renault,Talisman,1.6,"135,794 km",2016,Diesel,Sedan,Blue,Gdańsk,95 kW (129 HP),Automatic,https://automotodeal.pl/talisman/renault-talis...
6,"22,093 EUR (24,000 USD)",Mercedes Benz,B-Class,200,"171,000 km",2007,Petrol,Hatchback,Black,Gdynia,99 kW (135 HP),Manual,https://automotodeal.pl/osobowe/mercedes-b-200...
7,"52,471 EUR (57,000 USD)",Mazda,Cx-5,2.5,"157,000 km",2014,Petrol,Crossover,White,Gdynia,134 kW (182 HP),Automatic,https://automotodeal.pl/cx-5/mazda-cx-5-25-awd...
8,"32,218 EUR (34,999 USD)",Peugeot,508,2.0 HDi,"170,000 km",2012,Diesel,Sedan,White,Gdańsk,102 kW (139 HP),Manual,https://automotodeal.pl/508/peugeot-508-20hdi-...
9,"26,604 EUR (28,900 USD)",Mazda,Cx-7,2.3,"150,000 km",2008,Petrol,Suv,Black,Grodzisk Mazowiecki,191 kW (260 HP),Automatic,https://automotodeal.pl/cx-7/mazda-cx-7-tylko-...


In [80]:
rows_without_PLN = pl_offers[~pl_offers['Price'].str.contains('PLN', na=False)]
rows_without_PLN

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"55,141 EUR (59,900 USD)",Škoda,Octavia,1.6,"127,000 km",2018,Diesel,Sedan,Silver,Koło,84 kW (114 HP),Automatic,https://automotodeal.pl/osobowe/Skoda-octavia-...
1,"78,155 EUR (84,900 USD)",Volvo,V60,2.0 D3,"56,000 km",2017,Diesel,Wagon,White,Koło,110 kW (150 HP),Automatic,https://automotodeal.pl/v60/volvo-v60-volvo-v6...
2,"10,954 EUR (11,900 USD)",Honda,Accord,2.4,"152,000 km",2005,Petrol,Sedan,Silver,Łuków,139 kW (189 HP),Automatic,https://automotodeal.pl/vii-2002-2008/honda-ac...
3,"20,160 EUR (21,900 USD)",Chevrolet,Aveo,1.4,"178,024 km",2011,Petrol,Sedan,Gray,Świebodzin,73 kW (99 HP),Manual,https://automotodeal.pl/aveo/chevrolet-aveo-14...
4,"184,019 EUR (199,900 USD)",Audi,Q7,3.0,"78,759 km",2017,Diesel,Suv,Gray,Gdańsk,160 kW (218 HP),Automatic,https://automotodeal.pl/q7/audi-q7-psalon-vat-...
5,"59,744 EUR (64,900 USD)",Renault,Talisman,1.6,"135,794 km",2016,Diesel,Sedan,Blue,Gdańsk,95 kW (129 HP),Automatic,https://automotodeal.pl/talisman/renault-talis...
6,"22,093 EUR (24,000 USD)",Mercedes Benz,B-Class,200,"171,000 km",2007,Petrol,Hatchback,Black,Gdynia,99 kW (135 HP),Manual,https://automotodeal.pl/osobowe/mercedes-b-200...
7,"52,471 EUR (57,000 USD)",Mazda,Cx-5,2.5,"157,000 km",2014,Petrol,Crossover,White,Gdynia,134 kW (182 HP),Automatic,https://automotodeal.pl/cx-5/mazda-cx-5-25-awd...
8,"32,218 EUR (34,999 USD)",Peugeot,508,2.0 HDi,"170,000 km",2012,Diesel,Sedan,White,Gdańsk,102 kW (139 HP),Manual,https://automotodeal.pl/508/peugeot-508-20hdi-...
9,"26,604 EUR (28,900 USD)",Mazda,Cx-7,2.3,"150,000 km",2008,Petrol,Suv,Black,Grodzisk Mazowiecki,191 kW (260 HP),Automatic,https://automotodeal.pl/cx-7/mazda-cx-7-tylko-...


In [81]:
pl_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Price         260 non-null    object
 1   Make          260 non-null    object
 2   Model         260 non-null    object
 3   Trim          249 non-null    object
 4   Mi            260 non-null    object
 5   Year          260 non-null    int64 
 6   Fuel type     260 non-null    object
 7   Body type     260 non-null    object
 8   Color         260 non-null    object
 9   City          260 non-null    object
 10  Power         251 non-null    object
 11  Transmission  255 non-null    object
 12  Contact       260 non-null    object
dtypes: int64(1), object(12)
memory usage: 26.5+ KB


In [82]:
# First need to only keep euro as currency
pl_offers['Price'] = pl_offers['Price'].str.split('(').str[0].str.strip()

In [83]:
pl_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"55,141 EUR",Škoda,Octavia,1.6,"127,000 km",2018,Diesel,Sedan,Silver,Koło,84 kW (114 HP),Automatic,https://automotodeal.pl/osobowe/Skoda-octavia-...
1,"78,155 EUR",Volvo,V60,2.0 D3,"56,000 km",2017,Diesel,Wagon,White,Koło,110 kW (150 HP),Automatic,https://automotodeal.pl/v60/volvo-v60-volvo-v6...
2,"10,954 EUR",Honda,Accord,2.4,"152,000 km",2005,Petrol,Sedan,Silver,Łuków,139 kW (189 HP),Automatic,https://automotodeal.pl/vii-2002-2008/honda-ac...
3,"20,160 EUR",Chevrolet,Aveo,1.4,"178,024 km",2011,Petrol,Sedan,Gray,Świebodzin,73 kW (99 HP),Manual,https://automotodeal.pl/aveo/chevrolet-aveo-14...
4,"184,019 EUR",Audi,Q7,3.0,"78,759 km",2017,Diesel,Suv,Gray,Gdańsk,160 kW (218 HP),Automatic,https://automotodeal.pl/q7/audi-q7-psalon-vat-...


In [84]:
pl_offers['Emission standard'] =pl_offers['Year'].apply(calculate_euro_norm)
pl_offers['Price(EUR)'] = pl_offers['Price'].apply(clean_price)
pl_offers['Mileage(km)'] = pl_offers['Mi'].apply(clean_mi)
pl_offers['kW'] = pl_offers['Power'].apply(power_kW_split)

In [85]:
pl_offers.loc[rows_without_PLN.index, 'Price(EUR)'] *= 0.23

  7410.14  6118.92 26360.07 16070.1  13762.05 33855.31 26360.07  7198.77
 23904.13 16916.96 12195.52  3154.68  7389.21  5483.66 16514.46  9061.77
 15011.41 16705.36 15455.77 18992.02 19627.05 31102.9  12259.   19034.34
  9718.19 13741.12  4213.37 16239.38 12894.26  2116.92 22845.44 11750.93
  8214.91  3599.27 18187.48  2731.25 25386.25 26887.23  6118.92 14395.24
 14333.83  9949.11  7156.22  9506.59  4991.92  4655.89  1460.73 10776.88
 19264.34  4763.76  8257.23 31505.17 10586.21 11221.47 35464.39 14651.46
  3154.68  5039.07  3154.68 11623.74 11412.14 18568.59  5271.83 20050.71
 33855.31 12470.6  14799.81  4340.33 16874.64  6753.95 12682.43  9102.02
  5060.23  4403.81  5271.83  1249.13  2519.42  4636.8   4213.37  1460.73
  7939.83  4636.8   7177.38  6118.92  6330.52  3808.8  13529.29  2731.25
 25089.78 29620.78  2115.08 26444.71 11073.35  8024.47 10755.72 16938.12
 12470.6   3914.83  8659.5   5483.66  6923.46  8214.91  6753.95 19457.77
  3556.95 15453.93 28138.66  2032.51  6330.52  8024

In [86]:
pl_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,"55,141 EUR",Škoda,Octavia,1.6,"127,000 km",2018,Diesel,Sedan,Silver,Koło,84 kW (114 HP),Automatic,https://automotodeal.pl/osobowe/Skoda-octavia-...,6,12682.43,127000,84.0
1,"78,155 EUR",Volvo,V60,2.0 D3,"56,000 km",2017,Diesel,Wagon,White,Koło,110 kW (150 HP),Automatic,https://automotodeal.pl/v60/volvo-v60-volvo-v6...,6,17975.65,56000,110.0
2,"10,954 EUR",Honda,Accord,2.4,"152,000 km",2005,Petrol,Sedan,Silver,Łuków,139 kW (189 HP),Automatic,https://automotodeal.pl/vii-2002-2008/honda-ac...,4,2519.42,152000,139.0
3,"20,160 EUR",Chevrolet,Aveo,1.4,"178,024 km",2011,Petrol,Sedan,Gray,Świebodzin,73 kW (99 HP),Manual,https://automotodeal.pl/aveo/chevrolet-aveo-14...,5,4636.8,178024,73.0
4,"184,019 EUR",Audi,Q7,3.0,"78,759 km",2017,Diesel,Suv,Gray,Gdańsk,160 kW (218 HP),Automatic,https://automotodeal.pl/q7/audi-q7-psalon-vat-...,6,42324.37,78759,160.0


In [87]:
pl_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)

In [88]:
pl_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,Škoda,Octavia,1.6,2018,Diesel,Sedan,Silver,Koło,Automatic,https://automotodeal.pl/osobowe/Skoda-octavia-...,6,12682.43,127000,84.0
1,Volvo,V60,2.0 D3,2017,Diesel,Wagon,White,Koło,Automatic,https://automotodeal.pl/v60/volvo-v60-volvo-v6...,6,17975.65,56000,110.0
2,Honda,Accord,2.4,2005,Petrol,Sedan,Silver,Łuków,Automatic,https://automotodeal.pl/vii-2002-2008/honda-ac...,4,2519.42,152000,139.0
3,Chevrolet,Aveo,1.4,2011,Petrol,Sedan,Gray,Świebodzin,Manual,https://automotodeal.pl/aveo/chevrolet-aveo-14...,5,4636.8,178024,73.0
4,Audi,Q7,3.0,2017,Diesel,Suv,Gray,Gdańsk,Automatic,https://automotodeal.pl/q7/audi-q7-psalon-vat-...,6,42324.37,78759,160.0


In [89]:
pl_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               260 non-null    object 
 1   Model              260 non-null    object 
 2   Trim               249 non-null    object 
 3   Year               260 non-null    int64  
 4   Fuel type          260 non-null    object 
 5   Body type          260 non-null    object 
 6   Color              260 non-null    object 
 7   City               260 non-null    object 
 8   Transmission       255 non-null    object 
 9   Contact            260 non-null    object 
 10  Emission standard  260 non-null    int64  
 11  Price(EUR)         260 non-null    float64
 12  Mileage(km)        260 non-null    int64  
 13  kW                 251 non-null    float64
dtypes: float64(2), int64(3), object(9)
memory usage: 28.6+ KB


In [90]:
pl_offers.isna().sum()

Make                  0
Model                 0
Trim                 11
Year                  0
Fuel type             0
Body type             0
Color                 0
City                  0
Transmission          5
Contact               0
Emission standard     0
Price(EUR)            0
Mileage(km)           0
kW                    9
dtype: int64

In [91]:
pl_offers = pl_offers.dropna()

In [92]:
pl_offers.isna().sum()

Make                 0
Model                0
Trim                 0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Emission standard    0
Price(EUR)           0
Mileage(km)          0
kW                   0
dtype: int64

In [93]:
pl_offers['Country'] = "Poland"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pl_offers['Country'] = "Poland"


In [94]:
pl_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW,Country
0,Škoda,Octavia,1.6,2018,Diesel,Sedan,Silver,Koło,Automatic,https://automotodeal.pl/osobowe/Skoda-octavia-...,6,12682.43,127000,84.0,Poland
1,Volvo,V60,2.0 D3,2017,Diesel,Wagon,White,Koło,Automatic,https://automotodeal.pl/v60/volvo-v60-volvo-v6...,6,17975.65,56000,110.0,Poland
2,Honda,Accord,2.4,2005,Petrol,Sedan,Silver,Łuków,Automatic,https://automotodeal.pl/vii-2002-2008/honda-ac...,4,2519.42,152000,139.0,Poland
3,Chevrolet,Aveo,1.4,2011,Petrol,Sedan,Gray,Świebodzin,Manual,https://automotodeal.pl/aveo/chevrolet-aveo-14...,5,4636.8,178024,73.0,Poland
4,Audi,Q7,3.0,2017,Diesel,Suv,Gray,Gdańsk,Automatic,https://automotodeal.pl/q7/audi-q7-psalon-vat-...,6,42324.37,78759,160.0,Poland


In [95]:
pl_offers.to_csv('cleaned_Poland_offers.csv', index=False)

7. Spain

In [96]:
sp_offers = pd.read_csv('Spain.csv')
sp_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Price         260 non-null    object
 1   Make          260 non-null    object
 2   Model         260 non-null    object
 3   Trim          256 non-null    object
 4   Mi            260 non-null    object
 5   Year          260 non-null    int64 
 6   Fuel type     260 non-null    object
 7   Body type     260 non-null    object
 8   Color         260 non-null    object
 9   City          260 non-null    object
 10  Power         260 non-null    object
 11  Transmission  260 non-null    object
 12  Contact       260 non-null    object
dtypes: int64(1), object(12)
memory usage: 26.5+ KB


In [97]:
sp_offers['Emission standard'] = sp_offers['Year'].apply(calculate_euro_norm)
sp_offers['Price(EUR)'] = sp_offers['Price'].apply(clean_price)
sp_offers['Mileage(km)'] = sp_offers['Mi'].apply(clean_mi)
sp_offers['kW'] = sp_offers['Power'].apply(power_kW_split)


In [98]:
sp_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,"21,890 EUR",Jaguar,F-Pace,2.0,"123,009 km",2016,Diesel,Crossover,Red,Madrid,134 kW (182 HP),Automatic,https://coches.km77.com/jaguar/f-pace/en-madri...,6,21890,123009,134.0
1,"14,495 EUR",Peugeot,2008,1.2 puretech,"81,000 km",2017,Petrol,Crossover,Gray,Valencia,100 kW (136 HP),Manual,https://coches.km77.com/peugeot/2008/en-valenc...,6,14495,81000,100.0
2,"7,490 EUR",Fiat,500,1.4,"100,585 km",2009,Petrol,Mini,Orange,Madrid,74 kW (101 HP),Manual,https://coches.km77.com/fiat/500/en-madrid/236...,5,7490,100585,74.0
3,"22,300 EUR",Jeep,Renegade,1.6 Limited,"64,433 km",2021,Diesel,Crossover,Gray,Vélez Málaga,130 kW (177 HP),Manual,https://coches.km77.com/jeep/renegade/en-malag...,6,22300,64433,130.0
4,"18,695 EUR",Ford,Puma,1.0 EcoBoost,"47,881 km",2020,Hybrid,Crossover,Blue,Valencia,125 kW (170 HP),Manual,https://coches.km77.com/ford/puma/en-valencia/...,6,18695,47881,125.0


In [99]:
sp_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)

In [101]:
#sp_offers.drop(columns= ['Euro norm'], inplace= True)

In [102]:
sp_offers.isna().sum()

Make                 0
Model                0
Trim                 4
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Emission standard    0
Price(EUR)           0
Mileage(km)          0
kW                   0
dtype: int64

In [103]:
sp_offers = sp_offers.dropna()

In [104]:
sp_offers.isna().sum()

Make                 0
Model                0
Trim                 0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Emission standard    0
Price(EUR)           0
Mileage(km)          0
kW                   0
dtype: int64

In [105]:
sp_offers['Country'] = "Spain"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sp_offers['Country'] = "Spain"


In [106]:
sp_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW,Country
0,Jaguar,F-Pace,2.0,2016,Diesel,Crossover,Red,Madrid,Automatic,https://coches.km77.com/jaguar/f-pace/en-madri...,6,21890,123009,134.0,Spain
1,Peugeot,2008,1.2 puretech,2017,Petrol,Crossover,Gray,Valencia,Manual,https://coches.km77.com/peugeot/2008/en-valenc...,6,14495,81000,100.0,Spain
2,Fiat,500,1.4,2009,Petrol,Mini,Orange,Madrid,Manual,https://coches.km77.com/fiat/500/en-madrid/236...,5,7490,100585,74.0,Spain
3,Jeep,Renegade,1.6 Limited,2021,Diesel,Crossover,Gray,Vélez Málaga,Manual,https://coches.km77.com/jeep/renegade/en-malag...,6,22300,64433,130.0,Spain
4,Ford,Puma,1.0 EcoBoost,2020,Hybrid,Crossover,Blue,Valencia,Manual,https://coches.km77.com/ford/puma/en-valencia/...,6,18695,47881,125.0,Spain


In [107]:
sp_offers.to_csv('cleaned_Spain_offers.csv', index=False)

8. Italy

In [108]:
it_offers= pd.read_csv('Italy.csv')
it_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Euro norm
0,"33,900 EUR",Volvo,Xc40,1.5 T3,"12,000 km",2021,Petrol,Crossover,Black,Roma,121 kW (165 HP),Automatic,https://www.trova-automobile.it/auto/annuncio/...,
1,"8,900 EUR",Chevrolet,Trax,1.4 LTZ,"137,000 km",2014,Diesel,Crossover,Black,Ossona,97 kW (132 HP),Automatic,https://www.trova-automobile.it/auto/annuncio/...,
2,"44,990 EUR",Audi,Q3,2.0 35,"17,000 km",2023,Diesel,Crossover,Black,Alcamo,111 kW (151 HP),Automatic,https://www.trova-automobile.it/auto/annuncio/...,6.0
3,"33,990 EUR",Bmw,X3,xDrive20d,"99,000 km",2019,Diesel,Suv,Black,Alcamo,141 kW (192 HP),Automatic,https://www.trova-automobile.it/auto/annuncio/...,6.0
4,"6,499 EUR",Audi,A6,2.7 TDI V6 Quattro TipTronic,"89,000 km",2006,Diesel,Wagon,Black,Roma,134 kW (182 HP),Automatic,https://www.trova-automobile.it/auto/annuncio/...,4.0


In [109]:
it_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         260 non-null    object 
 1   Make          260 non-null    object 
 2   Model         260 non-null    object 
 3   Trim          257 non-null    object 
 4   Mi            260 non-null    object 
 5   Year          260 non-null    int64  
 6   Fuel type     260 non-null    object 
 7   Body type     260 non-null    object 
 8   Color         260 non-null    object 
 9   City          260 non-null    object 
 10  Power         260 non-null    object 
 11  Transmission  259 non-null    object 
 12  Contact       260 non-null    object 
 13  Euro norm     169 non-null    float64
dtypes: float64(1), int64(1), object(12)
memory usage: 28.6+ KB


In [110]:
it_offers['Emission standard'] =it_offers['Year'].apply(calculate_euro_norm)
it_offers['Price(EUR)'] = it_offers['Price'].apply(clean_price)
it_offers['Mileage(km)'] = it_offers['Mi'].apply(clean_mi)
it_offers['kW'] = it_offers['Power'].apply(power_kW_split)

In [111]:
it_offers.drop(columns= ['Price', 'Mi', 'Power', 'Euro norm'], inplace= True)

In [112]:
it_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               260 non-null    object 
 1   Model              260 non-null    object 
 2   Trim               257 non-null    object 
 3   Year               260 non-null    int64  
 4   Fuel type          260 non-null    object 
 5   Body type          260 non-null    object 
 6   Color              260 non-null    object 
 7   City               260 non-null    object 
 8   Transmission       259 non-null    object 
 9   Contact            260 non-null    object 
 10  Emission standard  260 non-null    int64  
 11  Price(EUR)         260 non-null    int64  
 12  Mileage(km)        260 non-null    int64  
 13  kW                 260 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 28.6+ KB


In [113]:
# delete the rows with missing values
it_offers =it_offers.dropna()

In [114]:
it_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 256 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               256 non-null    object 
 1   Model              256 non-null    object 
 2   Trim               256 non-null    object 
 3   Year               256 non-null    int64  
 4   Fuel type          256 non-null    object 
 5   Body type          256 non-null    object 
 6   Color              256 non-null    object 
 7   City               256 non-null    object 
 8   Transmission       256 non-null    object 
 9   Contact            256 non-null    object 
 10  Emission standard  256 non-null    int64  
 11  Price(EUR)         256 non-null    int64  
 12  Mileage(km)        256 non-null    int64  
 13  kW                 256 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 30.0+ KB


In [115]:
it_offers['Country'] = "Italy"

In [116]:
it_offers.to_csv('cleaned_Italy_offers.csv', index=False)

9. Sweden

In [117]:
sd_offers= pd.read_csv('Sweden.csv')
sd_offers

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"699,000 EUR",Volvo,Xc90,2.0,"37,420 km",2022,Diesel,Suv,Silver,Svarta,173 kW (235 HP),Semi Automatic,https://www.bytbil.com/jamtlands-lan/personbil...
1,"13,220 EUR (149,800 SEK)",Hyundai,Ix35,1.6 GDi,"109,820 km",2014,Petrol,Crossover,Black,Vallentuna,123 kW (167 HP),Automatic,https://bilweb.se/stockholms-lan/hyundai-ix35-...
2,"33,449 EUR (379,000 SEK)",Bmw,X6,xDrive30d,"100,000 km",2015,Diesel,Suv,Black,Eskilstuna,190 kW (258 HP),Automatic,https://www.blocket.se/annons/1001042692
3,"50,297 EUR (569,900 SEK)",Audi,A7,4.0 TFSI,"110,000 km",2015,Petrol,Sedan,White,Landvetter,412 kW (560 HP),Automatic,https://www.bytbil.com/vastra-gotalands-lan/pe...
4,"9,258 EUR (104,900 SEK)",Mercedes Benz,C-Class,,"193,000 km",2024,Diesel,Sedan,Silver,Karlstad,200 kW (272 HP),Automatic,https://www.bytbil.com/varmlands-lan/personbil...
5,"389,900 EUR",Bmw,5 Series,530e,"37,990 km",2023,Hybrid,Sedan,Black,Svarta,135 kW (184 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
6,"849,900 EUR",Bmw,X5,,"28,000 km",2024,Hybrid,Suv,Blue,Knivsta,210 kW (286 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
7,"48,453 EUR (549,000 SEK)",Bmw,7 Series,740d,"57,000 km",2018,Diesel,Sedan,Black,Vallentuna,235 kW (320 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
8,"11,023 EUR (124,900 SEK)",Nissan,Qashqai,1.6,"189,760 km",2014,Diesel,Crossover,White,Kalmar,96 kW (131 HP),Automatic,https://www.bytbil.com/kalmar-lan/personbil-qa...
9,"29,998 EUR (339,900 SEK)",Bmw,X4,xDrive30d,"100,510 km",2017,Diesel,Suv,White,Bro,190 kW (258 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...


In [118]:
rows_without_SEK = sd_offers[~sd_offers['Price'].str.contains('SEK', na=False)]

In [119]:
rows_without_SEK

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"699,000 EUR",Volvo,Xc90,2.0,"37,420 km",2022,Diesel,Suv,Silver,Svarta,173 kW (235 HP),Semi Automatic,https://www.bytbil.com/jamtlands-lan/personbil...
5,"389,900 EUR",Bmw,5 Series,530e,"37,990 km",2023,Hybrid,Sedan,Black,Svarta,135 kW (184 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
6,"849,900 EUR",Bmw,X5,,"28,000 km",2024,Hybrid,Suv,Blue,Knivsta,210 kW (286 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
13,"374,900 EUR",Volvo,Xc60,2.0 T8 AWD,230 km,2024,Hybrid,Crossover,White,Karlstad,235 kW (320 HP),Automatic,https://www.bytbil.com/varmlands-lan/personbil...
14,"369,900 EUR",Bmw,5 Series,530e,"74,890 km",2023,Hybrid,Sedan,White,Uppsala,135 kW (184 HP),Automatic,https://www.bytbil.com/uppsala-lan/personbil-5...
19,"629,900 EUR",Audi,Q7,3.0 tfsi,"34,900 km",2023,Hybrid,Suv,Black,Are,250 kW (340 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
22,"549,900 EUR",Volvo,Xc60,2.0 Recharge T6,"62,000 km",2024,Hybrid,Crossover,White,Stockholm,186 kW (253 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
24,"379,900 EUR",Mercedes Benz,E-Class,,"63,490 km",2019,Diesel,Sedan,White,Stockholm,135 kW (184 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
28,"369,900 EUR",Mercedes Benz,Glc-Class,,"162,200 km",2019,Diesel,Suv,Black,Uppsala,120 kW (163 HP),Automatic,https://www.bytbil.com/uppsala-lan/personbil-g...
47,"379,900 EUR",Mercedes Benz,E-Class,,"116,800 km",2024,Diesel,Sedan,Black,Tumba,180 kW (245 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...


There are some rows without SEK currency value, and the EUR are actually SEK. The exchange rate is 0,087

In [120]:
# First need to only keep euro as currency
sd_offers['Price'] = sd_offers['Price'].str.split('(').str[0].str.strip()

In [121]:
sd_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"699,000 EUR",Volvo,Xc90,2.0,"37,420 km",2022,Diesel,Suv,Silver,Svarta,173 kW (235 HP),Semi Automatic,https://www.bytbil.com/jamtlands-lan/personbil...
1,"13,220 EUR",Hyundai,Ix35,1.6 GDi,"109,820 km",2014,Petrol,Crossover,Black,Vallentuna,123 kW (167 HP),Automatic,https://bilweb.se/stockholms-lan/hyundai-ix35-...
2,"33,449 EUR",Bmw,X6,xDrive30d,"100,000 km",2015,Diesel,Suv,Black,Eskilstuna,190 kW (258 HP),Automatic,https://www.blocket.se/annons/1001042692
3,"50,297 EUR",Audi,A7,4.0 TFSI,"110,000 km",2015,Petrol,Sedan,White,Landvetter,412 kW (560 HP),Automatic,https://www.bytbil.com/vastra-gotalands-lan/pe...
4,"9,258 EUR",Mercedes Benz,C-Class,,"193,000 km",2024,Diesel,Sedan,Silver,Karlstad,200 kW (272 HP),Automatic,https://www.bytbil.com/varmlands-lan/personbil...


In [122]:
sd_offers['Emission standard'] =sd_offers['Year'].apply(calculate_euro_norm)
sd_offers['Price(EUR)'] = sd_offers['Price'].apply(clean_price)
sd_offers['Mileage(km)'] = sd_offers['Mi'].apply(clean_mi)
sd_offers['kW'] = sd_offers['Power'].apply(power_kW_split)

In [123]:
# Change the value with the index from the rows_without_SEK by multiplying 0.087
sd_offers.loc[rows_without_SEK.index, 'Price(EUR)'] = sd_offers.loc[rows_without_SEK.index, 'Price(EUR)'] * 0.087

 33051.3 46101.3 32973.  33834.3 47763.  82563.  47763.  47841.3 32103.
 31755.  16077.6 31311.3 47763.  49581.3 32181.3 31180.8 31311.3 32607.6
 48633.  13398.  46101.3  9561.3 17826.3 32973.  15999.3 14346.3 47841.3
 60891.3 69591.3 34356.3 16947.6 14694.3 14328.9 31311.3 32973.  16086.3
 33921.3 31233.  14346.3  9996.3  9483.   8256.3 33051.3 16956.3 33051.3
 62205.  31746.3 31311.3 71253.  33912.6 14337.6 32094.3 16086.3  9561.3]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  sd_offers.loc[rows_without_SEK.index, 'Price(EUR)'] = sd_offers.loc[rows_without_SEK.index, 'Price(EUR)'] * 0.087


In [124]:
sd_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)

In [125]:
sd_offers.isna().sum()

Make                  0
Model                 0
Trim                 29
Year                  0
Fuel type             0
Body type             0
Color                 0
City                  0
Transmission          0
Contact               0
Emission standard     0
Price(EUR)            0
Mileage(km)           0
kW                    8
dtype: int64

In [126]:
sd_offers.loc[sd_offers['Color'].isna(), 'Color'] = "NA"

In [127]:
sd_offers = sd_offers.dropna()

In [128]:
sd_offers.isna().sum()

Make                 0
Model                0
Trim                 0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Emission standard    0
Price(EUR)           0
Mileage(km)          0
kW                   0
dtype: int64

In [133]:
sd_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW,Country
0,Volvo,Xc90,2.0,2022,Diesel,Suv,Silver,Svarta,Semi Automatic,https://www.bytbil.com/jamtlands-lan/personbil...,6,60813.0,37420,173.0,Sweden
1,Hyundai,Ix35,1.6 GDi,2014,Petrol,Crossover,Black,Vallentuna,Automatic,https://bilweb.se/stockholms-lan/hyundai-ix35-...,6,13220.0,109820,123.0,Sweden
2,Bmw,X6,xDrive30d,2015,Diesel,Suv,Black,Eskilstuna,Automatic,https://www.blocket.se/annons/1001042692,6,33449.0,100000,190.0,Sweden
3,Audi,A7,4.0 TFSI,2015,Petrol,Sedan,White,Landvetter,Automatic,https://www.bytbil.com/vastra-gotalands-lan/pe...,6,50297.0,110000,412.0,Sweden
5,Bmw,5 Series,530e,2023,Hybrid,Sedan,Black,Svarta,Automatic,https://www.bytbil.com/stockholms-lan/personbi...,6,33921.3,37990,135.0,Sweden


In [131]:
sd_offers['Country'] = "Sweden"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sd_offers['Country'] = "Sweden"


In [132]:
sd_offers.to_csv('cleaned_Sweden_offers.csv', index=False)

10. Switzerland

In [134]:
sl_offers = pd.read_csv('Switzerland.csv')
sl_offers.head()

Unnamed: 0,Price,Make,Model,Mi,Year,Fuel type,Body type,Color,City,Transmission,Contact,Trim,Power,Euro norm
0,"46,607 EUR (44,988 CHF)",Volvo,Ex30,0 km,2024,Electric,Suv,Black,Kreuzlingen,Automatic,https://www.carforyou.ch/de/auto/volvo/ex30,,,
1,"15,322 EUR (14,790 CHF)",Mazda,626,"112,003 km",2017,Petrol,Wagon,Black,Spiez,Manual,https://www.carforyou.ch/de/auto/mazda/626,,,
2,"49,261 EUR (47,550 CHF)",Mazda,Cx-60,0 km,2023,,Suv,Black,Uster,Automatic,https://www.carforyou.ch/de/auto/mazda/cx-60,3.3 e-Skyactiv D,,
3,"29,526 EUR (28,500 CHF)",Honda,S2000,"12,000 km",2001,,Convertible,Black,Gossau,Manual,https://www.carforyou.ch/de/auto/honda/s2000,,,
4,"27,600 EUR",Abarth,595,"34,000 km",2019,Petrol,Coupe,,,,https://www.woowmotors.com/car/2019-abarth-500,1.4 competizione,,


In [135]:
sl_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 169 entries, 0 to 168
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         169 non-null    object 
 1   Make          169 non-null    object 
 2   Model         169 non-null    object 
 3   Mi            167 non-null    object 
 4   Year          169 non-null    int64  
 5   Fuel type     117 non-null    object 
 6   Body type     151 non-null    object 
 7   Color         84 non-null     object 
 8   City          6 non-null      object 
 9   Transmission  157 non-null    object 
 10  Contact       169 non-null    object 
 11  Trim          142 non-null    object 
 12  Power         149 non-null    object 
 13  Euro norm     91 non-null     float64
dtypes: float64(1), int64(1), object(12)
memory usage: 18.6+ KB


In [136]:
# Removing the missing values for "Mi", "Fuel type", 'Body type', 'Color', 'Transmission', 'Power'
sl_offers.dropna(subset=["Mi", 'Trim', "Fuel type", 'Body type', 'Transmission', 'Power'], inplace= True)

In [137]:
sl_offers.loc[sl_offers['Color'].isna(), 'Color'] = "NA"
sl_offers.loc[sl_offers['City'].isna(), 'City'] = "NA"

In [138]:
rows_without_CHF = sl_offers[~sl_offers['Price'].str.contains('CHF', na=False)]
rows_without_CHF

Unnamed: 0,Price,Make,Model,Mi,Year,Fuel type,Body type,Color,City,Transmission,Contact,Trim,Power,Euro norm


In [139]:
# First need to only keep euro as currency
sl_offers['Price'] = sl_offers['Price'].str.split('(').str[0].str.strip()

In [140]:
sl_offers['Emission standard'] =sl_offers['Year'].apply(calculate_euro_norm)
sl_offers['Price(EUR)'] = sl_offers['Price'].apply(clean_price)
sl_offers['Mileage(km)'] = sl_offers['Mi'].apply(clean_mi)
sl_offers['kW'] = sl_offers['Power'].apply(power_kW_split)

In [141]:
sl_offers.loc[rows_without_CHF.index, 'Price(EUR)'] *= 1.03

In [142]:
sl_offers.drop(columns= ['Price', 'Mi', 'Power', 'Euro norm'], inplace= True)

In [143]:
sl_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 79 entries, 16 to 127
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               79 non-null     object 
 1   Model              79 non-null     object 
 2   Year               79 non-null     int64  
 3   Fuel type          79 non-null     object 
 4   Body type          79 non-null     object 
 5   Color              79 non-null     object 
 6   City               79 non-null     object 
 7   Transmission       79 non-null     object 
 8   Contact            79 non-null     object 
 9   Trim               79 non-null     object 
 10  Emission standard  79 non-null     int64  
 11  Price(EUR)         79 non-null     int64  
 12  Mileage(km)        79 non-null     int64  
 13  kW                 79 non-null     float64
dtypes: float64(1), int64(4), object(9)
memory usage: 11.3+ KB


In [144]:
sl_offers['Country'] = "Switzerland"

In [145]:
sl_offers.head()

Unnamed: 0,Make,Model,Year,Fuel type,Body type,Color,City,Transmission,Contact,Trim,Emission standard,Price(EUR),Mileage(km),kW,Country
16,Škoda,Enyaq,2024,Electric,Suv,Blue,,Automatic,https://www.azw.ch/de/angebot/624/skoda-enyaq-...,electro,6,40196,10,134.0,Switzerland
17,Alfa Romeo,Tonale,2024,Hybrid,Crossover,Gray,,Automatic,https://www.azw.ch/de/angebot/1033/alfa-romeo-...,1.3,6,48174,6,208.0,Switzerland
18,Volkswagen,Touareg,2018,Diesel,Suv,Black,,Automatic,https://www.azw.ch/de/angebot/1014/vw-touareg-...,3.0 TDI,6,40922,25500,195.0,Switzerland
19,Volkswagen,Golf,2024,Diesel,Sedan,Black,,Automatic,https://www.azw.ch/de/angebot/1165/vw-golf-2-0...,2.0 TDI,6,36052,11,111.0,Switzerland
20,Škoda,Octavia,2023,Diesel,Wagon,Blue,,Automatic,https://www.azw.ch/de/angebot/1077/skoda-octav...,2.0 TDI,6,30872,12400,111.0,Switzerland


In [146]:
sl_offers.to_csv('cleaned_Switzerland_offers.csv', index=False)

11. Portugal

In [147]:
pt_offers = pd.read_csv('Portugal.csv')
pt_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"9,850 EUR",Alfa Romeo,Giulietta,1.6 JTDm,"184,394 km",2012,Diesel,Hatchback,Black,Maia,78 kW (106 HP),Manual,https://www.motores24h.pt/alfa-romeo-giulietta...
1,"15,480 EUR",Seat,Leon,1.6 TDI,"137,000 km",2019,Diesel,Hatchback,Black,Viseu,86 kW (117 HP),Manual,https://www.motores24h.pt/seat-leon-st-1.6-tdi...
2,"22,800 EUR",Bmw,4 Series,,"159,772 km",2014,Diesel,Coupe,Gray,Maia,137 kW (186 HP),Automatic,https://www.motores24h.pt/bmw-serie-4-420-d-xd...
3,"14,900 EUR",Renault,Megane,1.5 dCi,"117,635 km",2017,Diesel,Coupe,Gray,Maia,82 kW (112 HP),Manual,https://www.motores24h.pt/renault-megane-1.5-d...
4,"12,450 EUR",Nissan,Leaf,Tekna,"38,000 km",2017,Electric,Hatchback,White,Viseu,81 kW (110 HP),Automatic,https://www.motores24h.pt/nissan-leaf-tekna-30...


In [148]:
pt_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Price         260 non-null    object
 1   Make          260 non-null    object
 2   Model         260 non-null    object
 3   Trim          247 non-null    object
 4   Mi            260 non-null    object
 5   Year          260 non-null    int64 
 6   Fuel type     260 non-null    object
 7   Body type     260 non-null    object
 8   Color         258 non-null    object
 9   City          260 non-null    object
 10  Power         143 non-null    object
 11  Transmission  222 non-null    object
 12  Contact       260 non-null    object
dtypes: int64(1), object(12)
memory usage: 26.5+ KB


In [149]:
pt_offers.loc[pt_offers['Color'].isna(), 'Color'] = "NA"
pt_offers.loc[pt_offers['City'].isna(), 'City'] = "NA"

In [150]:
pt_offers = pt_offers.dropna()

In [151]:
pt_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 136 entries, 0 to 146
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Price         136 non-null    object
 1   Make          136 non-null    object
 2   Model         136 non-null    object
 3   Trim          136 non-null    object
 4   Mi            136 non-null    object
 5   Year          136 non-null    int64 
 6   Fuel type     136 non-null    object
 7   Body type     136 non-null    object
 8   Color         136 non-null    object
 9   City          136 non-null    object
 10  Power         136 non-null    object
 11  Transmission  136 non-null    object
 12  Contact       136 non-null    object
dtypes: int64(1), object(12)
memory usage: 14.9+ KB


In [152]:
pt_offers['Emission standard'] =pt_offers['Year'].apply(calculate_euro_norm)
pt_offers['Price(EUR)'] = pt_offers['Price'].apply(clean_price)
pt_offers['Mileage(km)'] = pt_offers['Mi'].apply(clean_mi)
pt_offers['kW'] = pt_offers['Power'].apply(power_kW_split)

In [153]:
pt_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)

In [154]:
pt_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 136 entries, 0 to 146
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               136 non-null    object 
 1   Model              136 non-null    object 
 2   Trim               136 non-null    object 
 3   Year               136 non-null    int64  
 4   Fuel type          136 non-null    object 
 5   Body type          136 non-null    object 
 6   Color              136 non-null    object 
 7   City               136 non-null    object 
 8   Transmission       136 non-null    object 
 9   Contact            136 non-null    object 
 10  Emission standard  136 non-null    int64  
 11  Price(EUR)         136 non-null    int64  
 12  Mileage(km)        136 non-null    int64  
 13  kW                 136 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 15.9+ KB


In [155]:
pt_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,Alfa Romeo,Giulietta,1.6 JTDm,2012,Diesel,Hatchback,Black,Maia,Manual,https://www.motores24h.pt/alfa-romeo-giulietta...,5,9850,184394,78.0
1,Seat,Leon,1.6 TDI,2019,Diesel,Hatchback,Black,Viseu,Manual,https://www.motores24h.pt/seat-leon-st-1.6-tdi...,6,15480,137000,86.0
3,Renault,Megane,1.5 dCi,2017,Diesel,Coupe,Gray,Maia,Manual,https://www.motores24h.pt/renault-megane-1.5-d...,6,14900,117635,82.0
4,Nissan,Leaf,Tekna,2017,Electric,Hatchback,White,Viseu,Automatic,https://www.motores24h.pt/nissan-leaf-tekna-30...,6,12450,38000,81.0
5,Bmw,1 Series,116d,2011,Diesel,Coupe,Black,Viseu,Manual,https://www.motores24h.pt/bmw-serie-1-116-d-ca...,5,11750,165000,86.0


In [156]:
pt_offers['Country'] = 'Portugal'

In [157]:
pt_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW,Country
0,Alfa Romeo,Giulietta,1.6 JTDm,2012,Diesel,Hatchback,Black,Maia,Manual,https://www.motores24h.pt/alfa-romeo-giulietta...,5,9850,184394,78.0,Portugal
1,Seat,Leon,1.6 TDI,2019,Diesel,Hatchback,Black,Viseu,Manual,https://www.motores24h.pt/seat-leon-st-1.6-tdi...,6,15480,137000,86.0,Portugal
3,Renault,Megane,1.5 dCi,2017,Diesel,Coupe,Gray,Maia,Manual,https://www.motores24h.pt/renault-megane-1.5-d...,6,14900,117635,82.0,Portugal
4,Nissan,Leaf,Tekna,2017,Electric,Hatchback,White,Viseu,Automatic,https://www.motores24h.pt/nissan-leaf-tekna-30...,6,12450,38000,81.0,Portugal
5,Bmw,1 Series,116d,2011,Diesel,Coupe,Black,Viseu,Manual,https://www.motores24h.pt/bmw-serie-1-116-d-ca...,5,11750,165000,86.0,Portugal


In [158]:
pt_offers.to_csv('cleaned_Portugal_offers.csv', index=False)

12. Denmark

In [159]:
dm_offers = pd.read_csv('Denmark.csv')
dm_offers

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"32,174 EUR (239,900 DKK)",Citroen,C3 Aircross,1.2,"9,500 km",2021,Petrol,Suv,Black,Brovst,96 kW (131 HP),Automatic,https://seek4cars.net/biler-til-salg/citroen/c...
1,"29,491 EUR (219,900 DKK)",Volkswagen,Tiguan,2.0 TDI 4Motion,"124,000 km",2016,Diesel,Crossover,White,Aarup,141 kW (192 HP),Automatic,https://seek4cars.net/biler-til-salg/vw/tiguan...
2,"44,914 EUR (334,900 DKK)",Mini,Cooper,1.6 S,"48,000 km",2017,Petrol,Convertible,Black,Varde,143 kW (194 HP),Automatic,https://www.autouncle.dk/en/d/4274148-used-min...
3,"29,491 EUR (219,900 DKK)",Bmw,4 Series,420d,"171,000 km",2017,Diesel,Coupe,Gray,Aarup,141 kW (192 HP),Automatic,https://seek4cars.net/biler-til-salg/bmw/420d/...
4,"4,023 EUR (29,999 DKK)",Hyundai,I20,1.1,"305,000 km",2015,Diesel,Hatchback,White,Hemmet,55 kW (75 HP),Manual,https://www.guloggratis.dk/annonce/5294a100-f5...
5,"16,080 EUR (119,900 DKK)",Toyota,Auris,1.6 VVT-i,"91,000 km",2013,Petrol,Hatchback,Gray,Aarup,98 kW (133 HP),Manual,https://seek4cars.net/biler-til-salg/toyota/au...
6,"7,899 EUR (58,900 DKK)",Volkswagen,Up!,1.0,"184,000 km",2014,Petrol,Mini,White,Hemmet,44 kW (60 HP),Manual,https://www.guloggratis.dk/annonce/79a52dac-ef...
7,"18,092 EUR (134,900 DKK)",Opel,Crossland X,1.2 Turbo,"51,000 km",2017,Petrol,Suv,Silver,Aarup,82 kW (112 HP),Manual,https://seek4cars.net/biler-til-salg/opel/cros...
8,"4,948 EUR (36,900 DKK)",Kia,Picanto,1.0,"164,000 km",2013,Petrol,Mini,White,Hemmet,51 kW (69 HP),Manual,https://www.guloggratis.dk/annonce/c1743312-a1...
9,"30,832 EUR (229,900 DKK)",Audi,Q2,1.6 TDI,"45,000 km",2017,Diesel,Crossover,Black,Ballerup,86 kW (117 HP),Manual,https://seek4cars.net/biler-til-salg/audi/q2/1...


For the missing DKK values in Price column, the EUR also needs to multiply the exchange rate 0,13

In [160]:
rows_without_DKK = dm_offers[~dm_offers['Price'].str.contains('DKK', na=False)]

In [161]:
dm_offers['Price'] = dm_offers['Price'].str.split('(').str[0].str.strip()

In [162]:
dm_offers['Emission standard'] =dm_offers['Year'].apply(calculate_euro_norm)
dm_offers['Price(EUR)'] = dm_offers['Price'].apply(clean_price)
dm_offers['Mileage(km)'] = dm_offers['Mi'].apply(clean_mi)
dm_offers['kW'] = dm_offers['Power'].apply(power_kW_split)

In [163]:
dm_offers.loc[rows_without_DKK.index, 'Price(EUR)'] = dm_offers.loc[rows_without_DKK.index, 'Price(EUR)'] * 0.13

In [164]:
dm_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Price              260 non-null    object 
 1   Make               260 non-null    object 
 2   Model              260 non-null    object 
 3   Trim               238 non-null    object 
 4   Mi                 260 non-null    object 
 5   Year               260 non-null    int64  
 6   Fuel type          260 non-null    object 
 7   Body type          260 non-null    object 
 8   Color              156 non-null    object 
 9   City               260 non-null    object 
 10  Power              236 non-null    object 
 11  Transmission       260 non-null    object 
 12  Contact            260 non-null    object 
 13  Emission standard  260 non-null    int64  
 14  Price(EUR)         260 non-null    int64  
 15  Mileage(km)        260 non-null    int64  
 16  kW                 236 non

In [165]:

dm_offers.loc[dm_offers['Color'].isna(), 'Color'] = "NA"

In [166]:
dm_offers = dm_offers.dropna()

In [167]:
dm_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dm_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)


In [168]:
dm_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 214 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               214 non-null    object 
 1   Model              214 non-null    object 
 2   Trim               214 non-null    object 
 3   Year               214 non-null    int64  
 4   Fuel type          214 non-null    object 
 5   Body type          214 non-null    object 
 6   Color              214 non-null    object 
 7   City               214 non-null    object 
 8   Transmission       214 non-null    object 
 9   Contact            214 non-null    object 
 10  Emission standard  214 non-null    int64  
 11  Price(EUR)         214 non-null    int64  
 12  Mileage(km)        214 non-null    int64  
 13  kW                 214 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 25.1+ KB


In [169]:
dm_offers['Country'] = 'Denmark'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dm_offers['Country'] = 'Denmark'


In [170]:
dm_offers.to_csv('cleaned_Denmark_offers.csv',index=False)

13.Norway

In [171]:
nw_offers = pd.read_csv('Norway.csv')
nw_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"37,981 EUR (439,000 NOK)",Lexus,Ux,300e,"2,000 km",2023,Electric,Crossover,Green,Leknes,152 kW (207 HP),Automatic,https://www.nordvik.no/bruktbil/342158655
1,"5,969 EUR (69,000 NOK)",Opel,Insignia,2.0 CDTi,"221,400 km",2010,Diesel,Wagon,Black,Jessheim,82 kW (112 HP),Manual,https://www.finn.no/car/used/ad.html?finnkode=...
2,"6,055 EUR (69,990 NOK)",Ford,Transit Custom,2.2,"215,000 km",2014,Diesel,Van,White,Ski,75 kW (102 HP),Manual,https://www.finn.no/car/used/ad.html?finnkode=...
3,"12,052 EUR (139,305 NOK)",Subaru,Forester,2.0 D,"231,574 km",2013,Diesel,Suv,Black,Tønsberg,109 kW (148 HP),Manual,https://www.finn.no/car/used/ad.html?finnkode=...
4,"7,354 EUR (85,000 NOK)",Toyota,Auris,1.4 D-4D,"210,000 km",2014,Diesel,Hatchback,Gray,Hønefoss,67 kW (91 HP),Manual,https://www.finn.no/car/used/ad.html?finnkode=...


In [172]:
rows_without_NOK = nw_offers[~nw_offers['Price'].str.contains('NOK', na=False)]
rows_without_NOK

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
61,"1,399,000 EUR",Mercedes Benz,S-Class,63 AMG,"64,000 km",2018,Petrol,Sedan,Gray,Skien,456 kW (620 HP),Automatic,https://www.finn.no/car/used/ad.html?finnkode=...
86,"35,990 EUR",Citroen,C3,1.6,"114,000 km",2006,Petrol,Hatchback,Gray,Skien,81 kW (110 HP),Automatic,https://www.finn.no/car/used/ad.html?finnkode=...
87,"53,000 EUR",Citroen,C3,1.6,"112,000 km",2006,Petrol,Hatchback,Gray,Skien,81 kW (110 HP),Automatic,https://www.finn.no/car/used/ad.html?finnkode=...


In [173]:
nw_offers['Price'] = nw_offers['Price'].str.split('(').str[0]

In [174]:
nw_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"37,981 EUR",Lexus,Ux,300e,"2,000 km",2023,Electric,Crossover,Green,Leknes,152 kW (207 HP),Automatic,https://www.nordvik.no/bruktbil/342158655
1,"5,969 EUR",Opel,Insignia,2.0 CDTi,"221,400 km",2010,Diesel,Wagon,Black,Jessheim,82 kW (112 HP),Manual,https://www.finn.no/car/used/ad.html?finnkode=...
2,"6,055 EUR",Ford,Transit Custom,2.2,"215,000 km",2014,Diesel,Van,White,Ski,75 kW (102 HP),Manual,https://www.finn.no/car/used/ad.html?finnkode=...
3,"12,052 EUR",Subaru,Forester,2.0 D,"231,574 km",2013,Diesel,Suv,Black,Tønsberg,109 kW (148 HP),Manual,https://www.finn.no/car/used/ad.html?finnkode=...
4,"7,354 EUR",Toyota,Auris,1.4 D-4D,"210,000 km",2014,Diesel,Hatchback,Gray,Hønefoss,67 kW (91 HP),Manual,https://www.finn.no/car/used/ad.html?finnkode=...


In [175]:
nw_offers['Emission standard'] =nw_offers['Year'].apply(calculate_euro_norm)
nw_offers['Price(EUR)'] = nw_offers['Price'].apply(clean_price)
nw_offers['Mileage(km)'] = nw_offers['Mi'].apply(clean_mi)
nw_offers['kW'] = nw_offers['Power'].apply(power_kW_split)

In [176]:
nw_offers.loc[rows_without_NOK.index, 'Price(EUR)'] *= 0.086

  nw_offers.loc[rows_without_NOK.index, 'Price(EUR)'] *= 0.086


In [177]:
nw_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Price              260 non-null    object 
 1   Make               260 non-null    object 
 2   Model              260 non-null    object 
 3   Trim               217 non-null    object 
 4   Mi                 260 non-null    object 
 5   Year               260 non-null    int64  
 6   Fuel type          260 non-null    object 
 7   Body type          260 non-null    object 
 8   Color              260 non-null    object 
 9   City               260 non-null    object 
 10  Power              258 non-null    object 
 11  Transmission       260 non-null    object 
 12  Contact            260 non-null    object 
 13  Emission standard  260 non-null    int64  
 14  Price(EUR)         260 non-null    float64
 15  Mileage(km)        260 non-null    int64  
 16  kW                 258 non

In [178]:
nw_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)

In [179]:
nw_offers = nw_offers.dropna()

In [180]:
nw_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,Lexus,Ux,300e,2023,Electric,Crossover,Green,Leknes,Automatic,https://www.nordvik.no/bruktbil/342158655,6,37981.0,2000,152.0
1,Opel,Insignia,2.0 CDTi,2010,Diesel,Wagon,Black,Jessheim,Manual,https://www.finn.no/car/used/ad.html?finnkode=...,5,5969.0,221400,82.0
2,Ford,Transit Custom,2.2,2014,Diesel,Van,White,Ski,Manual,https://www.finn.no/car/used/ad.html?finnkode=...,6,6055.0,215000,75.0
3,Subaru,Forester,2.0 D,2013,Diesel,Suv,Black,Tønsberg,Manual,https://www.finn.no/car/used/ad.html?finnkode=...,5,12052.0,231574,109.0
4,Toyota,Auris,1.4 D-4D,2014,Diesel,Hatchback,Gray,Hønefoss,Manual,https://www.finn.no/car/used/ad.html?finnkode=...,6,7354.0,210000,67.0


In [181]:
nw_offers['Country'] = 'Norway'

In [182]:
nw_offers.to_csv('cleaned_Norway_offers.csv', index=False)

14. Hungary

In [183]:
hg_offers=pd.read_csv('Hungary.csv')
hg_offers

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Euro norm,Body type,Color,City,Power,Transmission,Contact
0,"26,512 EUR (10,450,000 HUF)",Kia,Sportage,1.6 GDi,"3,999 km",2023,Petrol,5.0,Crossover,Silver,Szombathely,110 kW (150 HP),Automatic,https://joautok.hu/hasznaltauto/kia/sportage/g...
1,"24,606 EUR (9,699,000 HUF)",Kia,Xceed,1.6,3 km,2024,Petrol,,Crossover,White,Szombathely,118 kW (160 HP),Automatic,https://www.szalonauto.hu/szemelyauto/kia/xcee...
2,"25,345 EUR (9,990,000 HUF)",Kia,Sportage,1.6 GDi,3 km,2024,Petrol,,Crossover,Gray,Szombathely,111 kW (151 HP),Manual,https://www.hasznaltauto.hu/szemelyauto/kia/sp...
3,"44,058 EUR (17,366,000 HUF)",Ford,Mustang Mach-E,,610 km,2024,Electric,,Crossover,Green,Szeged,216 kW (294 HP),Automatic,https://www.szalonauto.hu/szemelyauto/ford/mus...
4,"35,549 EUR (14,012,000 HUF)",Ford,Mustang Mach-E,,440 km,2024,Electric,,Crossover,Green,Szeged,197 kW (268 HP),Automatic,https://www.szalonauto.hu/szemelyauto/ford/mus...
5,"47,671 EUR (18,790,000 HUF)",Volkswagen,Tiguan,2.0 TDI 4Motion,0 km,2024,Diesel,,Crossover,Black,Kisvárda,110 kW (150 HP),Automatic,https://www.szalonauto.hu/szemelyauto/volkswag...
6,"21,539 EUR (8,490,000 HUF)",Citroen,C4,1.2 puretech,2 km,2023,Petrol,,Hatchback,Black,Budaörs,96 kW (131 HP),Automatic,https://joautok.hu/hasznaltauto/citroen/c4/c4-...
7,"30,419 EUR (11,990,000 HUF)",Volkswagen,T-Cross,1.5 tsi,0 km,2024,Petrol,,Crossover,Cyan,Kisvárda,110 kW (150 HP),Automatic,https://www.szalonauto.hu/szemelyauto/volkswag...
8,"48,178 EUR (18,990,000 HUF)",Volkswagen,Arteon,2.0 TSI,0 km,2024,Petrol,,Coupe,White,Kisvárda,139 kW (189 HP),Automatic,https://www.szalonauto.hu/szemelyauto/volkswag...
9,"30,419 EUR (11,990,000 HUF)",Volkswagen,Taigo,1.5 tsi,0 km,2024,Petrol,,Crossover,Silver,Kisvárda,110 kW (150 HP),Automatic,https://www.szalonauto.hu/szemelyauto/volkswag...


In [184]:
rows_without_HUF = hg_offers[~hg_offers['Price'].str.contains('HUF',  na=False)]
rows_without_HUF

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Euro norm,Body type,Color,City,Power,Transmission,Contact


In [185]:
# there's no missing local currency values. Otherwise, perform hg_offers.loc[rows_without_HUF.index, 'Price(EUR)'] *= 0,0025

In [186]:
hg_offers['Price']= hg_offers['Price'].str.split('(').str[0]

In [187]:
hg_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         260 non-null    object 
 1   Make          260 non-null    object 
 2   Model         260 non-null    object 
 3   Trim          168 non-null    object 
 4   Mi            215 non-null    object 
 5   Year          260 non-null    int64  
 6   Fuel type     194 non-null    object 
 7   Euro norm     1 non-null      float64
 8   Body type     223 non-null    object 
 9   Color         98 non-null     object 
 10  City          145 non-null    object 
 11  Power         259 non-null    object 
 12  Transmission  215 non-null    object 
 13  Contact       260 non-null    object 
dtypes: float64(1), int64(1), object(12)
memory usage: 28.6+ KB


In [188]:
hg_offers.dropna(subset = ['Mi', 'Trim', 'Fuel type', 'Body type', 'Power', 'Transmission'], inplace=True)

In [189]:
hg_offers['Emission standard'] =hg_offers['Year'].apply(calculate_euro_norm)
hg_offers['Price(EUR)'] = hg_offers['Price'].apply(clean_price)
hg_offers['Mileage(km)'] = hg_offers['Mi'].apply(clean_mi)
hg_offers['kW'] = hg_offers['Power'].apply(power_kW_split)

In [190]:
hg_offers.drop(columns= ['Price', 'Mi', 'Power', 'Euro norm'], inplace= True)

In [191]:
hg_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 100 entries, 0 to 258
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               100 non-null    object 
 1   Model              100 non-null    object 
 2   Trim               100 non-null    object 
 3   Year               100 non-null    int64  
 4   Fuel type          100 non-null    object 
 5   Body type          100 non-null    object 
 6   Color              60 non-null     object 
 7   City               72 non-null     object 
 8   Transmission       100 non-null    object 
 9   Contact            100 non-null    object 
 10  Emission standard  100 non-null    int64  
 11  Price(EUR)         100 non-null    int64  
 12  Mileage(km)        100 non-null    int64  
 13  kW                 100 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 11.7+ KB


In [192]:
hg_offers.loc[hg_offers['City'].isna(), 'City'] = "NA"
hg_offers.loc[hg_offers['Color'].isna(), 'Color'] = "NA"


In [193]:
hg_offers['Country'] = "Hungary"

In [194]:
hg_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW,Country
0,Kia,Sportage,1.6 GDi,2023,Petrol,Crossover,Silver,Szombathely,Automatic,https://joautok.hu/hasznaltauto/kia/sportage/g...,6,26512,3999,110.0,Hungary
1,Kia,Xceed,1.6,2024,Petrol,Crossover,White,Szombathely,Automatic,https://www.szalonauto.hu/szemelyauto/kia/xcee...,6,24606,3,118.0,Hungary
2,Kia,Sportage,1.6 GDi,2024,Petrol,Crossover,Gray,Szombathely,Manual,https://www.hasznaltauto.hu/szemelyauto/kia/sp...,6,25345,3,111.0,Hungary
5,Volkswagen,Tiguan,2.0 TDI 4Motion,2024,Diesel,Crossover,Black,Kisvárda,Automatic,https://www.szalonauto.hu/szemelyauto/volkswag...,6,47671,0,110.0,Hungary
6,Citroen,C4,1.2 puretech,2023,Petrol,Hatchback,Black,Budaörs,Automatic,https://joautok.hu/hasznaltauto/citroen/c4/c4-...,6,21539,2,96.0,Hungary


In [195]:
hg_offers.to_csv('cleaned_Hungary_offers.csv', index=False)

15. Czech Republic

In [196]:
cr_offers = pd.read_csv('Czech Republic.csv')
cr_offers

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"138,590 EUR (3,499,900 CZK)",Mercedes Benz,Gls-Class,580 4Matic,10 km,2023,Petrol,Suv,Black,Praha,360 kW (490 HP),Automatic,https://www.autanet.cz/detail/mercedes-benz/gl...
1,"106,912 EUR (2,699,900 CZK)",Mercedes Benz,Gle-Class,450 AMG 4MATIC,"6,460 km",2023,Petrol,Suv,Black,Praha,280 kW (381 HP),Automatic,http://auto.rychle.cz/osobni-auto-O/Mercedes-B...
2,"172,249 EUR (4,349,900 CZK)",Mercedes Benz,Sl-Class,4.0 AMG 55,"2,966 km",2023,Petrol,Convertible,Gray,Praha,350 kW (476 HP),Automatic,https://autocaris.cz/detail.php?inzerat=716194
3,"7,325 EUR (184,999 CZK)",Subaru,Forester,2.0,"240,856 km",2013,Diesel,Suv,White,Brno,108 kW (147 HP),Manual,https://www.annonce.cz/inzerat/subaru-forester...
4,"6,890 EUR (174,000 CZK)",Hyundai,Ix35,2.0 CRDi,"239,100 km",2010,Diesel,Crossover,Gray,Říčany,100 kW (136 HP),Manual,https://www.automodul.cz/hyundai-ix35-2-0-crdi...
5,"16,235 EUR (409,999 CZK)",Volkswagen,Touareg,4.0 TDI V8,"319,000 km",2010,Diesel,Suv,Brown,Brno,250 kW (340 HP),Automatic,https://www.cars.cz/inzerce/osobni-auto-O/Volk...
6,"37,614 EUR (949,900 CZK)",Tesla,Model S,electro,"84,670 km",2018,Electric,Sedan,Blue,Praha,193 kW (262 HP),Automatic,https://www.sauto.cz/osobni/detail/tesla/model...
7,"19,795 EUR (499,900 CZK)",Mercedes Benz,Gla-Class,220d 4Matic,"145,120 km",2016,Diesel,Suv,Black,Praha,130 kW (177 HP),Automatic,https://www.cars.cz/inzerce/osobni-auto-O/Merc...
8,"12,631 EUR (319,000 CZK)",Subaru,Outback,2.0 D,"236,188 km",2015,Diesel,Crossover,Blue,Olomouc,110 kW (150 HP),Automatic,https://www.autanet.cz/detail/subaru/outback/2...
9,"27,718 EUR (699,999 CZK)",Bmw,X5,M50d,"156,979 km",2017,Diesel,Suv,White,Olbramovice,280 kW (381 HP),Automatic,http://auto.rychle.cz/osobni-auto-O/BMW/X5/X5-...


In [197]:
rows_without_CZK = cr_offers[~cr_offers['Price'].str.contains('CZK', na=False)]

In [198]:
rows_without_CZK

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact


In [199]:
cr_offers['Price'] = cr_offers['Price'].str.split('(').str[0]

In [200]:
cr_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"138,590 EUR",Mercedes Benz,Gls-Class,580 4Matic,10 km,2023,Petrol,Suv,Black,Praha,360 kW (490 HP),Automatic,https://www.autanet.cz/detail/mercedes-benz/gl...
1,"106,912 EUR",Mercedes Benz,Gle-Class,450 AMG 4MATIC,"6,460 km",2023,Petrol,Suv,Black,Praha,280 kW (381 HP),Automatic,http://auto.rychle.cz/osobni-auto-O/Mercedes-B...
2,"172,249 EUR",Mercedes Benz,Sl-Class,4.0 AMG 55,"2,966 km",2023,Petrol,Convertible,Gray,Praha,350 kW (476 HP),Automatic,https://autocaris.cz/detail.php?inzerat=716194
3,"7,325 EUR",Subaru,Forester,2.0,"240,856 km",2013,Diesel,Suv,White,Brno,108 kW (147 HP),Manual,https://www.annonce.cz/inzerat/subaru-forester...
4,"6,890 EUR",Hyundai,Ix35,2.0 CRDi,"239,100 km",2010,Diesel,Crossover,Gray,Říčany,100 kW (136 HP),Manual,https://www.automodul.cz/hyundai-ix35-2-0-crdi...


In [201]:
cr_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 143 entries, 0 to 142
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Price         143 non-null    object
 1   Make          143 non-null    object
 2   Model         143 non-null    object
 3   Trim          136 non-null    object
 4   Mi            143 non-null    object
 5   Year          143 non-null    int64 
 6   Fuel type     143 non-null    object
 7   Body type     143 non-null    object
 8   Color         143 non-null    object
 9   City          143 non-null    object
 10  Power         143 non-null    object
 11  Transmission  139 non-null    object
 12  Contact       143 non-null    object
dtypes: int64(1), object(12)
memory usage: 14.7+ KB


In [202]:
cr_offers['Emission standard'] =cr_offers['Year'].apply(calculate_euro_norm)
cr_offers['Price(EUR)'] = cr_offers['Price'].apply(clean_price)
cr_offers['Mileage(km)'] = cr_offers['Mi'].apply(clean_mi)
cr_offers['kW'] = cr_offers['Power'].apply(power_kW_split)

In [203]:
cr_offers.loc[rows_without_CZK.index, 'Price(EUR)'] *= 0.040

In [204]:
cr_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)

In [205]:
cr_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 143 entries, 0 to 142
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               143 non-null    object 
 1   Model              143 non-null    object 
 2   Trim               136 non-null    object 
 3   Year               143 non-null    int64  
 4   Fuel type          143 non-null    object 
 5   Body type          143 non-null    object 
 6   Color              143 non-null    object 
 7   City               143 non-null    object 
 8   Transmission       139 non-null    object 
 9   Contact            143 non-null    object 
 10  Emission standard  143 non-null    int64  
 11  Price(EUR)         143 non-null    int64  
 12  Mileage(km)        143 non-null    int64  
 13  kW                 143 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 15.8+ KB


In [207]:
#cr_offers.drop(columns= ['Euro norm'], inplace= True)

In [208]:
cr_offers.dropna(subset=['Transmission','Trim', 'kW'], inplace =True)

In [209]:
cr_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 132 entries, 0 to 142
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               132 non-null    object 
 1   Model              132 non-null    object 
 2   Trim               132 non-null    object 
 3   Year               132 non-null    int64  
 4   Fuel type          132 non-null    object 
 5   Body type          132 non-null    object 
 6   Color              132 non-null    object 
 7   City               132 non-null    object 
 8   Transmission       132 non-null    object 
 9   Contact            132 non-null    object 
 10  Emission standard  132 non-null    int64  
 11  Price(EUR)         132 non-null    int64  
 12  Mileage(km)        132 non-null    int64  
 13  kW                 132 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 15.5+ KB


In [210]:
cr_offers['Country'] = "Czech Republic"

In [211]:
cr_offers.to_csv('cleaned_Czech_offers.csv', index=False)

16. Romania

In [212]:
rm_offers = pd.read_csv('Romania.csv')
rm_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Euro norm
0,"12,650 EUR",Land Rover,Range Rover Sport,3.0,"257,000 km",2012,Diesel,Suv,Black,Bucureşti,188 kW (256 HP),Automatic,https://www.anuntul.ro/anunt-autoturism-land-r...,
1,"59,269 EUR",Mercedes Benz,Gls-Class,,"99,988 km",2018,Petrol,Suv,Gray,Constanţa,182 kW (248 HP),Automatic,https://www.leasingsh.ro/mercedes-benz-gls-400...,
2,"48,269 EUR",Volvo,Xc90,2.0,"50,000 km",2018,Petrol,Suv,Black,Constanţa,170 kW (231 HP),Automatic,https://www.leasingsh.ro/volvo-xc-90-2018-VOL4...,
3,"17,493 EUR",Volkswagen,T-Cross,1.0 tsi,"70,400 km",2021,Petrol,Crossover,Gray,Bucureşti,70 kW (95 HP),Automatic,https://www.anuntul.ro/anunt-autoturism-volksw...,
4,"20,450 EUR",Bmw,X1,,"189,093 km",2017,Diesel,Suv,Black,Ploieşti,110 kW (150 HP),Automatic,https://www.anuntul.ro/anunt-autoturism-bmw-x1...,


In [213]:
rm_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104 entries, 0 to 103
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         104 non-null    object 
 1   Make          104 non-null    object 
 2   Model         104 non-null    object 
 3   Trim          90 non-null     object 
 4   Mi            104 non-null    object 
 5   Year          104 non-null    int64  
 6   Fuel type     101 non-null    object 
 7   Body type     104 non-null    object 
 8   Color         71 non-null     object 
 9   City          104 non-null    object 
 10  Power         91 non-null     object 
 11  Transmission  93 non-null     object 
 12  Contact       104 non-null    object 
 13  Euro norm     44 non-null     float64
dtypes: float64(1), int64(1), object(12)
memory usage: 11.5+ KB


In [214]:
rm_offers.loc[rm_offers['Color'].isna(), 'Color'] = "NA"
rm_offers.loc[rm_offers['City'].isna(), 'City'] = "NA"

In [215]:
rm_offers['Emission standard'] =rm_offers['Year'].apply(calculate_euro_norm)
rm_offers['Price(EUR)'] = rm_offers['Price'].apply(clean_price)
rm_offers['Mileage(km)'] = rm_offers['Mi'].apply(clean_mi)
rm_offers['kW'] = rm_offers['Power'].apply(power_kW_split)

In [216]:
rm_offers.drop(columns= ['Price', 'Mi', 'Power', 'Euro norm'], inplace= True)

In [217]:
rm_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104 entries, 0 to 103
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               104 non-null    object 
 1   Model              104 non-null    object 
 2   Trim               90 non-null     object 
 3   Year               104 non-null    int64  
 4   Fuel type          101 non-null    object 
 5   Body type          104 non-null    object 
 6   Color              104 non-null    object 
 7   City               104 non-null    object 
 8   Transmission       93 non-null     object 
 9   Contact            104 non-null    object 
 10  Emission standard  104 non-null    int64  
 11  Price(EUR)         104 non-null    int64  
 12  Mileage(km)        104 non-null    int64  
 13  kW                 91 non-null     float64
dtypes: float64(1), int64(4), object(9)
memory usage: 11.5+ KB


In [218]:
rm_offers = rm_offers.dropna()

In [219]:
rm_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 64 entries, 0 to 103
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               64 non-null     object 
 1   Model              64 non-null     object 
 2   Trim               64 non-null     object 
 3   Year               64 non-null     int64  
 4   Fuel type          64 non-null     object 
 5   Body type          64 non-null     object 
 6   Color              64 non-null     object 
 7   City               64 non-null     object 
 8   Transmission       64 non-null     object 
 9   Contact            64 non-null     object 
 10  Emission standard  64 non-null     int64  
 11  Price(EUR)         64 non-null     int64  
 12  Mileage(km)        64 non-null     int64  
 13  kW                 64 non-null     float64
dtypes: float64(1), int64(4), object(9)
memory usage: 7.5+ KB


In [220]:
rm_offers['Country'] = "Romania"

In [221]:
rm_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW,Country
0,Land Rover,Range Rover Sport,3.0,2012,Diesel,Suv,Black,Bucureşti,Automatic,https://www.anuntul.ro/anunt-autoturism-land-r...,5,12650,257000,188.0,Romania
2,Volvo,Xc90,2.0,2018,Petrol,Suv,Black,Constanţa,Automatic,https://www.leasingsh.ro/volvo-xc-90-2018-VOL4...,6,48269,50000,170.0,Romania
3,Volkswagen,T-Cross,1.0 tsi,2021,Petrol,Crossover,Gray,Bucureşti,Automatic,https://www.anuntul.ro/anunt-autoturism-volksw...,6,17493,70400,70.0,Romania
5,Mazda,3,1.6,2010,Petrol,Hatchback,Black,Cluj Napoca,Manual,https://www.anuntul.ro/anunt-autoturism-mazda-...,5,6795,163000,77.0,Romania
6,Volvo,Xc90,2.0,2020,Diesel,Suv,Gray,Bucureşti,Automatic,https://www.leasingsh.ro/volvo-xc-90-2020-VOL5...,6,47899,68500,175.0,Romania


In [222]:
rm_offers.to_csv('cleaned_Romania_offers.csv', index=False)