In [8]:
#conda install --upgrade bottleneck

In [1]:
import pandas as pd
import numpy as np
import re


1. Austria

In [2]:
as_offers = pd.read_csv("Austria.csv")
as_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Euro norm,Body type,Color,City,Power,Transmission,Contact
0,"15,499 EUR",Volkswagen,Sharan,2.0 TDI,"182,000 km",2013,Diesel,5.0,Van,Gray,Braunau Am Inn,104 kW (141 HP),Manual,https://www.zweispurig.at/vw-sharan-gebrauchtw...
1,"8,900 EUR",Audi,A6,2.7,"250,000 km",2009,Diesel,5.0,Sedan,Silver,Salzburg,141 kW (192 HP),Manual,https://www.zweispurig.at/audi-a6-gebrauchtwag...
2,"25,999 EUR",Porsche,Cayenne,3.0 diesel,"256,000 km",2011,Diesel,5.0,Suv,Black,Braunau Am Inn,182 kW (248 HP),Automatic,https://www.zweispurig.at/porsche-cayenne-gebr...
3,"18,790 EUR",Volkswagen,Passat,2.0 TDI SCR,"94,187 km",2019,Diesel,6.0,Wagon,Silver,Wolfsberg,111 kW (151 HP),Manual,https://www.zweispurig.at/vw-passat-gebrauchtw...
4,"18,990 EUR",Volkswagen,Passat,2.0 TDI SCR,"77,167 km",2019,Diesel,6.0,Wagon,Black,Wels,111 kW (151 HP),Automatic,https://www.dasweltauto.at/vehicle/4633600081


In [3]:
# check the data shape
as_offers.shape

(260, 14)

In [4]:
# Check data type
as_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         260 non-null    object 
 1   Make          260 non-null    object 
 2   Model         260 non-null    object 
 3   Trim          249 non-null    object 
 4   Mi            260 non-null    object 
 5   Year          260 non-null    int64  
 6   Fuel type     260 non-null    object 
 7   Euro norm     189 non-null    float64
 8   Body type     260 non-null    object 
 9   Color         260 non-null    object 
 10  City          260 non-null    object 
 11  Power         260 non-null    object 
 12  Transmission  238 non-null    object 
 13  Contact       260 non-null    object 
dtypes: float64(1), int64(1), object(12)
memory usage: 28.6+ KB


Kilowatts are a metric unit used to measure power - in this case, the rate at which a car's engine turns the energy stored in fossil fuels into movement. Usually, the more kilowatts an engine produces, the faster a car will accelerate.

Power is measured in horsepower (hp) or in Watts (kW), the two units are directly proportional. The hp measure is a historical one based on old imperial units (like inches, feet and miles), the kW measure is from the metric (SI) system which is an attempt to bring some global conformity. In Europe, the metric system (kW) is by-and-large the norm, while in the US, the hp is the unit of choice.

In [5]:
# Price needs to be modified by removing the "EUR" from the string value and transform it into numeric value
# "Mi" also needs to remove the "km" in the end and "," in the middle and turns into numeric value
# We'll first split the column "Power" and only keep kW in numerical type


In [6]:
# Define a function for each of the change point I want to make
def clean_price(row):
    cleaned_row = int(str(row).replace(',', '').replace('EUR', ''))
    return cleaned_row

In [7]:
def power_kW_split(row):
    matches = re.findall(r'(\d+(\.\d+)?)\s*kW', str(row))
    
    # Check if there are any matches before attempting to access the first element
    if matches:
        kW_value = float(matches[0][0]) 
        return kW_value
    else:
        return None


In [8]:
def clean_mi(row):
    cleaned_row = int(str(row).replace(',', '').replace('km', ''))
    return cleaned_row

In [9]:
#calculate based on the year of each norm
def calculate_euro_norm(row):
    if row >= 2014:
        return 6
    elif row >= 2009:
        return 5
    elif row >=2005:
        return 4
    elif row >= 2000:
        return 3
    elif row >= 1996:
        return 2
    else:
        return 1

In [10]:
as_offers['Price(EUR)'] = as_offers['Price'].apply(clean_price)
as_offers['Mileage(km)'] = as_offers['Mi'].apply(clean_mi)
as_offers['kW'] = as_offers['Power'].apply(power_kW_split)
as_offers['Emission standard'] = as_offers['Year'].apply(calculate_euro_norm)

In [11]:
as_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Euro norm,Body type,Color,City,Power,Transmission,Contact,Price(EUR),Mileage(km),kW,Emission standard
0,"15,499 EUR",Volkswagen,Sharan,2.0 TDI,"182,000 km",2013,Diesel,5.0,Van,Gray,Braunau Am Inn,104 kW (141 HP),Manual,https://www.zweispurig.at/vw-sharan-gebrauchtw...,15499,182000,104.0,5
1,"8,900 EUR",Audi,A6,2.7,"250,000 km",2009,Diesel,5.0,Sedan,Silver,Salzburg,141 kW (192 HP),Manual,https://www.zweispurig.at/audi-a6-gebrauchtwag...,8900,250000,141.0,5
2,"25,999 EUR",Porsche,Cayenne,3.0 diesel,"256,000 km",2011,Diesel,5.0,Suv,Black,Braunau Am Inn,182 kW (248 HP),Automatic,https://www.zweispurig.at/porsche-cayenne-gebr...,25999,256000,182.0,5
3,"18,790 EUR",Volkswagen,Passat,2.0 TDI SCR,"94,187 km",2019,Diesel,6.0,Wagon,Silver,Wolfsberg,111 kW (151 HP),Manual,https://www.zweispurig.at/vw-passat-gebrauchtw...,18790,94187,111.0,6
4,"18,990 EUR",Volkswagen,Passat,2.0 TDI SCR,"77,167 km",2019,Diesel,6.0,Wagon,Black,Wels,111 kW (151 HP),Automatic,https://www.dasweltauto.at/vehicle/4633600081,18990,77167,111.0,6


In [12]:
# Now the columns of "Price", "Mi", 'Euro norm' and "Power" can be deleted
as_offers.drop(columns = ["Price", "Mi", "Power", 'Euro norm'], inplace = True)

In [13]:
as_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Price(EUR),Mileage(km),kW,Emission standard
0,Volkswagen,Sharan,2.0 TDI,2013,Diesel,Van,Gray,Braunau Am Inn,Manual,https://www.zweispurig.at/vw-sharan-gebrauchtw...,15499,182000,104.0,5
1,Audi,A6,2.7,2009,Diesel,Sedan,Silver,Salzburg,Manual,https://www.zweispurig.at/audi-a6-gebrauchtwag...,8900,250000,141.0,5
2,Porsche,Cayenne,3.0 diesel,2011,Diesel,Suv,Black,Braunau Am Inn,Automatic,https://www.zweispurig.at/porsche-cayenne-gebr...,25999,256000,182.0,5
3,Volkswagen,Passat,2.0 TDI SCR,2019,Diesel,Wagon,Silver,Wolfsberg,Manual,https://www.zweispurig.at/vw-passat-gebrauchtw...,18790,94187,111.0,6
4,Volkswagen,Passat,2.0 TDI SCR,2019,Diesel,Wagon,Black,Wels,Automatic,https://www.dasweltauto.at/vehicle/4633600081,18990,77167,111.0,6


In [14]:
as_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               260 non-null    object 
 1   Model              260 non-null    object 
 2   Trim               249 non-null    object 
 3   Year               260 non-null    int64  
 4   Fuel type          260 non-null    object 
 5   Body type          260 non-null    object 
 6   Color              260 non-null    object 
 7   City               260 non-null    object 
 8   Transmission       238 non-null    object 
 9   Contact            260 non-null    object 
 10  Price(EUR)         260 non-null    int64  
 11  Mileage(km)        260 non-null    int64  
 12  kW                 260 non-null    float64
 13  Emission standard  260 non-null    int64  
dtypes: float64(1), int64(4), object(9)
memory usage: 28.6+ KB


In [15]:
# As we can see there are some missing values
as_offers.isna().sum()

Make                  0
Model                 0
Trim                 11
Year                  0
Fuel type             0
Body type             0
Color                 0
City                  0
Transmission         22
Contact               0
Price(EUR)            0
Mileage(km)           0
kW                    0
Emission standard     0
dtype: int64

In [17]:
# Drop the missing value for "Trim"
as_offers.dropna(inplace=True)
as_offers.isna().sum()


Make                 0
Model                0
Trim                 0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Price(EUR)           0
Mileage(km)          0
kW                   0
Emission standard    0
dtype: int64

"Euro norm" refers to the European emission standards or regulations that set limits on the amount of pollutants that vehicles are allowed to emit. 
The Euro norms are a series of regulations that specify the acceptable limits for various pollutants, including nitrogen oxides (NOx), particulate matter (PM), carbon monoxide (CO), hydrocarbons (HC), and others. These standards have been progressively tightened over the years to encourage the development and adoption of cleaner and more environmentally friendly vehicle technologies.

For example, Euro 1, Euro 2, Euro 3, and so on, represent different stages of emission standards. As vehicles advance through these stages, they must comply with stricter emission limits. The Euro 6 standard, which was implemented in 2014 for passenger cars, is one of the latest and most stringent standards, setting strict limits on NOx and other pollutants.

In [18]:
as_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Price(EUR),Mileage(km),kW,Emission standard
0,Volkswagen,Sharan,2.0 TDI,2013,Diesel,Van,Gray,Braunau Am Inn,Manual,https://www.zweispurig.at/vw-sharan-gebrauchtw...,15499,182000,104.0,5
1,Audi,A6,2.7,2009,Diesel,Sedan,Silver,Salzburg,Manual,https://www.zweispurig.at/audi-a6-gebrauchtwag...,8900,250000,141.0,5
2,Porsche,Cayenne,3.0 diesel,2011,Diesel,Suv,Black,Braunau Am Inn,Automatic,https://www.zweispurig.at/porsche-cayenne-gebr...,25999,256000,182.0,5
3,Volkswagen,Passat,2.0 TDI SCR,2019,Diesel,Wagon,Silver,Wolfsberg,Manual,https://www.zweispurig.at/vw-passat-gebrauchtw...,18790,94187,111.0,6
4,Volkswagen,Passat,2.0 TDI SCR,2019,Diesel,Wagon,Black,Wels,Automatic,https://www.dasweltauto.at/vehicle/4633600081,18990,77167,111.0,6


In [19]:
# Let's check "Transmission"
as_offers['Transmission'].value_counts()


Transmission
Manual            119
Automatic         109
Semi Automatic      1
Name: count, dtype: int64

In [20]:
# create a new column with the country name "Austria"
as_offers['Country'] = "Austria" 

In [21]:
as_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Price(EUR),Mileage(km),kW,Emission standard,Country
0,Volkswagen,Sharan,2.0 TDI,2013,Diesel,Van,Gray,Braunau Am Inn,Manual,https://www.zweispurig.at/vw-sharan-gebrauchtw...,15499,182000,104.0,5,Austria
1,Audi,A6,2.7,2009,Diesel,Sedan,Silver,Salzburg,Manual,https://www.zweispurig.at/audi-a6-gebrauchtwag...,8900,250000,141.0,5,Austria
2,Porsche,Cayenne,3.0 diesel,2011,Diesel,Suv,Black,Braunau Am Inn,Automatic,https://www.zweispurig.at/porsche-cayenne-gebr...,25999,256000,182.0,5,Austria
3,Volkswagen,Passat,2.0 TDI SCR,2019,Diesel,Wagon,Silver,Wolfsberg,Manual,https://www.zweispurig.at/vw-passat-gebrauchtw...,18790,94187,111.0,6,Austria
4,Volkswagen,Passat,2.0 TDI SCR,2019,Diesel,Wagon,Black,Wels,Automatic,https://www.dasweltauto.at/vehicle/4633600081,18990,77167,111.0,6,Austria


In [22]:
# Save it to a new csv
as_offers.to_csv('cleaned_Austria_offers.csv', index=False)

2. Belgium

In [23]:
be_offers = pd.read_csv('Belgium.csv')
be_offers.head()

Unnamed: 0,Price,Make,Model,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Trim,Euro norm
0,"59,999 EUR",Audi,Q4,"16,290 km",2022,Electric,Crossover,Blue,Boortmeerbeek,222 kW (302 HP),Automatic,https://www.autotrends.be/fr/occasions/voiture...,,
1,"64,500 EUR",Bmw,I8,"97,800 km",2014,Hybrid,Coupe,Black,Hasselt,172 kW (234 HP),Automatic,https://autos.tweedehands.net/bmw/bmw-i8-m-pak...,1.5,6.0
2,"46,990 EUR",Volkswagen,Tiguan,9 km,2024,Petrol,Crossover,Black,Tournai,111 kW (151 HP),Automatic,https://www.vroom.be/fr/voitures-neuves/volksw...,1.5,6.0
3,"35,990 EUR",Land Rover,Evoque,"84,447 km",2020,Diesel,Suv,Gray,Mons,111 kW (151 HP),Automatic,https://www.vroom.be/fr/voitures-occasion/land...,2.0,
4,"29,990 EUR",Audi,A5,"77,150 km",2017,Diesel,Convertible,Gray,Sint Niklaas,141 kW (192 HP),Automatic,https://www.vroom.be/fr/voitures-occasion/audi...,2.0,6.0


In [24]:
be_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         260 non-null    object 
 1   Make          260 non-null    object 
 2   Model         260 non-null    object 
 3   Mi            260 non-null    object 
 4   Year          260 non-null    int64  
 5   Fuel type     260 non-null    object 
 6   Body type     260 non-null    object 
 7   Color         260 non-null    object 
 8   City          260 non-null    object 
 9   Power         260 non-null    object 
 10  Transmission  260 non-null    object 
 11  Contact       260 non-null    object 
 12  Trim          200 non-null    object 
 13  Euro norm     207 non-null    float64
dtypes: float64(1), int64(1), object(12)
memory usage: 28.6+ KB


In [25]:
be_offers['Price(EUR)'] = be_offers['Price'].apply(clean_price)
be_offers['Mileage(km)'] = be_offers['Mi'].apply(clean_mi)
be_offers['kW'] = be_offers['Power'].apply(power_kW_split)
be_offers['Emission standard'] = be_offers['Year'].apply(calculate_euro_norm)

In [26]:
be_offers.head()

Unnamed: 0,Price,Make,Model,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Trim,Euro norm,Price(EUR),Mileage(km),kW,Emission standard
0,"59,999 EUR",Audi,Q4,"16,290 km",2022,Electric,Crossover,Blue,Boortmeerbeek,222 kW (302 HP),Automatic,https://www.autotrends.be/fr/occasions/voiture...,,,59999,16290,222.0,6
1,"64,500 EUR",Bmw,I8,"97,800 km",2014,Hybrid,Coupe,Black,Hasselt,172 kW (234 HP),Automatic,https://autos.tweedehands.net/bmw/bmw-i8-m-pak...,1.5,6.0,64500,97800,172.0,6
2,"46,990 EUR",Volkswagen,Tiguan,9 km,2024,Petrol,Crossover,Black,Tournai,111 kW (151 HP),Automatic,https://www.vroom.be/fr/voitures-neuves/volksw...,1.5,6.0,46990,9,111.0,6
3,"35,990 EUR",Land Rover,Evoque,"84,447 km",2020,Diesel,Suv,Gray,Mons,111 kW (151 HP),Automatic,https://www.vroom.be/fr/voitures-occasion/land...,2.0,,35990,84447,111.0,6
4,"29,990 EUR",Audi,A5,"77,150 km",2017,Diesel,Convertible,Gray,Sint Niklaas,141 kW (192 HP),Automatic,https://www.vroom.be/fr/voitures-occasion/audi...,2.0,6.0,29990,77150,141.0,6


In [27]:
be_offers.isna().sum()

Price                 0
Make                  0
Model                 0
Mi                    0
Year                  0
Fuel type             0
Body type             0
Color                 0
City                  0
Power                 0
Transmission          0
Contact               0
Trim                 60
Euro norm            53
Price(EUR)            0
Mileage(km)           0
kW                    0
Emission standard     0
dtype: int64

In [28]:
# Romove the original columns "Price", "Mi",'Euro norm',"Power"
be_offers.drop(columns=["Price", "Mi", "Euro norm", "Power"], inplace = True)

In [29]:
be_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               260 non-null    object 
 1   Model              260 non-null    object 
 2   Year               260 non-null    int64  
 3   Fuel type          260 non-null    object 
 4   Body type          260 non-null    object 
 5   Color              260 non-null    object 
 6   City               260 non-null    object 
 7   Transmission       260 non-null    object 
 8   Contact            260 non-null    object 
 9   Trim               200 non-null    object 
 10  Price(EUR)         260 non-null    int64  
 11  Mileage(km)        260 non-null    int64  
 12  kW                 260 non-null    float64
 13  Emission standard  260 non-null    int64  
dtypes: float64(1), int64(4), object(9)
memory usage: 28.6+ KB


In [30]:
be_offers.isna().sum()

Make                  0
Model                 0
Year                  0
Fuel type             0
Body type             0
Color                 0
City                  0
Transmission          0
Contact               0
Trim                 60
Price(EUR)            0
Mileage(km)           0
kW                    0
Emission standard     0
dtype: int64

In [31]:
# delete the missing values in "Trim" and "kW"
be_offers.dropna(subset= ['Trim'], inplace=True)

In [32]:
be_offers.isna().sum()

Make                 0
Model                0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Trim                 0
Price(EUR)           0
Mileage(km)          0
kW                   0
Emission standard    0
dtype: int64

In [33]:
be_offers['Transmission'].value_counts()

Transmission
Automatic    168
Manual        32
Name: count, dtype: int64

In [34]:
be_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 200 entries, 1 to 258
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               200 non-null    object 
 1   Model              200 non-null    object 
 2   Year               200 non-null    int64  
 3   Fuel type          200 non-null    object 
 4   Body type          200 non-null    object 
 5   Color              200 non-null    object 
 6   City               200 non-null    object 
 7   Transmission       200 non-null    object 
 8   Contact            200 non-null    object 
 9   Trim               200 non-null    object 
 10  Price(EUR)         200 non-null    int64  
 11  Mileage(km)        200 non-null    int64  
 12  kW                 200 non-null    float64
 13  Emission standard  200 non-null    int64  
dtypes: float64(1), int64(4), object(9)
memory usage: 23.4+ KB


In [35]:
#add another column "Country"
be_offers['Country'] = "Belgium"

In [36]:
be_offers.head(20)

Unnamed: 0,Make,Model,Year,Fuel type,Body type,Color,City,Transmission,Contact,Trim,Price(EUR),Mileage(km),kW,Emission standard,Country
1,Bmw,I8,2014,Hybrid,Coupe,Black,Hasselt,Automatic,https://autos.tweedehands.net/bmw/bmw-i8-m-pak...,1.5,64500,97800,172.0,6,Belgium
2,Volkswagen,Tiguan,2024,Petrol,Crossover,Black,Tournai,Automatic,https://www.vroom.be/fr/voitures-neuves/volksw...,1.5,46990,9,111.0,6,Belgium
3,Land Rover,Evoque,2020,Diesel,Suv,Gray,Mons,Automatic,https://www.vroom.be/fr/voitures-occasion/land...,2.0,35990,84447,111.0,6,Belgium
4,Audi,A5,2017,Diesel,Convertible,Gray,Sint Niklaas,Automatic,https://www.vroom.be/fr/voitures-occasion/audi...,2.0,29990,77150,141.0,6,Belgium
5,Ford,Edge,2019,Diesel,Suv,Black,Boortmeerbeek,Automatic,https://www.autogids.be/detail-id--9433691--fo...,2.0 TDCi,29700,94614,156.0,6,Belgium
6,Land Rover,Range Rover,2013,Diesel,Suv,Blue,Brugge,Automatic,https://www.vroom.be/fr/voitures-occasion/land...,3.0,27995,216829,192.0,5,Belgium
7,Mercedes Benz,C-Class,2014,Diesel,Wagon,Black,Zwevegem,Automatic,https://www.autogids.be/detail-id--9117596--me...,250 D,22990,153255,152.0,6,Belgium
8,Ford,Puma,2021,Petrol,Hatchback,Green,Machelen,Manual,https://www.autogids.be/detail-id--9468301--fo...,1.5,28450,19483,149.0,6,Belgium
9,Peugeot,508,2021,Petrol,Sedan,Gray,Turnhout,Automatic,https://www.vroom.be/fr/voitures-occasion/peug...,1.6,28990,54503,167.0,6,Belgium
10,Audi,A1,2022,Petrol,Mini,Gray,Hasselt,Automatic,https://www.vroom.be/fr/voitures-occasion/audi...,1.0 25 TFSI,24500,35984,70.0,6,Belgium


In [37]:
be_offers.to_csv('cleaned_Belgium_offers.csv', index=False)

3. France

In [38]:
fr_offers = pd.read_csv('France.csv')
fr_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"54,590 EUR",Lexus,Rx,450h,"50,200 km",2021,Hybrid,Suv,White,Champagne-au-Mont-d'Or,195 kW (265 HP),Automatic,https://www.leparking.fr/voiture-occasion-deta...
1,"29,299 EUR",Lexus,Nx,300h,"72,625 km",2018,Hybrid,Suv,White,Chambourcy,155 kW (211 HP),Automatic,https://www.paruvendu.fr/a/voiture-occasion/le...
2,"29,490 EUR",Seat,Ateca,1.5 tsi,"41,446 km",2022,Petrol,Suv,Gray,Seynod,111 kW (151 HP),Automatic,https://www.leparking.fr/voiture-occasion-deta...
3,"59,990 EUR",Audi,Sq7,4.0 TDI,"107,844 km",2018,Diesel,Suv,Black,Liévin,324 kW (441 HP),Automatic,https://www.leparking.fr/voiture-occasion-deta...
4,"19,990 EUR",Volkswagen,T-Cross,1.0 tsi,"37,916 km",2019,Petrol,Crossover,Gray,Albi,115 kW (156 HP),Automatic,https://fr.renew.auto/achat-vehicules-occasion...


In [39]:
fr_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 221 entries, 0 to 220
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Price         221 non-null    object
 1   Make          221 non-null    object
 2   Model         221 non-null    object
 3   Trim          203 non-null    object
 4   Mi            221 non-null    object
 5   Year          221 non-null    int64 
 6   Fuel type     221 non-null    object
 7   Body type     221 non-null    object
 8   Color         221 non-null    object
 9   City          219 non-null    object
 10  Power         221 non-null    object
 11  Transmission  221 non-null    object
 12  Contact       221 non-null    object
dtypes: int64(1), object(12)
memory usage: 22.6+ KB


In [40]:
# Create a new column "Euro norm"  "Price", "Mi" and "Power"
fr_offers['Emission standard'] = fr_offers['Year'].apply(calculate_euro_norm)
fr_offers['Price(EUR)'] = fr_offers['Price'].apply(clean_price)
fr_offers['Mileage(km)'] = fr_offers['Mi'].apply(clean_mi)
fr_offers['kW'] = fr_offers['Power'].apply(power_kW_split)

In [41]:
fr_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,"54,590 EUR",Lexus,Rx,450h,"50,200 km",2021,Hybrid,Suv,White,Champagne-au-Mont-d'Or,195 kW (265 HP),Automatic,https://www.leparking.fr/voiture-occasion-deta...,6,54590,50200,195.0
1,"29,299 EUR",Lexus,Nx,300h,"72,625 km",2018,Hybrid,Suv,White,Chambourcy,155 kW (211 HP),Automatic,https://www.paruvendu.fr/a/voiture-occasion/le...,6,29299,72625,155.0
2,"29,490 EUR",Seat,Ateca,1.5 tsi,"41,446 km",2022,Petrol,Suv,Gray,Seynod,111 kW (151 HP),Automatic,https://www.leparking.fr/voiture-occasion-deta...,6,29490,41446,111.0
3,"59,990 EUR",Audi,Sq7,4.0 TDI,"107,844 km",2018,Diesel,Suv,Black,Liévin,324 kW (441 HP),Automatic,https://www.leparking.fr/voiture-occasion-deta...,6,59990,107844,324.0
4,"19,990 EUR",Volkswagen,T-Cross,1.0 tsi,"37,916 km",2019,Petrol,Crossover,Gray,Albi,115 kW (156 HP),Automatic,https://fr.renew.auto/achat-vehicules-occasion...,6,19990,37916,115.0


In [42]:
# Now remove the original 3 columns
fr_offers.drop(columns=['Price', "Mi", 'Power'], inplace= True)

In [43]:
fr_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,Lexus,Rx,450h,2021,Hybrid,Suv,White,Champagne-au-Mont-d'Or,Automatic,https://www.leparking.fr/voiture-occasion-deta...,6,54590,50200,195.0
1,Lexus,Nx,300h,2018,Hybrid,Suv,White,Chambourcy,Automatic,https://www.paruvendu.fr/a/voiture-occasion/le...,6,29299,72625,155.0
2,Seat,Ateca,1.5 tsi,2022,Petrol,Suv,Gray,Seynod,Automatic,https://www.leparking.fr/voiture-occasion-deta...,6,29490,41446,111.0
3,Audi,Sq7,4.0 TDI,2018,Diesel,Suv,Black,Liévin,Automatic,https://www.leparking.fr/voiture-occasion-deta...,6,59990,107844,324.0
4,Volkswagen,T-Cross,1.0 tsi,2019,Petrol,Crossover,Gray,Albi,Automatic,https://fr.renew.auto/achat-vehicules-occasion...,6,19990,37916,115.0


In [44]:
# Check missing values
fr_offers.isna().sum()

Make                  0
Model                 0
Trim                 18
Year                  0
Fuel type             0
Body type             0
Color                 0
City                  2
Transmission          0
Contact               0
Emission standard     0
Price(EUR)            0
Mileage(km)           0
kW                    0
dtype: int64

In [45]:
fr_offers.dropna(subset = ['Trim', 'kW'], inplace=True)

In [46]:
fr_offers.loc[fr_offers['City'].isna(), 'City'] = "NA"

In [47]:
fr_offers.isna().sum()

Make                 0
Model                0
Trim                 0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Emission standard    0
Price(EUR)           0
Mileage(km)          0
kW                   0
dtype: int64

In [48]:
fr_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 203 entries, 0 to 220
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               203 non-null    object 
 1   Model              203 non-null    object 
 2   Trim               203 non-null    object 
 3   Year               203 non-null    int64  
 4   Fuel type          203 non-null    object 
 5   Body type          203 non-null    object 
 6   Color              203 non-null    object 
 7   City               203 non-null    object 
 8   Transmission       203 non-null    object 
 9   Contact            203 non-null    object 
 10  Emission standard  203 non-null    int64  
 11  Price(EUR)         203 non-null    int64  
 12  Mileage(km)        203 non-null    int64  
 13  kW                 203 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 23.8+ KB


In [49]:
# Create a new column "Country"
fr_offers['Country'] = "France"

In [50]:
fr_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW,Country
0,Lexus,Rx,450h,2021,Hybrid,Suv,White,Champagne-au-Mont-d'Or,Automatic,https://www.leparking.fr/voiture-occasion-deta...,6,54590,50200,195.0,France
1,Lexus,Nx,300h,2018,Hybrid,Suv,White,Chambourcy,Automatic,https://www.paruvendu.fr/a/voiture-occasion/le...,6,29299,72625,155.0,France
2,Seat,Ateca,1.5 tsi,2022,Petrol,Suv,Gray,Seynod,Automatic,https://www.leparking.fr/voiture-occasion-deta...,6,29490,41446,111.0,France
3,Audi,Sq7,4.0 TDI,2018,Diesel,Suv,Black,Liévin,Automatic,https://www.leparking.fr/voiture-occasion-deta...,6,59990,107844,324.0,France
4,Volkswagen,T-Cross,1.0 tsi,2019,Petrol,Crossover,Gray,Albi,Automatic,https://fr.renew.auto/achat-vehicules-occasion...,6,19990,37916,115.0,France


In [51]:
fr_offers.to_csv('cleaned_France_offers.csv', index =False)

4. Germany

In [52]:
gr_offers = pd.read_csv('Germany.csv')
gr_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Euro norm,Body type,Color,City,Power,Transmission,Contact
0,"35,890 EUR",Mazda,Cx-5,2.5,"15,871 km",2023,Petrol,6.0,Crossover,Black,Munich,144 kW (196 HP),Automatic,https://www.12gebrauchtwagen.de/c/16/142601938...
1,"35,890 EUR",Mazda,Cx-5,2.5,"11,732 km",2023,Petrol,6.0,Crossover,Black,Hamburg,144 kW (196 HP),Automatic,https://www.moll-automobile.de/fahrzeug/385546...
2,"26,790 EUR",Volkswagen,Sharan,1.4 TSI,"93,649 km",2018,Diesel,6.0,Van,Silver,Übach Palenberg,110 kW (150 HP),Automatic,https://www.12gebrauchtwagen.de/c/4/1412086749...
3,"10,290 EUR",Hyundai,I30,1.4,"98,990 km",2017,Petrol,6.0,Wagon,Black,Gießen,74 kW (101 HP),Manual,https://www.12gebrauchtwagen.de/c/4/1413818076...
4,"16,924 EUR",Opel,Crossland X,1.2,"24,750 km",2022,Petrol,6.0,Suv,Green,Coesfeld,97 kW (132 HP),Manual,https://gebrauchtwagen.hyundai.de/de/fahrzeugs...


In [53]:
# Now change the "Price", "Mi" 'Euro norm' and "Power"
gr_offers['Price(EUR)'] = gr_offers['Price'].apply(clean_price)
gr_offers['Mileage(km)'] = gr_offers['Mi'].apply(clean_mi)
gr_offers['kW'] = gr_offers['Power'].apply(power_kW_split)
gr_offers['Emission standard'] =gr_offers['Year'].apply(calculate_euro_norm)

In [54]:
gr_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Euro norm,Body type,Color,City,Power,Transmission,Contact,Price(EUR),Mileage(km),kW,Emission standard
0,"35,890 EUR",Mazda,Cx-5,2.5,"15,871 km",2023,Petrol,6.0,Crossover,Black,Munich,144 kW (196 HP),Automatic,https://www.12gebrauchtwagen.de/c/16/142601938...,35890,15871,144.0,6
1,"35,890 EUR",Mazda,Cx-5,2.5,"11,732 km",2023,Petrol,6.0,Crossover,Black,Hamburg,144 kW (196 HP),Automatic,https://www.moll-automobile.de/fahrzeug/385546...,35890,11732,144.0,6
2,"26,790 EUR",Volkswagen,Sharan,1.4 TSI,"93,649 km",2018,Diesel,6.0,Van,Silver,Übach Palenberg,110 kW (150 HP),Automatic,https://www.12gebrauchtwagen.de/c/4/1412086749...,26790,93649,110.0,6
3,"10,290 EUR",Hyundai,I30,1.4,"98,990 km",2017,Petrol,6.0,Wagon,Black,Gießen,74 kW (101 HP),Manual,https://www.12gebrauchtwagen.de/c/4/1413818076...,10290,98990,74.0,6
4,"16,924 EUR",Opel,Crossland X,1.2,"24,750 km",2022,Petrol,6.0,Suv,Green,Coesfeld,97 kW (132 HP),Manual,https://gebrauchtwagen.hyundai.de/de/fahrzeugs...,16924,24750,97.0,6


In [55]:
gr_offers.drop(columns=['Price', 'Mi', 'Power'], inplace=True)

In [56]:
gr_offers.drop(columns=['Euro norm'], inplace=True)

In [57]:
gr_offers.isna().sum()

Make                  0
Model                 0
Trim                 13
Year                  0
Fuel type             0
Body type             0
Color                 0
City                  0
Transmission          0
Contact               0
Price(EUR)            0
Mileage(km)           0
kW                    0
Emission standard     0
dtype: int64

In [58]:
gr_offers = gr_offers.dropna()

In [59]:
gr_offers.isna().sum()

Make                 0
Model                0
Trim                 0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Price(EUR)           0
Mileage(km)          0
kW                   0
Emission standard    0
dtype: int64

In [60]:
gr_offers['Transmission'].value_counts()

Transmission
Automatic    170
Manual        77
Name: count, dtype: int64

In [61]:
gr_offers['Country'] = "Germany"

In [62]:
gr_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Price(EUR),Mileage(km),kW,Emission standard,Country
0,Mazda,Cx-5,2.5,2023,Petrol,Crossover,Black,Munich,Automatic,https://www.12gebrauchtwagen.de/c/16/142601938...,35890,15871,144.0,6,Germany
1,Mazda,Cx-5,2.5,2023,Petrol,Crossover,Black,Hamburg,Automatic,https://www.moll-automobile.de/fahrzeug/385546...,35890,11732,144.0,6,Germany
2,Volkswagen,Sharan,1.4 TSI,2018,Diesel,Van,Silver,Übach Palenberg,Automatic,https://www.12gebrauchtwagen.de/c/4/1412086749...,26790,93649,110.0,6,Germany
3,Hyundai,I30,1.4,2017,Petrol,Wagon,Black,Gießen,Manual,https://www.12gebrauchtwagen.de/c/4/1413818076...,10290,98990,74.0,6,Germany
4,Opel,Crossland X,1.2,2022,Petrol,Suv,Green,Coesfeld,Manual,https://gebrauchtwagen.hyundai.de/de/fahrzeugs...,16924,24750,97.0,6,Germany


In [63]:
gr_offers.to_csv('cleaned_Germany_offers.csv', index=False)

5. the Netherlands

In [64]:
nl_offers = pd.read_csv('Netherlands.csv')
nl_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Euro norm
0,"13,450 EUR",Audi,A3,1.2 s tfsi,"97,890 km",2011,Petrol,Convertible,Silver,Helmond,78 kW (106 HP),Automatic,https://autovooru.nl/audi-a3~c3-2ca-3b26db.html,
1,"17,240 EUR",Nissan,Leaf,40 kWh,"92,555 km",2019,Electric,Hatchback,Silver,Tilburg,111 kW (151 HP),Automatic,https://www.autoscout24.nl/aanbod/nissan-leaf-...,
2,"16,700 EUR",Opel,Astra,1.2,"37,000 km",2021,Petrol,Hatchback,Silver,Kerkrade,82 kW (112 HP),Manual,https://www.nederlandmobiel.nl/tweedehands-aut...,
3,"9,445 EUR",Peugeot,308,1.2 vti,"165,970 km",2018,Petrol,Wagon,Silver,Stolwijk,82 kW (112 HP),Manual,https://www.nederlandmobiel.nl/tweedehands-aut...,
4,"9,995 EUR",Volkswagen,Polo,1.2 TSI,"104,495 km",2011,Petrol,Hatchback,Beige,Middelburg,78 kW (106 HP),Automatic,https://www.zeelandnet.nl/prikbord/volkswagen-...,


In [65]:
nl_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         260 non-null    object 
 1   Make          260 non-null    object 
 2   Model         260 non-null    object 
 3   Trim          259 non-null    object 
 4   Mi            260 non-null    object 
 5   Year          260 non-null    int64  
 6   Fuel type     260 non-null    object 
 7   Body type     260 non-null    object 
 8   Color         260 non-null    object 
 9   City          260 non-null    object 
 10  Power         260 non-null    object 
 11  Transmission  260 non-null    object 
 12  Contact       260 non-null    object 
 13  Euro norm     13 non-null     float64
dtypes: float64(1), int64(1), object(12)
memory usage: 28.6+ KB


In [66]:
nl_offers['Emission standard'] =nl_offers['Year'].apply(calculate_euro_norm)
nl_offers['Price(EUR)'] = nl_offers['Price'].apply(clean_price)
nl_offers['Mileage(km)'] = nl_offers['Mi'].apply(clean_mi)
nl_offers['kW'] = nl_offers['Power'].apply(power_kW_split)

In [67]:
nl_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Euro norm,Emission standard,Price(EUR),Mileage(km),kW
0,"13,450 EUR",Audi,A3,1.2 s tfsi,"97,890 km",2011,Petrol,Convertible,Silver,Helmond,78 kW (106 HP),Automatic,https://autovooru.nl/audi-a3~c3-2ca-3b26db.html,,5,13450,97890,78.0
1,"17,240 EUR",Nissan,Leaf,40 kWh,"92,555 km",2019,Electric,Hatchback,Silver,Tilburg,111 kW (151 HP),Automatic,https://www.autoscout24.nl/aanbod/nissan-leaf-...,,6,17240,92555,111.0
2,"16,700 EUR",Opel,Astra,1.2,"37,000 km",2021,Petrol,Hatchback,Silver,Kerkrade,82 kW (112 HP),Manual,https://www.nederlandmobiel.nl/tweedehands-aut...,,6,16700,37000,82.0
3,"9,445 EUR",Peugeot,308,1.2 vti,"165,970 km",2018,Petrol,Wagon,Silver,Stolwijk,82 kW (112 HP),Manual,https://www.nederlandmobiel.nl/tweedehands-aut...,,6,9445,165970,82.0
4,"9,995 EUR",Volkswagen,Polo,1.2 TSI,"104,495 km",2011,Petrol,Hatchback,Beige,Middelburg,78 kW (106 HP),Automatic,https://www.zeelandnet.nl/prikbord/volkswagen-...,,5,9995,104495,78.0


In [68]:
nl_offers.drop(columns= ['Price', 'Mi', 'Euro norm', 'Power'], inplace= True)

In [69]:
nl_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,Audi,A3,1.2 s tfsi,2011,Petrol,Convertible,Silver,Helmond,Automatic,https://autovooru.nl/audi-a3~c3-2ca-3b26db.html,5,13450,97890,78.0
1,Nissan,Leaf,40 kWh,2019,Electric,Hatchback,Silver,Tilburg,Automatic,https://www.autoscout24.nl/aanbod/nissan-leaf-...,6,17240,92555,111.0
2,Opel,Astra,1.2,2021,Petrol,Hatchback,Silver,Kerkrade,Manual,https://www.nederlandmobiel.nl/tweedehands-aut...,6,16700,37000,82.0
3,Peugeot,308,1.2 vti,2018,Petrol,Wagon,Silver,Stolwijk,Manual,https://www.nederlandmobiel.nl/tweedehands-aut...,6,9445,165970,82.0
4,Volkswagen,Polo,1.2 TSI,2011,Petrol,Hatchback,Beige,Middelburg,Automatic,https://www.zeelandnet.nl/prikbord/volkswagen-...,5,9995,104495,78.0


In [70]:
nl_offers.isna().sum()

Make                 0
Model                0
Trim                 1
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Emission standard    0
Price(EUR)           0
Mileage(km)          0
kW                   0
dtype: int64

In [71]:
nl_offers= nl_offers.dropna()

In [72]:
# Set display options to show more rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [73]:
nl_offers.isna().sum()

Make                 0
Model                0
Trim                 0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Emission standard    0
Price(EUR)           0
Mileage(km)          0
kW                   0
dtype: int64

In [74]:
nl_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 259 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               259 non-null    object 
 1   Model              259 non-null    object 
 2   Trim               259 non-null    object 
 3   Year               259 non-null    int64  
 4   Fuel type          259 non-null    object 
 5   Body type          259 non-null    object 
 6   Color              259 non-null    object 
 7   City               259 non-null    object 
 8   Transmission       259 non-null    object 
 9   Contact            259 non-null    object 
 10  Emission standard  259 non-null    int64  
 11  Price(EUR)         259 non-null    int64  
 12  Mileage(km)        259 non-null    int64  
 13  kW                 259 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 30.4+ KB


In [75]:
nl_offers['Country'] = "Netherlands"

In [76]:
nl_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW,Country
0,Audi,A3,1.2 s tfsi,2011,Petrol,Convertible,Silver,Helmond,Automatic,https://autovooru.nl/audi-a3~c3-2ca-3b26db.html,5,13450,97890,78.0,Netherlands
1,Nissan,Leaf,40 kWh,2019,Electric,Hatchback,Silver,Tilburg,Automatic,https://www.autoscout24.nl/aanbod/nissan-leaf-...,6,17240,92555,111.0,Netherlands
2,Opel,Astra,1.2,2021,Petrol,Hatchback,Silver,Kerkrade,Manual,https://www.nederlandmobiel.nl/tweedehands-aut...,6,16700,37000,82.0,Netherlands
3,Peugeot,308,1.2 vti,2018,Petrol,Wagon,Silver,Stolwijk,Manual,https://www.nederlandmobiel.nl/tweedehands-aut...,6,9445,165970,82.0,Netherlands
4,Volkswagen,Polo,1.2 TSI,2011,Petrol,Hatchback,Beige,Middelburg,Automatic,https://www.zeelandnet.nl/prikbord/volkswagen-...,5,9995,104495,78.0,Netherlands


In [77]:
nl_offers.to_csv('cleaned_Netherlands_offers.csv', index=False)

6. Poland

In [78]:
pl_offers = pd.read_csv('Poland.csv')
pl_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"29,033 EUR (124,900 PLN)",Kia,Sportage,1.6 T-GDi,150 km,2023,Petrol,Crossover,Gray,Marki,111 kW (151 HP),Manual,https://www.gezet.pl/oferty/samochody/kia-spor...
1,"27,894 EUR (120,000 PLN)",Kia,Sportage,1.6 T-GDi,150 km,2023,Petrol,Crossover,Gray,Marki,111 kW (151 HP),Manual,https://www.gezet.pl/oferty/samochody/kia-spor...
2,"23,221 EUR (99,900 PLN)",Renault,Koleos,2.0 dCi,"117,398 km",2019,Diesel,Suv,Purple,Chorzów,,Automatic,https://autopunkt.pl/samochod/chorzow/suv/rena...
3,"28,568 EUR (122,900 PLN)",Audi,A4,2.0,"138,065 km",2020,Diesel,Wagon,White,Rzeszów,,,https://autopunkt.pl/samochod/rzeszow/kombi/au...
4,"29,033 EUR (124,900 PLN)",Peugeot,5008,2.0 HDi,"104,175 km",2020,Diesel,Mpv,Gray,Rzeszów,,Automatic,https://autopunkt.pl/samochod/rzeszow/minivan/...


In [79]:
rows_without_PLN = pl_offers[~pl_offers['Price'].str.contains('PLN', na=False)]
rows_without_PLN

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact


In [80]:
pl_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Price         260 non-null    object
 1   Make          260 non-null    object
 2   Model         260 non-null    object
 3   Trim          255 non-null    object
 4   Mi            260 non-null    object
 5   Year          260 non-null    int64 
 6   Fuel type     260 non-null    object
 7   Body type     260 non-null    object
 8   Color         260 non-null    object
 9   City          260 non-null    object
 10  Power         226 non-null    object
 11  Transmission  253 non-null    object
 12  Contact       260 non-null    object
dtypes: int64(1), object(12)
memory usage: 26.5+ KB


In [81]:
# First need to only keep euro as currency
pl_offers['Price'] = pl_offers['Price'].str.split('(').str[0].str.strip()

In [82]:
pl_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"29,033 EUR",Kia,Sportage,1.6 T-GDi,150 km,2023,Petrol,Crossover,Gray,Marki,111 kW (151 HP),Manual,https://www.gezet.pl/oferty/samochody/kia-spor...
1,"27,894 EUR",Kia,Sportage,1.6 T-GDi,150 km,2023,Petrol,Crossover,Gray,Marki,111 kW (151 HP),Manual,https://www.gezet.pl/oferty/samochody/kia-spor...
2,"23,221 EUR",Renault,Koleos,2.0 dCi,"117,398 km",2019,Diesel,Suv,Purple,Chorzów,,Automatic,https://autopunkt.pl/samochod/chorzow/suv/rena...
3,"28,568 EUR",Audi,A4,2.0,"138,065 km",2020,Diesel,Wagon,White,Rzeszów,,,https://autopunkt.pl/samochod/rzeszow/kombi/au...
4,"29,033 EUR",Peugeot,5008,2.0 HDi,"104,175 km",2020,Diesel,Mpv,Gray,Rzeszów,,Automatic,https://autopunkt.pl/samochod/rzeszow/minivan/...


In [83]:
pl_offers['Emission standard'] =pl_offers['Year'].apply(calculate_euro_norm)
pl_offers['Price(EUR)'] = pl_offers['Price'].apply(clean_price)
pl_offers['Mileage(km)'] = pl_offers['Mi'].apply(clean_mi)
pl_offers['kW'] = pl_offers['Power'].apply(power_kW_split)

In [84]:
pl_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,"29,033 EUR",Kia,Sportage,1.6 T-GDi,150 km,2023,Petrol,Crossover,Gray,Marki,111 kW (151 HP),Manual,https://www.gezet.pl/oferty/samochody/kia-spor...,6,29033,150,111.0
1,"27,894 EUR",Kia,Sportage,1.6 T-GDi,150 km,2023,Petrol,Crossover,Gray,Marki,111 kW (151 HP),Manual,https://www.gezet.pl/oferty/samochody/kia-spor...,6,27894,150,111.0
2,"23,221 EUR",Renault,Koleos,2.0 dCi,"117,398 km",2019,Diesel,Suv,Purple,Chorzów,,Automatic,https://autopunkt.pl/samochod/chorzow/suv/rena...,6,23221,117398,
3,"28,568 EUR",Audi,A4,2.0,"138,065 km",2020,Diesel,Wagon,White,Rzeszów,,,https://autopunkt.pl/samochod/rzeszow/kombi/au...,6,28568,138065,
4,"29,033 EUR",Peugeot,5008,2.0 HDi,"104,175 km",2020,Diesel,Mpv,Gray,Rzeszów,,Automatic,https://autopunkt.pl/samochod/rzeszow/minivan/...,6,29033,104175,


In [85]:
pl_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)

In [86]:
pl_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,Kia,Sportage,1.6 T-GDi,2023,Petrol,Crossover,Gray,Marki,Manual,https://www.gezet.pl/oferty/samochody/kia-spor...,6,29033,150,111.0
1,Kia,Sportage,1.6 T-GDi,2023,Petrol,Crossover,Gray,Marki,Manual,https://www.gezet.pl/oferty/samochody/kia-spor...,6,27894,150,111.0
2,Renault,Koleos,2.0 dCi,2019,Diesel,Suv,Purple,Chorzów,Automatic,https://autopunkt.pl/samochod/chorzow/suv/rena...,6,23221,117398,
3,Audi,A4,2.0,2020,Diesel,Wagon,White,Rzeszów,,https://autopunkt.pl/samochod/rzeszow/kombi/au...,6,28568,138065,
4,Peugeot,5008,2.0 HDi,2020,Diesel,Mpv,Gray,Rzeszów,Automatic,https://autopunkt.pl/samochod/rzeszow/minivan/...,6,29033,104175,


In [87]:
pl_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               260 non-null    object 
 1   Model              260 non-null    object 
 2   Trim               255 non-null    object 
 3   Year               260 non-null    int64  
 4   Fuel type          260 non-null    object 
 5   Body type          260 non-null    object 
 6   Color              260 non-null    object 
 7   City               260 non-null    object 
 8   Transmission       253 non-null    object 
 9   Contact            260 non-null    object 
 10  Emission standard  260 non-null    int64  
 11  Price(EUR)         260 non-null    int64  
 12  Mileage(km)        260 non-null    int64  
 13  kW                 226 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 28.6+ KB


In [88]:
pl_offers.isna().sum()

Make                  0
Model                 0
Trim                  5
Year                  0
Fuel type             0
Body type             0
Color                 0
City                  0
Transmission          7
Contact               0
Emission standard     0
Price(EUR)            0
Mileage(km)           0
kW                   34
dtype: int64

In [89]:
pl_offers = pl_offers.dropna()

In [90]:
pl_offers.isna().sum()

Make                 0
Model                0
Trim                 0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Emission standard    0
Price(EUR)           0
Mileage(km)          0
kW                   0
dtype: int64

In [91]:
pl_offers['Country'] = "Poland"

In [92]:
pl_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW,Country
0,Kia,Sportage,1.6 T-GDi,2023,Petrol,Crossover,Gray,Marki,Manual,https://www.gezet.pl/oferty/samochody/kia-spor...,6,29033,150,111.0,Poland
1,Kia,Sportage,1.6 T-GDi,2023,Petrol,Crossover,Gray,Marki,Manual,https://www.gezet.pl/oferty/samochody/kia-spor...,6,27894,150,111.0,Poland
5,Toyota,Auris,1.6 VVT-i,2017,Petrol,Hatchback,Black,Szczecin,Manual,https://www.autotrader.pl/oferta/toyota-auris-...,6,15318,132,98.0,Poland
8,Suzuki,Grand Vitara,1.9,2008,Diesel,Suv,Black,Piaseczno,Manual,http://www.aaaauto.pl/pl/car.html?aaaid=030008...,4,6043,160450,95.0,Poland
11,Ford,Kuga,1.5,2017,Petrol,Suv,Black,Piaseczno,Automatic,http://www.aaaauto.pl/pl/car.html?aaaid=030007...,6,19758,64984,134.0,Poland


In [93]:
pl_offers.to_csv('cleaned_Poland_offers.csv', index=False)

7. Spain

In [94]:
sp_offers = pd.read_csv('Spain.csv')
sp_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 234 entries, 0 to 233
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         234 non-null    object 
 1   Make          234 non-null    object 
 2   Model         234 non-null    object 
 3   Trim          233 non-null    object 
 4   Mi            234 non-null    object 
 5   Year          234 non-null    int64  
 6   Fuel type     234 non-null    object 
 7   Body type     234 non-null    object 
 8   Color         234 non-null    object 
 9   City          234 non-null    object 
 10  Power         234 non-null    object 
 11  Transmission  234 non-null    object 
 12  Contact       234 non-null    object 
 13  Euro norm     6 non-null      float64
dtypes: float64(1), int64(1), object(12)
memory usage: 25.7+ KB


In [95]:
sp_offers['Emission standard'] = sp_offers['Year'].apply(calculate_euro_norm)
sp_offers['Price(EUR)'] = sp_offers['Price'].apply(clean_price)
sp_offers['Mileage(km)'] = sp_offers['Mi'].apply(clean_mi)
sp_offers['kW'] = sp_offers['Power'].apply(power_kW_split)


In [96]:
sp_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Euro norm,Emission standard,Price(EUR),Mileage(km),kW
0,"14,495 EUR",Peugeot,2008,1.2 puretech,"81,000 km",2017,Petrol,Crossover,Gray,Valencia,100 kW (136 HP),Manual,https://coches.km77.com/peugeot/2008/en-valenc...,,6,14495,81000,100.0
1,"9,990 EUR",Hyundai,Santa Fe,2.2 CRDi,"174,051 km",2009,Diesel,Suv,Black,Madrid,115 kW (156 HP),Manual,https://coches.km77.com/hyundai/santa-fe/en-ma...,,5,9990,174051,115.0
2,"19,495 EUR",Audi,A1,1.0 25 TFSI,"72,254 km",2021,Petrol,Mini,Gray,Valencia,95 kW (129 HP),Manual,https://coches.km77.com/audi/a1/en-valencia/23...,,6,19495,72254,95.0
3,"19,495 EUR",Peugeot,2008,1.2 puretech,"29,049 km",2020,Petrol,Crossover,Gray,Madrid,130 kW (177 HP),Manual,https://coches.km77.com/peugeot/2008/en-madrid...,,6,19495,29049,130.0
4,"19,900 EUR",Peugeot,2008,1.2 puretech,"14,146 km",2020,Petrol,Crossover,Orange,Marçà,100 kW (136 HP),Manual,https://coches.km77.com/peugeot/2008/en-castel...,,6,19900,14146,100.0


In [97]:
sp_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)

In [99]:
sp_offers.drop(columns= ['Euro norm'], inplace= True)

In [100]:
sp_offers.isna().sum()

Make                 0
Model                0
Trim                 1
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Emission standard    0
Price(EUR)           0
Mileage(km)          0
kW                   0
dtype: int64

In [101]:
sp_offers = sp_offers.dropna()

In [102]:
sp_offers.isna().sum()

Make                 0
Model                0
Trim                 0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Emission standard    0
Price(EUR)           0
Mileage(km)          0
kW                   0
dtype: int64

In [103]:
sp_offers['Country'] = "Spain"

In [104]:
sp_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW,Country
0,Peugeot,2008,1.2 puretech,2017,Petrol,Crossover,Gray,Valencia,Manual,https://coches.km77.com/peugeot/2008/en-valenc...,6,14495,81000,100.0,Spain
1,Hyundai,Santa Fe,2.2 CRDi,2009,Diesel,Suv,Black,Madrid,Manual,https://coches.km77.com/hyundai/santa-fe/en-ma...,5,9990,174051,115.0,Spain
2,Audi,A1,1.0 25 TFSI,2021,Petrol,Mini,Gray,Valencia,Manual,https://coches.km77.com/audi/a1/en-valencia/23...,6,19495,72254,95.0,Spain
3,Peugeot,2008,1.2 puretech,2020,Petrol,Crossover,Gray,Madrid,Manual,https://coches.km77.com/peugeot/2008/en-madrid...,6,19495,29049,130.0,Spain
4,Peugeot,2008,1.2 puretech,2020,Petrol,Crossover,Orange,Marçà,Manual,https://coches.km77.com/peugeot/2008/en-castel...,6,19900,14146,100.0,Spain


In [105]:
sp_offers.to_csv('cleaned_Spain_offers.csv', index=False)

8. Italy

In [106]:
it_offers= pd.read_csv('Italy.csv')
it_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Euro norm,Body type,Color,City,Power,Transmission,Contact
0,"44,900 EUR",Bmw,3 Series,316d,"7,042 km",2023,Diesel,6.0,Wagon,Gray,Brescia,90 kW (122 HP),Automatic,https://www.trova-automobile.it/auto/annuncio/...
1,"10,900 EUR",Peugeot,2008,1.6 BlueHDI,"158,665 km",2016,Electric,,Crossover,White,Agrigento,73 kW (99 HP),Automatic,https://www.vetrinamotori.it/peugeot-2008-2013...
2,"21,400 EUR",Jeep,Compass,1.6 multijet,"36,767 km",2020,Diesel,,Crossover,Red,Torino,89 kW (121 HP),Manual,https://www.trova-automobile.it/auto/annuncio/...
3,"33,900 EUR",Volvo,Xc40,1.5 T3,"12,000 km",2021,Petrol,,Crossover,Black,Roma,121 kW (165 HP),Automatic,https://www.trova-automobile.it/auto/annuncio/...
4,"75,990 EUR",Porsche,Macan,2.0,"39,000 km",2022,Petrol,6.0,Suv,Black,Alcamo,197 kW (268 HP),Automatic,https://www.trova-automobile.it/auto/annuncio/...


In [107]:
it_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         260 non-null    object 
 1   Make          260 non-null    object 
 2   Model         260 non-null    object 
 3   Trim          259 non-null    object 
 4   Mi            260 non-null    object 
 5   Year          260 non-null    int64  
 6   Fuel type     260 non-null    object 
 7   Euro norm     123 non-null    float64
 8   Body type     260 non-null    object 
 9   Color         260 non-null    object 
 10  City          260 non-null    object 
 11  Power         260 non-null    object 
 12  Transmission  259 non-null    object 
 13  Contact       260 non-null    object 
dtypes: float64(1), int64(1), object(12)
memory usage: 28.6+ KB


In [108]:
it_offers['Emission standard'] =it_offers['Year'].apply(calculate_euro_norm)
it_offers['Price(EUR)'] = it_offers['Price'].apply(clean_price)
it_offers['Mileage(km)'] = it_offers['Mi'].apply(clean_mi)
it_offers['kW'] = it_offers['Power'].apply(power_kW_split)

In [109]:
it_offers.drop(columns= ['Price', 'Mi', 'Power', 'Euro norm'], inplace= True)

In [110]:
it_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               260 non-null    object 
 1   Model              260 non-null    object 
 2   Trim               259 non-null    object 
 3   Year               260 non-null    int64  
 4   Fuel type          260 non-null    object 
 5   Body type          260 non-null    object 
 6   Color              260 non-null    object 
 7   City               260 non-null    object 
 8   Transmission       259 non-null    object 
 9   Contact            260 non-null    object 
 10  Emission standard  260 non-null    int64  
 11  Price(EUR)         260 non-null    int64  
 12  Mileage(km)        260 non-null    int64  
 13  kW                 260 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 28.6+ KB


In [111]:
# delete the rows with missing values
it_offers =it_offers.dropna()

In [112]:
it_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 258 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               258 non-null    object 
 1   Model              258 non-null    object 
 2   Trim               258 non-null    object 
 3   Year               258 non-null    int64  
 4   Fuel type          258 non-null    object 
 5   Body type          258 non-null    object 
 6   Color              258 non-null    object 
 7   City               258 non-null    object 
 8   Transmission       258 non-null    object 
 9   Contact            258 non-null    object 
 10  Emission standard  258 non-null    int64  
 11  Price(EUR)         258 non-null    int64  
 12  Mileage(km)        258 non-null    int64  
 13  kW                 258 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 30.2+ KB


In [113]:
it_offers['Country'] = "Italy"

In [114]:
it_offers.to_csv('cleaned_Italy_offers.csv', index=False)

9. Sweden

In [115]:
sd_offers= pd.read_csv('Sweden.csv')
sd_offers

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"32,230 EUR (359,900 SEK)",Bmw,X6,xDrive30d,"176,250 km",2015,Diesel,Suv,Gray,Uddevalla,190 kW (258 HP),Automatic,https://www.bytbil.com/vastra-gotalands-lan/pe...
1,"7,155 EUR (79,900 SEK)",Mercedes Benz,Cls-Class,55 AMG,"240,010 km",2006,Petrol,Sedan,Silver,Stockholm,200 kW (272 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
2,"394,800 EUR",Bmw,5 Series,530e,"37,990 km",2024,Hybrid,Sedan,Black,Svarta,135 kW (184 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
3,"849,900 EUR",Bmw,X5,,"28,000 km",2024,Hybrid,Suv,Blue,Knivsta,210 kW (286 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
4,"33,941 EUR (379,000 SEK)",Bmw,X6,xDrive30d,"100,000 km",2015,Diesel,Suv,Black,Eskilstuna,190 kW (258 HP),Automatic,https://www.bytbil.com/sodermanlands-lan/perso...
5,"569,900 EUR",Audi,A7,4.0 TFSI,"110,000 km",2015,Petrol,Sedan,White,Landvetter,412 kW (560 HP),Automatic,https://www.bytbil.com/vastra-gotalands-lan/pe...
6,"9,842 EUR (109,900 SEK)",Volvo,V70,2.5 D5,"276,000 km",2012,Diesel,Wagon,Silver,Karlstad,160 kW (218 HP),Automatic,https://www.bytbil.com/varmlands-lan/personbil...
7,"35,284 EUR (394,000 SEK)",Lexus,Rx,450h,"98,490 km",2017,Hybrid,Suv,Black,Stockholm,233 kW (317 HP),Automatic,https://www.bilia.se/bilar/sok-bil/lexus/rx/yo...
8,"33,941 EUR (379,000 SEK)",Bmw,5 Series,530i,"53,000 km",2019,Petrol,Sedan,White,Falun,187 kW (254 HP),Automatic,https://bilweb.se/dalarnas-lan/bmw-530-i-xdriv...
9,"43,792 EUR (489,000 SEK)",Volvo,Xc90,2.0 D5 AWD,"86,000 km",2019,Diesel,Suv,Black,Stockholm,175 kW (238 HP),Automatic,https://bilweb.se/stockholms-lan/volvo-xc90-d5...


In [116]:
rows_without_SEK = sd_offers[~sd_offers['Price'].str.contains('SEK', na=False)]

In [117]:
rows_without_SEK

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
2,"394,800 EUR",Bmw,5 Series,530e,"37,990 km",2024,Hybrid,Sedan,Black,Svarta,135 kW (184 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
3,"849,900 EUR",Bmw,X5,,"28,000 km",2024,Hybrid,Suv,Blue,Knivsta,210 kW (286 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
5,"569,900 EUR",Audi,A7,4.0 TFSI,"110,000 km",2015,Petrol,Sedan,White,Landvetter,412 kW (560 HP),Automatic,https://www.bytbil.com/vastra-gotalands-lan/pe...
10,"539,000 EUR",Land Rover,Range Rover Velar,3.0 P380,"73,600 km",2017,Petrol,Suv,Silver,Uppsala,280 kW (381 HP),Automatic,https://www.bytbil.com/uppsala-lan/personbil-r...
11,"374,900 EUR",Volvo,Xc60,2.0 T8 AWD,230 km,2024,Hybrid,Crossover,White,Karlstad,235 kW (320 HP),Automatic,https://www.bytbil.com/varmlands-lan/personbil...
12,"369,900 EUR",Bmw,5 Series,530e,"74,890 km",2023,Hybrid,Sedan,White,Uppsala,135 kW (184 HP),Automatic,https://www.bytbil.com/uppsala-lan/personbil-5...
17,"629,900 EUR",Audi,Q7,3.0 tfsi,"34,900 km",2023,Hybrid,Suv,Black,Are,250 kW (340 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
20,"549,900 EUR",Volvo,Xc60,2.0 Recharge T6,"62,000 km",2024,Hybrid,Crossover,White,Stockholm,186 kW (253 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
22,"734,000 EUR",Bmw,M5,4.4,"68,350 km",2019,Petrol,Sedan,Gold,Sigtuna,441 kW (600 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
23,"379,900 EUR",Mercedes Benz,E-Class,,"63,490 km",2019,Diesel,Sedan,White,Stockholm,135 kW (184 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...


There are some rows without SEK currency value, and the EUR are actually SEK. The exchange rate is 0,087

In [None]:
# First need to only keep euro as currency
sd_offers['Price'] = sd_offers['Price'].str.split('(').str[0].str.strip()

In [119]:
sd_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"32,230 EUR",Bmw,X6,xDrive30d,"176,250 km",2015,Diesel,Suv,Gray,Uddevalla,190 kW (258 HP),Automatic,https://www.bytbil.com/vastra-gotalands-lan/pe...
1,"7,155 EUR",Mercedes Benz,Cls-Class,55 AMG,"240,010 km",2006,Petrol,Sedan,Silver,Stockholm,200 kW (272 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
2,"394,800 EUR",Bmw,5 Series,530e,"37,990 km",2024,Hybrid,Sedan,Black,Svarta,135 kW (184 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
3,"849,900 EUR",Bmw,X5,,"28,000 km",2024,Hybrid,Suv,Blue,Knivsta,210 kW (286 HP),Automatic,https://www.bytbil.com/stockholms-lan/personbi...
4,"33,941 EUR",Bmw,X6,xDrive30d,"100,000 km",2015,Diesel,Suv,Black,Eskilstuna,190 kW (258 HP),Automatic,https://www.bytbil.com/sodermanlands-lan/perso...


In [120]:
sd_offers['Emission standard'] =sd_offers['Year'].apply(calculate_euro_norm)
sd_offers['Price(EUR)'] = sd_offers['Price'].apply(clean_price)
sd_offers['Mileage(km)'] = sd_offers['Mi'].apply(clean_mi)
sd_offers['kW'] = sd_offers['Power'].apply(power_kW_split)

In [121]:
# Change the value with the index from the rows_without_SEK by multiplying 0.087
sd_offers.loc[rows_without_SEK.index, 'Price(EUR)'] = sd_offers.loc[rows_without_SEK.index, 'Price(EUR)'] * 0.087

 33051.3 60813.  32181.3 33051.3 33921.3 46101.3 32973.  32181.3 33834.3
 47841.3 47763.  82563.  47763.  12606.3 62553.  32103.  31755.  45153.
 19131.3 31311.3 14007.  31746.3 10431.3 27831.3 60813.  60813.  31311.3
 32607.6 48633.  52191.3  9300.3  9561.3 33042.6 45231.3 46101.3 17826.3
 32973.  15999.3 47841.3 60891.3 54723.  69591.3 34356.3 14328.9 31311.3
 32973.  36531.3 33921.3 31233.  14346.3  9996.3  9483.  33051.3 33051.3
 62205.  47763.  31311.3 31311.3 28257.6  5211.3 16086.3 33912.6 14337.6
 57333.   3906.3 32094.3 16086.3  9561.3 72201.3]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  sd_offers.loc[rows_without_SEK.index, 'Price(EUR)'] = sd_offers.loc[rows_without_SEK.index, 'Price(EUR)'] * 0.087


In [122]:
sd_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)

In [123]:
sd_offers.isna().sum()

Make                  0
Model                 0
Trim                 27
Year                  0
Fuel type             0
Body type             0
Color                 0
City                  0
Transmission          0
Contact               0
Emission standard     0
Price(EUR)            0
Mileage(km)           0
kW                   12
dtype: int64

In [124]:
sd_offers.loc[sd_offers['Color'].isna(), 'Color'] = "NA"

In [125]:
sd_offers = sd_offers.dropna()

In [126]:
sd_offers.isna().sum()

Make                 0
Model                0
Trim                 0
Year                 0
Fuel type            0
Body type            0
Color                0
City                 0
Transmission         0
Contact              0
Emission standard    0
Price(EUR)           0
Mileage(km)          0
kW                   0
dtype: int64

In [127]:
sd_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,Bmw,X6,xDrive30d,2015,Diesel,Suv,Gray,Uddevalla,Automatic,https://www.bytbil.com/vastra-gotalands-lan/pe...,6,32230.0,176250,190.0
1,Mercedes Benz,Cls-Class,55 AMG,2006,Petrol,Sedan,Silver,Stockholm,Automatic,https://www.bytbil.com/stockholms-lan/personbi...,4,7155.0,240010,200.0
2,Bmw,5 Series,530e,2024,Hybrid,Sedan,Black,Svarta,Automatic,https://www.bytbil.com/stockholms-lan/personbi...,6,34347.6,37990,135.0
4,Bmw,X6,xDrive30d,2015,Diesel,Suv,Black,Eskilstuna,Automatic,https://www.bytbil.com/sodermanlands-lan/perso...,6,33941.0,100000,190.0
5,Audi,A7,4.0 TFSI,2015,Petrol,Sedan,White,Landvetter,Automatic,https://www.bytbil.com/vastra-gotalands-lan/pe...,6,49581.3,110000,412.0


In [128]:
sd_offers['Country'] = "Sweden"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sd_offers['Country'] = "Sweden"


In [129]:
sd_offers.to_csv('cleaned_Sweden_offers.csv', index=False)

10. Switzerland

In [145]:
sl_offers = pd.read_csv('Switzerland.csv')
sl_offers.head()

Unnamed: 0,Price,Make,Model,Mi,Year,Fuel type,Body type,Color,City,Transmission,Contact,Trim,Euro norm,Power
0,"46,850 EUR (44,988 CHF)",Volvo,Ex30,0 km,2024,Electric,Suv,Black,Kreuzlingen,Automatic,https://www.carforyou.ch/de/auto/volvo/ex30,,,
1,"15,402 EUR (14,790 CHF)",Mazda,626,"112,003 km",2017,Petrol,Wagon,Black,Spiez,Manual,https://www.carforyou.ch/de/auto/mazda/626,,,
2,"49,518 EUR (47,550 CHF)",Mazda,Cx-60,0 km,2023,,Suv,Black,Uster,Automatic,https://www.carforyou.ch/de/auto/mazda/cx-60,3.3 e-Skyactiv D,,
3,"13,012 EUR (12,495 CHF)",Jaguar,Xk,"66,300 km",2002,Petrol,Convertible,Gray,,Automatic,https://www.carandclassic.com/car/C1574580,4.2,,
4,"29,679 EUR (28,500 CHF)",Honda,S2000,"12,000 km",2001,,Convertible,Black,Gossau,Manual,https://www.carforyou.ch/de/auto/honda/s2000,,,


In [146]:
sl_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         260 non-null    object 
 1   Make          260 non-null    object 
 2   Model         260 non-null    object 
 3   Mi            252 non-null    object 
 4   Year          260 non-null    int64  
 5   Fuel type     208 non-null    object 
 6   Body type     230 non-null    object 
 7   Color         177 non-null    object 
 8   City          109 non-null    object 
 9   Transmission  239 non-null    object 
 10  Contact       260 non-null    object 
 11  Trim          214 non-null    object 
 12  Euro norm     81 non-null     float64
 13  Power         223 non-null    object 
dtypes: float64(1), int64(1), object(12)
memory usage: 28.6+ KB


In [147]:
# Removing the missing values for "Mi", "Fuel type", 'Body type', 'Color', 'Transmission', 'Power'
sl_offers.dropna(subset=["Mi", 'Trim', "Fuel type", 'Body type', 'Transmission', 'Power'], inplace= True)

In [148]:
sl_offers.loc[sl_offers['Color'].isna(), 'Color'] = "NA"
sl_offers.loc[sl_offers['City'].isna(), 'City'] = "NA"

In [149]:
rows_without_CHF = sl_offers[~sl_offers['Price'].str.contains('CHF', na=False)]
rows_without_CHF

Unnamed: 0,Price,Make,Model,Mi,Year,Fuel type,Body type,Color,City,Transmission,Contact,Trim,Euro norm,Power


In [150]:
# First need to only keep euro as currency
sl_offers['Price'] = sl_offers['Price'].str.split('(').str[0].str.strip()

In [151]:
sl_offers['Emission standard'] =sl_offers['Year'].apply(calculate_euro_norm)
sl_offers['Price(EUR)'] = sl_offers['Price'].apply(clean_price)
sl_offers['Mileage(km)'] = sl_offers['Mi'].apply(clean_mi)
sl_offers['kW'] = sl_offers['Power'].apply(power_kW_split)

In [152]:
sl_offers.loc[rows_without_CHF.index, 'Price(EUR)'] *= 1.03

In [153]:
sl_offers.drop(columns= ['Price', 'Mi', 'Power', 'Euro norm'], inplace= True)

In [154]:
sl_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 150 entries, 18 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               150 non-null    object 
 1   Model              150 non-null    object 
 2   Year               150 non-null    int64  
 3   Fuel type          150 non-null    object 
 4   Body type          150 non-null    object 
 5   Color              150 non-null    object 
 6   City               150 non-null    object 
 7   Transmission       150 non-null    object 
 8   Contact            150 non-null    object 
 9   Trim               150 non-null    object 
 10  Emission standard  150 non-null    int64  
 11  Price(EUR)         150 non-null    int64  
 12  Mileage(km)        150 non-null    int64  
 13  kW                 150 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 21.6+ KB


In [155]:
sl_offers['Country'] = "Switzerland"

In [156]:
sl_offers.head()

Unnamed: 0,Make,Model,Year,Fuel type,Body type,Color,City,Transmission,Contact,Trim,Emission standard,Price(EUR),Mileage(km),kW,Country
18,Bmw,2 Series,2020,Petrol,Convertible,Black,,Automatic,https://www.azw.ch/de/angebot/284/bmw-220i-cab...,220i,6,31033,25500,137.0,Switzerland
19,Škoda,Enyaq,2024,Electric,Suv,Blue,,Automatic,https://www.azw.ch/de/angebot/624/skoda-enyaq-...,electro,6,40405,10,134.0,Switzerland
20,Seat,Ateca,2023,Petrol,Suv,Black,,Automatic,https://www.azw.ch/de/angebot/982/seat-ateca-1...,1.5 tsi,6,29992,12500,111.0,Switzerland
22,Tesla,Model Y,2023,Electric,Suv,Black,,Automatic,https://www.azw.ch/de/angebot/989/tesla-model-...,0.0 long range,6,43530,22200,383.0,Switzerland
23,Alfa Romeo,Tonale,2024,Hybrid,Crossover,Gray,,Automatic,https://www.azw.ch/de/angebot/1033/alfa-romeo-...,1.3,6,49778,6,208.0,Switzerland


In [157]:
sl_offers.to_csv('cleaned_Switzerland_offers.csv', index=False)

11. Portugal

In [162]:
pt_offers = pd.read_csv('Portugal.csv')
pt_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"9,850 EUR",Alfa Romeo,Giulietta,1.6 JTDm,"184,394 km",2012,Diesel,Hatchback,Black,Maia,78 kW (106 HP),Manual,https://www.motores24h.pt/alfa-romeo-giulietta...
1,"15,480 EUR",Seat,Leon,1.6 TDI,"137,000 km",2019,Diesel,Hatchback,Black,Viseu,86 kW (117 HP),Manual,https://www.motores24h.pt/seat-leon-st-1.6-tdi...
2,"22,800 EUR",Bmw,4 Series,,"159,772 km",2014,Diesel,Coupe,Gray,Maia,137 kW (186 HP),Automatic,https://www.motores24h.pt/bmw-serie-4-420-d-xd...
3,"14,900 EUR",Renault,Megane,1.5 dCi,"117,635 km",2017,Diesel,Coupe,Gray,Maia,82 kW (112 HP),Manual,https://www.motores24h.pt/renault-megane-1.5-d...
4,"12,450 EUR",Nissan,Leaf,Tekna,"38,000 km",2017,Electric,Hatchback,White,Viseu,81 kW (110 HP),Automatic,https://www.motores24h.pt/nissan-leaf-tekna-30...


In [165]:
pt_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Price         260 non-null    object
 1   Make          260 non-null    object
 2   Model         260 non-null    object
 3   Trim          245 non-null    object
 4   Mi            260 non-null    object
 5   Year          260 non-null    int64 
 6   Fuel type     260 non-null    object
 7   Body type     260 non-null    object
 8   Color         260 non-null    object
 9   City          260 non-null    object
 10  Power         153 non-null    object
 11  Transmission  228 non-null    object
 12  Contact       260 non-null    object
dtypes: int64(1), object(12)
memory usage: 26.5+ KB


In [164]:
pt_offers.loc[pt_offers['Color'].isna(), 'Color'] = "NA"
pt_offers.loc[pt_offers['City'].isna(), 'City'] = "NA"

In [166]:
pt_offers = pt_offers.dropna()

In [167]:
pt_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 144 entries, 0 to 155
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Price         144 non-null    object
 1   Make          144 non-null    object
 2   Model         144 non-null    object
 3   Trim          144 non-null    object
 4   Mi            144 non-null    object
 5   Year          144 non-null    int64 
 6   Fuel type     144 non-null    object
 7   Body type     144 non-null    object
 8   Color         144 non-null    object
 9   City          144 non-null    object
 10  Power         144 non-null    object
 11  Transmission  144 non-null    object
 12  Contact       144 non-null    object
dtypes: int64(1), object(12)
memory usage: 15.8+ KB


In [168]:
pt_offers['Emission standard'] =pt_offers['Year'].apply(calculate_euro_norm)
pt_offers['Price(EUR)'] = pt_offers['Price'].apply(clean_price)
pt_offers['Mileage(km)'] = pt_offers['Mi'].apply(clean_mi)
pt_offers['kW'] = pt_offers['Power'].apply(power_kW_split)

In [169]:
pt_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)

In [170]:
pt_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 144 entries, 0 to 155
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               144 non-null    object 
 1   Model              144 non-null    object 
 2   Trim               144 non-null    object 
 3   Year               144 non-null    int64  
 4   Fuel type          144 non-null    object 
 5   Body type          144 non-null    object 
 6   Color              144 non-null    object 
 7   City               144 non-null    object 
 8   Transmission       144 non-null    object 
 9   Contact            144 non-null    object 
 10  Emission standard  144 non-null    int64  
 11  Price(EUR)         144 non-null    int64  
 12  Mileage(km)        144 non-null    int64  
 13  kW                 144 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 16.9+ KB


In [171]:
pt_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,Alfa Romeo,Giulietta,1.6 JTDm,2012,Diesel,Hatchback,Black,Maia,Manual,https://www.motores24h.pt/alfa-romeo-giulietta...,5,9850,184394,78.0
1,Seat,Leon,1.6 TDI,2019,Diesel,Hatchback,Black,Viseu,Manual,https://www.motores24h.pt/seat-leon-st-1.6-tdi...,6,15480,137000,86.0
3,Renault,Megane,1.5 dCi,2017,Diesel,Coupe,Gray,Maia,Manual,https://www.motores24h.pt/renault-megane-1.5-d...,6,14900,117635,82.0
4,Nissan,Leaf,Tekna,2017,Electric,Hatchback,White,Viseu,Automatic,https://www.motores24h.pt/nissan-leaf-tekna-30...,6,12450,38000,81.0
5,Bmw,1 Series,116d,2011,Diesel,Coupe,Black,Viseu,Manual,https://www.motores24h.pt/bmw-serie-1-116-d-ca...,5,11750,165000,86.0


In [172]:
pt_offers['Country'] = 'Portugal'

In [173]:
pt_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW,Country
0,Alfa Romeo,Giulietta,1.6 JTDm,2012,Diesel,Hatchback,Black,Maia,Manual,https://www.motores24h.pt/alfa-romeo-giulietta...,5,9850,184394,78.0,Portugal
1,Seat,Leon,1.6 TDI,2019,Diesel,Hatchback,Black,Viseu,Manual,https://www.motores24h.pt/seat-leon-st-1.6-tdi...,6,15480,137000,86.0,Portugal
3,Renault,Megane,1.5 dCi,2017,Diesel,Coupe,Gray,Maia,Manual,https://www.motores24h.pt/renault-megane-1.5-d...,6,14900,117635,82.0,Portugal
4,Nissan,Leaf,Tekna,2017,Electric,Hatchback,White,Viseu,Automatic,https://www.motores24h.pt/nissan-leaf-tekna-30...,6,12450,38000,81.0,Portugal
5,Bmw,1 Series,116d,2011,Diesel,Coupe,Black,Viseu,Manual,https://www.motores24h.pt/bmw-serie-1-116-d-ca...,5,11750,165000,86.0,Portugal


In [174]:
pt_offers.to_csv('cleaned_Portugal_offers.csv', index=False)

12. Denmark

In [175]:
dm_offers = pd.read_csv('Denmark.csv')
dm_offers

Unnamed: 0,Price,Make,Model,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Trim
0,"100,619 EUR (749,995 DKK)",Porsche,Taycan,"57,000 km",2021,Electric,Sedan,Black,Kastrup,283 kW (385 HP),Automatic,https://www.bilbasen.dk/brugt/bil/porsche/tayc...,
1,"113,901 EUR (849,000 DKK)",Porsche,Macan,"82,000 km",2015,Petrol,Suv,Black,Hadsund,298 kW (405 HP),Automatic,https://www.bilbasen.dk/brugt/bil/porsche/maca...,3.6 Turbo
2,"127,451 EUR (950,000 DKK)",Porsche,Taycan,"43,000 km",2021,Electric,Sedan,Black,Hørsholm,490 kW (666 HP),Automatic,https://bilhandel.dk/porsche-taycan-4s-cross-t...,4S
3,"29,099 EUR (216,900 DKK)",Hyundai,I20,"4,000 km",2023,Petrol,Hatchback,White,Søborg,74 kW (101 HP),Automatic,https://www.bilbasen.dk/brugt/bil/hyundai/i20/...,1.0
4,"38,839 EUR (289,500 DKK)",Saic,Marvel R,100 km,2022,Electric,Crossover,Gray,Bredebro,288 kW (392 HP),Automatic,https://bilhandel.dk/mg-marvel-r-performance/i...,electro
5,"72,419 EUR (539,800 DKK)",Audi,Sq5,"85,000 km",2017,Diesel,Suv,White,Bramming,326 kW (443 HP),Automatic,https://bilhandel.dk/audi-sq5-30-tdi-326-compe...,3.0 TDI
6,"27,489 EUR (204,900 DKK)",Jeep,Compass,"71,000 km",2018,Diesel,Crossover,White,Kolding,89 kW (121 HP),Manual,https://www.bilbasen.dk/brugt/bil/jeep/compass...,1.6
7,"8,706 EUR (64,900 DKK)",Kia,Picanto,"126,000 km",2016,Petrol,Mini,White,Toftlund,69 kW (94 HP),Manual,https://bilhandel.dk/kia-picanto-10-attraction...,1.0
8,"56,333 EUR (419,900 DKK)",Kia,Ev6,15 km,2024,Electric,Crossover,Black,Birkerød,170 kW (231 HP),Automatic,https://www.bilbasen.dk/brugt/bil/kia/ev6/77-l...,
9,"48,270 EUR (359,800 DKK)",Volkswagen,Tiguan,"43,000 km",2020,Petrol,Crossover,White,Frederikssund,150 kW (204 HP),Automatic,https://bilhandel.dk/vw-tiguan-15-tsi-150-comf...,1.5 tsi


For the missing DKK values in Price column, the EUR also needs to multiply the exchange rate 0,13

In [176]:
rows_without_DKK = dm_offers[~dm_offers['Price'].str.contains('DKK', na=False)]

In [177]:
dm_offers['Price'] = dm_offers['Price'].str.split('(').str[0].str.strip()

In [178]:
dm_offers['Emission standard'] =dm_offers['Year'].apply(calculate_euro_norm)
dm_offers['Price(EUR)'] = dm_offers['Price'].apply(clean_price)
dm_offers['Mileage(km)'] = dm_offers['Mi'].apply(clean_mi)
dm_offers['kW'] = dm_offers['Power'].apply(power_kW_split)

In [179]:
dm_offers.loc[rows_without_DKK.index, 'Price(EUR)'] = dm_offers.loc[rows_without_DKK.index, 'Price(EUR)'] * 0.13

In [180]:
dm_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Price              260 non-null    object 
 1   Make               260 non-null    object 
 2   Model              260 non-null    object 
 3   Mi                 260 non-null    object 
 4   Year               260 non-null    int64  
 5   Fuel type          260 non-null    object 
 6   Body type          260 non-null    object 
 7   Color              243 non-null    object 
 8   City               260 non-null    object 
 9   Power              259 non-null    object 
 10  Transmission       260 non-null    object 
 11  Contact            260 non-null    object 
 12  Trim               217 non-null    object 
 13  Emission standard  260 non-null    int64  
 14  Price(EUR)         260 non-null    int64  
 15  Mileage(km)        260 non-null    int64  
 16  kW                 259 non

In [181]:

dm_offers.loc[dm_offers['Color'].isna(), 'Color'] = "NA"

In [182]:
dm_offers = dm_offers.dropna()

In [183]:
dm_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dm_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)


In [184]:
dm_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 216 entries, 1 to 258
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               216 non-null    object 
 1   Model              216 non-null    object 
 2   Year               216 non-null    int64  
 3   Fuel type          216 non-null    object 
 4   Body type          216 non-null    object 
 5   Color              216 non-null    object 
 6   City               216 non-null    object 
 7   Transmission       216 non-null    object 
 8   Contact            216 non-null    object 
 9   Trim               216 non-null    object 
 10  Emission standard  216 non-null    int64  
 11  Price(EUR)         216 non-null    int64  
 12  Mileage(km)        216 non-null    int64  
 13  kW                 216 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 25.3+ KB


In [185]:
dm_offers['Country'] = 'Denmark'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dm_offers['Country'] = 'Denmark'


In [186]:
dm_offers.to_csv('cleaned_Denmark_offers.csv',index=False)

13.Norway

In [189]:
nw_offers = pd.read_csv('Norway.csv')
nw_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"38,499 EUR (439,000 NOK)",Lexus,Ux,300e,"2,000 km",2023,Electric,Crossover,Green,Leknes,152 kW (207 HP),Automatic,https://www.nordvik.no/bruktbil/342158655
1,"6,051 EUR (69,000 NOK)",Opel,Insignia,2.0 CDTi,"221,400 km",2010,Diesel,Wagon,Black,Jessheim,82 kW (112 HP),Manual,https://www.finn.no/car/used/ad.html?finnkode=...
2,"12,234 EUR (139,500 NOK)",Toyota,Auris,1.6 VVT-i,"66,000 km",2013,Petrol,Hatchback,Silver,Hønefoss,98 kW (133 HP),Automatic,https://www.finn.no/car/used/ad.html?finnkode=...
3,"9,559 EUR (109,000 NOK)",Volkswagen,Passat,2.0 TDI,"185,000 km",2012,Diesel,Wagon,Black,Larvik,104 kW (141 HP),Automatic,https://www.finn.no/car/used/ad.html?finnkode=...
4,"12,216 EUR (139,305 NOK)",Subaru,Forester,2.0 D,"231,574 km",2013,Diesel,Suv,Black,Tønsberg,109 kW (148 HP),Manual,https://www.finn.no/car/used/ad.html?finnkode=...


In [190]:
rows_without_NOK = nw_offers[~nw_offers['Price'].str.contains('NOK', na=False)]
rows_without_NOK

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
89,"35,990 EUR",Citroen,C3,1.6,"114,000 km",2006,Petrol,Hatchback,Gray,Skien,81 kW (110 HP),Automatic,https://www.finn.no/car/used/ad.html?finnkode=...
90,"53,000 EUR",Citroen,C3,1.6,"112,000 km",2006,Petrol,Hatchback,Gray,Skien,81 kW (110 HP),Automatic,https://www.finn.no/car/used/ad.html?finnkode=...


In [191]:
nw_offers['Price'] = nw_offers['Price'].str.split('(').str[0]

In [192]:
nw_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact
0,"38,499 EUR",Lexus,Ux,300e,"2,000 km",2023,Electric,Crossover,Green,Leknes,152 kW (207 HP),Automatic,https://www.nordvik.no/bruktbil/342158655
1,"6,051 EUR",Opel,Insignia,2.0 CDTi,"221,400 km",2010,Diesel,Wagon,Black,Jessheim,82 kW (112 HP),Manual,https://www.finn.no/car/used/ad.html?finnkode=...
2,"12,234 EUR",Toyota,Auris,1.6 VVT-i,"66,000 km",2013,Petrol,Hatchback,Silver,Hønefoss,98 kW (133 HP),Automatic,https://www.finn.no/car/used/ad.html?finnkode=...
3,"9,559 EUR",Volkswagen,Passat,2.0 TDI,"185,000 km",2012,Diesel,Wagon,Black,Larvik,104 kW (141 HP),Automatic,https://www.finn.no/car/used/ad.html?finnkode=...
4,"12,216 EUR",Subaru,Forester,2.0 D,"231,574 km",2013,Diesel,Suv,Black,Tønsberg,109 kW (148 HP),Manual,https://www.finn.no/car/used/ad.html?finnkode=...


In [193]:
nw_offers['Emission standard'] =nw_offers['Year'].apply(calculate_euro_norm)
nw_offers['Price(EUR)'] = nw_offers['Price'].apply(clean_price)
nw_offers['Mileage(km)'] = nw_offers['Mi'].apply(clean_mi)
nw_offers['kW'] = nw_offers['Power'].apply(power_kW_split)

In [194]:
nw_offers.loc[rows_without_NOK.index, 'Price(EUR)'] *= 0.086

  nw_offers.loc[rows_without_NOK.index, 'Price(EUR)'] *= 0.086


In [200]:
nw_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Price              260 non-null    object 
 1   Make               260 non-null    object 
 2   Model              260 non-null    object 
 3   Trim               211 non-null    object 
 4   Mi                 260 non-null    object 
 5   Year               260 non-null    int64  
 6   Fuel type          260 non-null    object 
 7   Body type          260 non-null    object 
 8   Color              260 non-null    object 
 9   City               260 non-null    object 
 10  Power              258 non-null    object 
 11  Transmission       260 non-null    object 
 12  Contact            260 non-null    object 
 13  Emission standard  260 non-null    int64  
 14  Price(EUR)         260 non-null    float64
 15  Mileage(km)        260 non-null    int64  
 16  kW                 258 non

In [201]:
nw_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)

In [202]:
nw_offers = nw_offers.dropna()

In [203]:
nw_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW
0,Lexus,Ux,300e,2023,Electric,Crossover,Green,Leknes,Automatic,https://www.nordvik.no/bruktbil/342158655,6,38499.0,2000,152.0
1,Opel,Insignia,2.0 CDTi,2010,Diesel,Wagon,Black,Jessheim,Manual,https://www.finn.no/car/used/ad.html?finnkode=...,5,6051.0,221400,82.0
2,Toyota,Auris,1.6 VVT-i,2013,Petrol,Hatchback,Silver,Hønefoss,Automatic,https://www.finn.no/car/used/ad.html?finnkode=...,5,12234.0,66000,98.0
3,Volkswagen,Passat,2.0 TDI,2012,Diesel,Wagon,Black,Larvik,Automatic,https://www.finn.no/car/used/ad.html?finnkode=...,5,9559.0,185000,104.0
4,Subaru,Forester,2.0 D,2013,Diesel,Suv,Black,Tønsberg,Manual,https://www.finn.no/car/used/ad.html?finnkode=...,5,12216.0,231574,109.0


In [204]:
nw_offers['Country'] = 'Norway'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nw_offers['Country'] = 'Norway'


In [205]:
nw_offers.to_csv('cleaned_Norway_offers.csv', index=False)

14. Hungary

In [206]:
hg_offers=pd.read_csv('Hungary.csv')
hg_offers

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Euro norm,Body type,Color,City,Power,Transmission,Contact
0,"26,580 EUR (10,450,000 HUF)",Kia,Sportage,1.6 GDi,"3,999 km",2023,Petrol,5.0,Crossover,Silver,Szombathely,110 kW (150 HP),Automatic,https://joautok.hu/hasznaltauto/kia/sportage/g...
1,"24,670 EUR (9,699,000 HUF)",Kia,Xceed,1.6,3 km,2024,Petrol,,Crossover,White,Szombathely,118 kW (160 HP),Automatic,https://www.szalonauto.hu/szemelyauto/kia/xcee...
2,"25,410 EUR (9,990,000 HUF)",Kia,Sportage,1.6 GDi,3 km,2024,Petrol,,Crossover,Gray,Szombathely,111 kW (151 HP),Manual,https://www.szalonauto.hu/szemelyauto/kia/spor...
3,"48,303 EUR (18,990,000 HUF)",Volkswagen,Arteon,1.4,100 km,2023,Hybrid,,Coupe,Gray,Kisvárda,160 kW (218 HP),Automatic,https://www.hasznaltauto.hu/szemelyauto/volksw...
4,"44,172 EUR (17,366,000 HUF)",Ford,Mustang Mach-E,,610 km,2024,Electric,,Crossover,Green,Szeged,216 kW (294 HP),Automatic,https://www.szalonauto.hu/szemelyauto/ford/mus...
5,"35,641 EUR (14,012,000 HUF)",Ford,Mustang Mach-E,,440 km,2024,Electric,,Crossover,Green,Szeged,197 kW (268 HP),Automatic,https://www.szalonauto.hu/szemelyauto/ford/mus...
6,"47,794 EUR (18,790,000 HUF)",Volkswagen,Tiguan,2.0 TDI 4Motion,0 km,2024,Diesel,,Crossover,Black,Kisvárda,110 kW (150 HP),Automatic,https://www.szalonauto.hu/szemelyauto/volkswag...
7,"45,276 EUR (17,800,000 HUF)",Volkswagen,Passat,2.0,0 km,2024,Diesel,,Wagon,Black,Kisvárda,110 kW (150 HP),Automatic,https://www.szalonauto.hu/szemelyauto/volkswag...
8,"21,595 EUR (8,490,000 HUF)",Citroen,C4,1.2 puretech,2 km,2023,Petrol,,Hatchback,Black,Budaörs,96 kW (131 HP),Automatic,https://joautok.hu/hasznaltauto/citroen/c4/c4-...
9,"30,497 EUR (11,990,000 HUF)",Volkswagen,T-Cross,1.5 tsi,0 km,2024,Petrol,,Crossover,Cyan,Kisvárda,110 kW (150 HP),Automatic,https://www.szalonauto.hu/szemelyauto/volkswag...


In [207]:
rows_without_HUF = hg_offers[~hg_offers['Price'].str.contains('HUF',  na=False)]
rows_without_HUF

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Euro norm,Body type,Color,City,Power,Transmission,Contact


In [208]:
# there's no missing local currency values. Otherwise, perform hg_offers.loc[rows_without_HUF.index, 'Price(EUR)'] *= 0,0025

In [209]:
hg_offers['Price']= hg_offers['Price'].str.split('(').str[0]

In [210]:
hg_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         260 non-null    object 
 1   Make          260 non-null    object 
 2   Model         260 non-null    object 
 3   Trim          155 non-null    object 
 4   Mi            220 non-null    object 
 5   Year          260 non-null    int64  
 6   Fuel type     182 non-null    object 
 7   Euro norm     1 non-null      float64
 8   Body type     222 non-null    object 
 9   Color         103 non-null    object 
 10  City          137 non-null    object 
 11  Power         259 non-null    object 
 12  Transmission  210 non-null    object 
 13  Contact       260 non-null    object 
dtypes: float64(1), int64(1), object(12)
memory usage: 28.6+ KB


In [211]:
hg_offers.dropna(subset = ['Mi', 'Trim', 'Fuel type', 'Body type', 'Power', 'Transmission'], inplace=True)

In [212]:
hg_offers['Emission standard'] =hg_offers['Year'].apply(calculate_euro_norm)
hg_offers['Price(EUR)'] = hg_offers['Price'].apply(clean_price)
hg_offers['Mileage(km)'] = hg_offers['Mi'].apply(clean_mi)
hg_offers['kW'] = hg_offers['Power'].apply(power_kW_split)

In [213]:
hg_offers.drop(columns= ['Price', 'Mi', 'Power', 'Euro norm'], inplace= True)

In [214]:
hg_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 85 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               85 non-null     object 
 1   Model              85 non-null     object 
 2   Trim               85 non-null     object 
 3   Year               85 non-null     int64  
 4   Fuel type          85 non-null     object 
 5   Body type          85 non-null     object 
 6   Color              58 non-null     object 
 7   City               62 non-null     object 
 8   Transmission       85 non-null     object 
 9   Contact            85 non-null     object 
 10  Emission standard  85 non-null     int64  
 11  Price(EUR)         85 non-null     int64  
 12  Mileage(km)        85 non-null     int64  
 13  kW                 85 non-null     float64
dtypes: float64(1), int64(4), object(9)
memory usage: 10.0+ KB


In [215]:
hg_offers.loc[hg_offers['City'].isna(), 'City'] = "NA"
hg_offers.loc[hg_offers['Color'].isna(), 'Color'] = "NA"


In [216]:
hg_offers['Country'] = "Hungary"

In [217]:
hg_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW,Country
0,Kia,Sportage,1.6 GDi,2023,Petrol,Crossover,Silver,Szombathely,Automatic,https://joautok.hu/hasznaltauto/kia/sportage/g...,6,26580,3999,110.0,Hungary
1,Kia,Xceed,1.6,2024,Petrol,Crossover,White,Szombathely,Automatic,https://www.szalonauto.hu/szemelyauto/kia/xcee...,6,24670,3,118.0,Hungary
2,Kia,Sportage,1.6 GDi,2024,Petrol,Crossover,Gray,Szombathely,Manual,https://www.szalonauto.hu/szemelyauto/kia/spor...,6,25410,3,111.0,Hungary
3,Volkswagen,Arteon,1.4,2023,Hybrid,Coupe,Gray,Kisvárda,Automatic,https://www.hasznaltauto.hu/szemelyauto/volksw...,6,48303,100,160.0,Hungary
6,Volkswagen,Tiguan,2.0 TDI 4Motion,2024,Diesel,Crossover,Black,Kisvárda,Automatic,https://www.szalonauto.hu/szemelyauto/volkswag...,6,47794,0,110.0,Hungary


In [218]:
hg_offers.to_csv('cleaned_Hungary_offers.csv', index=False)

15. Czech Republic

In [219]:
cr_offers = pd.read_csv('Czech Republic.csv')
cr_offers

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Euro norm
0,"106,986 EUR (2,699,900 CZK)",Mercedes Benz,Gle-Class,450 AMG 4MATIC,"6,460 km",2023,Petrol,Suv,Black,Praha,280 kW (381 HP),Automatic,https://www.sportovnivozy.cz/186651-mercedes-b...,
1,"172,369 EUR (4,349,900 CZK)",Mercedes Benz,Sl-Class,4.0 AMG 55,"2,966 km",2023,Petrol,Convertible,Gray,Praha,350 kW (476 HP),Automatic,https://autocaris.cz/detail.php?inzerat=716194,
2,"71,322 EUR (1,799,900 CZK)",Porsche,Cayenne,4.0 Turbo,"138,990 km",2018,Petrol,Suv,Black,Praha,404 kW (549 HP),Automatic,https://www.sauto.cz/osobni/detail/porsche/cay...,
3,"27,734 EUR (699,900 CZK)",Volkswagen,Sharan,2.0 TDI,"166,950 km",2020,Diesel,Van,Blue,Praha,110 kW (150 HP),Automatic,https://www.autanet.cz/detail/volkswagen/shara...,
4,"7,489 EUR (189,000 CZK)",Subaru,Forester,2.0 i,"207,000 km",2010,Petrol,Suv,White,Žďár Nad Sázavou,110 kW (150 HP),Automatic,https://www.annonce.cz/inzerat/subaru-forester...,
5,"16,246 EUR (409,999 CZK)",Volkswagen,Touareg,4.0 TDI V8,"319,000 km",2010,Diesel,Suv,Brown,Brno,250 kW (340 HP),Automatic,https://www.sauto.cz/osobni/detail/volkswagen/...,
6,"45,565 EUR (1,149,900 CZK)",Dodge,Challenger,6.4,"59,305 km",2020,Petrol,Coupe,Black,Praha,362 kW (492 HP),Automatic,https://autocaris.cz/detail.php?inzerat=733813,
7,"7,528 EUR (189,999 CZK)",Jeep,Grand Cherokee,3.0 crd,"240,568 km",2010,Diesel,Suv,Black,Brno,160 kW (218 HP),Automatic,http://auto.rychle.cz/osobni-auto-O/Jeep/Grand...,
8,"4,358 EUR (109,999 CZK)",Peugeot,5008,1.6,"187,333 km",2011,Diesel,Mpv,White,Brno,82 kW (112 HP),Manual,https://yauto.cz/peugeot/5008/16-7mist-kombi-n...,
9,"6,736 EUR (169,999 CZK)",Bmw,X3,,"220,123 km",2008,Diesel,Suv,Black,Brno,130 kW (177 HP),Automatic,https://www.sauto.cz/osobni/detail/bmw/x3/2027...,


In [220]:
rows_without_CZK = cr_offers[~cr_offers['Price'].str.contains('CZK', na=False)]

In [221]:
rows_without_CZK

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Euro norm
258,"2,649,000 EUR",Bmw,X6,,"5,000 km",2023,Diesel,Suv,Blue,Praha,210 kW (286 HP),,https://www.sportovnivozy.cz/187447-bmw-x6-30x...,


In [222]:
cr_offers['Price'] = cr_offers['Price'].str.split('(').str[0]

In [223]:
cr_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Euro norm
0,"106,986 EUR",Mercedes Benz,Gle-Class,450 AMG 4MATIC,"6,460 km",2023,Petrol,Suv,Black,Praha,280 kW (381 HP),Automatic,https://www.sportovnivozy.cz/186651-mercedes-b...,
1,"172,369 EUR",Mercedes Benz,Sl-Class,4.0 AMG 55,"2,966 km",2023,Petrol,Convertible,Gray,Praha,350 kW (476 HP),Automatic,https://autocaris.cz/detail.php?inzerat=716194,
2,"71,322 EUR",Porsche,Cayenne,4.0 Turbo,"138,990 km",2018,Petrol,Suv,Black,Praha,404 kW (549 HP),Automatic,https://www.sauto.cz/osobni/detail/porsche/cay...,
3,"27,734 EUR",Volkswagen,Sharan,2.0 TDI,"166,950 km",2020,Diesel,Van,Blue,Praha,110 kW (150 HP),Automatic,https://www.autanet.cz/detail/volkswagen/shara...,
4,"7,489 EUR",Subaru,Forester,2.0 i,"207,000 km",2010,Petrol,Suv,White,Žďár Nad Sázavou,110 kW (150 HP),Automatic,https://www.annonce.cz/inzerat/subaru-forester...,


In [224]:
cr_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         260 non-null    object 
 1   Make          260 non-null    object 
 2   Model         260 non-null    object 
 3   Trim          241 non-null    object 
 4   Mi            260 non-null    object 
 5   Year          260 non-null    int64  
 6   Fuel type     260 non-null    object 
 7   Body type     260 non-null    object 
 8   Color         260 non-null    object 
 9   City          260 non-null    object 
 10  Power         260 non-null    object 
 11  Transmission  249 non-null    object 
 12  Contact       260 non-null    object 
 13  Euro norm     2 non-null      float64
dtypes: float64(1), int64(1), object(12)
memory usage: 28.6+ KB


In [225]:
cr_offers['Emission standard'] =cr_offers['Year'].apply(calculate_euro_norm)
cr_offers['Price(EUR)'] = cr_offers['Price'].apply(clean_price)
cr_offers['Mileage(km)'] = cr_offers['Mi'].apply(clean_mi)
cr_offers['kW'] = cr_offers['Power'].apply(power_kW_split)

In [226]:
cr_offers.loc[rows_without_CZK.index, 'Price(EUR)'] *= 0.040

In [227]:
cr_offers.drop(columns= ['Price', 'Mi', 'Power'], inplace= True)

In [228]:
cr_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 15 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               260 non-null    object 
 1   Model              260 non-null    object 
 2   Trim               241 non-null    object 
 3   Year               260 non-null    int64  
 4   Fuel type          260 non-null    object 
 5   Body type          260 non-null    object 
 6   Color              260 non-null    object 
 7   City               260 non-null    object 
 8   Transmission       249 non-null    object 
 9   Contact            260 non-null    object 
 10  Euro norm          2 non-null      float64
 11  Emission standard  260 non-null    int64  
 12  Price(EUR)         260 non-null    int64  
 13  Mileage(km)        260 non-null    int64  
 14  kW                 260 non-null    float64
dtypes: float64(2), int64(4), object(9)
memory usage: 30.6+ KB


In [229]:
cr_offers.drop(columns= ['Euro norm'], inplace= True)

In [230]:
cr_offers.dropna(subset=['Transmission','Trim', 'kW'], inplace =True)

In [231]:
cr_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 233 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               233 non-null    object 
 1   Model              233 non-null    object 
 2   Trim               233 non-null    object 
 3   Year               233 non-null    int64  
 4   Fuel type          233 non-null    object 
 5   Body type          233 non-null    object 
 6   Color              233 non-null    object 
 7   City               233 non-null    object 
 8   Transmission       233 non-null    object 
 9   Contact            233 non-null    object 
 10  Emission standard  233 non-null    int64  
 11  Price(EUR)         233 non-null    int64  
 12  Mileage(km)        233 non-null    int64  
 13  kW                 233 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 27.3+ KB


In [232]:
cr_offers['Country'] = "Czech Republic"

In [233]:
cr_offers.to_csv('cleaned_Czech_offers.csv', index=False)

16. Romania

In [234]:
rm_offers = pd.read_csv('Romania.csv')
rm_offers.head()

Unnamed: 0,Price,Make,Model,Trim,Mi,Year,Fuel type,Body type,Color,City,Power,Transmission,Contact,Euro norm
0,"61,469 EUR",Porsche,Cayman,2.5,"66,920 km",2017,Petrol,Coupe,White,Constanţa,191 kW (260 HP),Automatic,https://www.leasingsh.ro/porsche-cayman-2017-P...,
1,"59,269 EUR",Mercedes Benz,Gls-Class,,"99,988 km",2018,Petrol,Suv,Gray,Constanţa,182 kW (248 HP),Automatic,https://www.leasingsh.ro/mercedes-benz-gls-400...,
2,"48,269 EUR",Volvo,Xc90,2.0,"50,000 km",2018,Petrol,Suv,Black,Constanţa,170 kW (231 HP),Automatic,https://www.leasingsh.ro/volvo-xc-90-2018-VOL4...,
3,"14,890 EUR",Bmw,X3,,"209,462 km",2013,Diesel,Suv,White,Bistriţa,184 kW (250 HP),Automatic,https://cautimasina.ro/bmw-x3-2013-diesel-seco...,5.0
4,"17,493 EUR",Volkswagen,T-Cross,1.0 tsi,"70,400 km",2021,Petrol,Crossover,Gray,Bucureşti,70 kW (95 HP),Automatic,https://www.anuntul.ro/anunt-autoturism-volksw...,


In [235]:
rm_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Price         260 non-null    object 
 1   Make          260 non-null    object 
 2   Model         260 non-null    object 
 3   Trim          208 non-null    object 
 4   Mi            260 non-null    object 
 5   Year          260 non-null    int64  
 6   Fuel type     256 non-null    object 
 7   Body type     243 non-null    object 
 8   Color         221 non-null    object 
 9   City          171 non-null    object 
 10  Power         249 non-null    object 
 11  Transmission  249 non-null    object 
 12  Contact       260 non-null    object 
 13  Euro norm     92 non-null     float64
dtypes: float64(1), int64(1), object(12)
memory usage: 28.6+ KB


In [236]:
rm_offers.loc[rm_offers['Color'].isna(), 'Color'] = "NA"
rm_offers.loc[rm_offers['City'].isna(), 'City'] = "NA"

In [237]:
rm_offers['Emission standard'] =rm_offers['Year'].apply(calculate_euro_norm)
rm_offers['Price(EUR)'] = rm_offers['Price'].apply(clean_price)
rm_offers['Mileage(km)'] = rm_offers['Mi'].apply(clean_mi)
rm_offers['kW'] = rm_offers['Power'].apply(power_kW_split)

In [238]:
rm_offers.drop(columns= ['Price', 'Mi', 'Power', 'Euro norm'], inplace= True)

In [239]:
rm_offers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               260 non-null    object 
 1   Model              260 non-null    object 
 2   Trim               208 non-null    object 
 3   Year               260 non-null    int64  
 4   Fuel type          256 non-null    object 
 5   Body type          243 non-null    object 
 6   Color              260 non-null    object 
 7   City               260 non-null    object 
 8   Transmission       249 non-null    object 
 9   Contact            260 non-null    object 
 10  Emission standard  260 non-null    int64  
 11  Price(EUR)         260 non-null    int64  
 12  Mileage(km)        260 non-null    int64  
 13  kW                 249 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 28.6+ KB


In [240]:
rm_offers = rm_offers.dropna()

In [241]:
rm_offers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 168 entries, 0 to 258
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               168 non-null    object 
 1   Model              168 non-null    object 
 2   Trim               168 non-null    object 
 3   Year               168 non-null    int64  
 4   Fuel type          168 non-null    object 
 5   Body type          168 non-null    object 
 6   Color              168 non-null    object 
 7   City               168 non-null    object 
 8   Transmission       168 non-null    object 
 9   Contact            168 non-null    object 
 10  Emission standard  168 non-null    int64  
 11  Price(EUR)         168 non-null    int64  
 12  Mileage(km)        168 non-null    int64  
 13  kW                 168 non-null    float64
dtypes: float64(1), int64(4), object(9)
memory usage: 19.7+ KB


In [242]:
rm_offers['Country'] = "Romania"

In [243]:
rm_offers.head()

Unnamed: 0,Make,Model,Trim,Year,Fuel type,Body type,Color,City,Transmission,Contact,Emission standard,Price(EUR),Mileage(km),kW,Country
0,Porsche,Cayman,2.5,2017,Petrol,Coupe,White,Constanţa,Automatic,https://www.leasingsh.ro/porsche-cayman-2017-P...,6,61469,66920,191.0,Romania
2,Volvo,Xc90,2.0,2018,Petrol,Suv,Black,Constanţa,Automatic,https://www.leasingsh.ro/volvo-xc-90-2018-VOL4...,6,48269,50000,170.0,Romania
4,Volkswagen,T-Cross,1.0 tsi,2021,Petrol,Crossover,Gray,Bucureşti,Automatic,https://www.anuntul.ro/anunt-autoturism-volksw...,6,17493,70400,70.0,Romania
6,Peugeot,3008,1.5,2019,Diesel,Crossover,Black,Constanţa,Automatic,https://www.leasingsh.ro/peugeot-3008-2019-PEU...,6,20590,55301,96.0,Romania
7,Mazda,3,1.6,2010,Petrol,Hatchback,Black,Cluj Napoca,Manual,https://www.anuntul.ro/anunt-autoturism-mazda-...,5,6795,163000,77.0,Romania


In [244]:
rm_offers.to_csv('cleaned_Romania_offers.csv', index=False)