# Campervan Project

## Libraries and settings

In [16]:
# Libraries
import os
import re
import pprint
import requests
import numpy as np
import pandas as pd

import geopandas as gpd

from shapely.geometry import Polygon

import matplotlib.patches as patches
from matplotlib import pyplot as plt

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

print(os.getcwd())

/workspaces/project_Autoscout_Coding_Pros/Project_Campers


## Import data

In [17]:
# Load the scraped listings from the comma-separated, UTF-8 encoded CSV file
df = pd.read_csv(
    "./data/Autoscout24_Camper.csv",
    sep=",",
    encoding='utf-8',
)

# Preview the first five rows of the data frame
df.head(5)


Unnamed: 0,web-scraper-order,web-scraper-start-url,autoscout24,autoscout24-href,description_raw,price_raw,mileage_raw,fuel_type_raw,location_raw
0,1733470796-1,https://www.autoscout24.ch/de/s/vc-camper?mile...,,https://www.autoscout24.ch/de/d/mercedes-benz-...,MERCEDES-BENZ Camper,CHF 15'900.–,265'000 km,Diesel,"Mittelstrasse 38, 3613 Steffisburg"
1,1733470799-2,https://www.autoscout24.ch/de/s/vc-camper?mile...,,https://www.autoscout24.ch/de/d/carado-i-449-e...,CARADO I 449 Édition 15 Fiat Capron 169L/1449,CHF 71'900.–,14'000 km,Diesel,"Zone industrielle Les Portettes 2c, 1312 Eclépens"
2,1733470801-3,https://www.autoscout24.ch/de/s/vc-camper?mile...,,https://www.autoscout24.ch/de/d/dethleffs-fiat...,DETHLEFFS Fiat Ducato,CHF 37'800.–,90'000 km,Diesel,"Rte du St-Bernard 8, 1937 Orsières"
3,1733470805-4,https://www.autoscout24.ch/de/s/vc-camper?mile...,,https://www.autoscout24.ch/de/d/fiat-euro-mast...,FIAT Euro Master Wohnmobil,CHF 19'900.–,187'236 km,Diesel,"Badenerstrasse 84, 8952 Schlieren"
4,1733470807-5,https://www.autoscout24.ch/de/s/vc-camper?mile...,,https://www.autoscout24.ch/de/d/vw-t61-califor...,VW T6.1 California 2.0 TDI Ocean Edition Liber...,CHF 84'500.–,16'500 km,Diesel,8852 Altendorf


### Count number of rows and columns in the data frame

In [18]:
# Shape of the data frame as a (rows, columns) tuple
print('Dimension:', df.shape)

# Row count (length of the frame)
print('Number of rows:', len(df))

# Column count (length of the column index)
print('Number of columns:', len(df.columns))

Dimension: (1587, 9)
Number of rows: 1587
Number of columns: 9


### Inspect the data types of the scraped columns

In [19]:
# Show the data type pandas assigned to each column when reading the CSV.
# Note that pandas reports string (text) columns as 'object', so the raw
# price and mileage columns still have to be converted to numbers.
df.dtypes

web-scraper-order         object
web-scraper-start-url     object
autoscout24              float64
autoscout24-href          object
description_raw           object
price_raw                 object
mileage_raw               object
fuel_type_raw             object
location_raw              object
dtype: object

### Extract and save relevant information from raw data using regular expressions (regex)

### Extract Price

In [24]:
# Extract the numeric price from 'price_raw' (e.g. "CHF 15'900.–" -> 15900).
#
# Every character that is not an ASCII digit is stripped in one vectorized
# pass, then the remaining digit string is converted to a number.
# - Operating on the column itself (instead of building a new pd.Series from a
#   Python list) keeps the result aligned with df's index.
# - errors='coerce' turns rows without any digits (missing or non-numeric
#   entries) into NaN instead of raising during the conversion. The column is
#   int64 as long as every row contains a price, and float64 otherwise.
df['price'] = pd.to_numeric(
    df['price_raw'].astype(str).str.replace(r'[^0-9]', '', regex=True),
    errors='coerce',
)

# Print first 5 values
print(df['price_raw'].head(5), '\n')
print(df['price'].head(5))

0    CHF 15'900.–
1    CHF 71'900.–
2    CHF 37'800.–
3    CHF 19'900.–
4    CHF 84'500.–
Name: price_raw, dtype: object 

0    15900
1    71900
2    37800
3    19900
4    84500
Name: price, dtype: int64


### Extract Mileage

In [26]:
# Extract the numeric mileage from 'mileage_raw' (e.g. "265'000 km" -> 265000).
#
# Every character that is not an ASCII digit is stripped in one vectorized
# pass, then the remaining digit string is converted to a number.
# - Operating on the column itself (instead of building a new pd.Series from a
#   Python list) keeps the result aligned with df's index.
# - errors='coerce' turns rows without any digits (missing or non-numeric
#   entries) into NaN instead of raising during the conversion. The column is
#   int64 as long as every row contains a mileage, and float64 otherwise.
df['mileage'] = pd.to_numeric(
    df['mileage_raw'].astype(str).str.replace(r'[^0-9]', '', regex=True),
    errors='coerce',
)

# Print first 5 values
print(df['mileage_raw'].head(5), '\n')
print(df['mileage'].head(5))

0    265'000 km
1     14'000 km
2     90'000 km
3    187'236 km
4     16'500 km
Name: mileage_raw, dtype: object 

0    265000
1     14000
2     90000
3    187236
4     16500
Name: mileage, dtype: int64
