### Step 1: Import Libraries and CSV Files

In [12]:
# Import the required libraries
import pandas as pd
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import metrics
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression

import warnings
# Install a blanket "ignore" filter: no category or module is given, so every
# warning raised after this line is suppressed, including deprecation notices.
# NOTE(review): consider narrowing this to specific categories so that
# genuinely useful warnings remain visible.
warnings.filterwarnings('ignore')

In [13]:
# Load the car price records from the Resources folder
car_prices_path = Path("Resources/car_prices.csv")
car_prices_df = pd.read_csv(car_prices_path)

# Preview the first five rows
car_prices_df.head(5)


Unnamed: 0,year,make,model,trim,body,transmission,vin,state,condition,odometer,color,interior,seller,mmr,sellingprice,saledate
0,2015,Kia,Sorento,LX,SUV,automatic,5xyktca69fg566472,ca,5.0,16639.0,white,black,kia motors america inc,20500.0,21500.0,Tue Dec 16 2014 12:30:00 GMT-0800 (PST)
1,2015,Kia,Sorento,LX,SUV,automatic,5xyktca69fg561319,ca,5.0,9393.0,white,beige,kia motors america inc,20800.0,21500.0,Tue Dec 16 2014 12:30:00 GMT-0800 (PST)
2,2014,BMW,3 Series,328i SULEV,Sedan,automatic,wba3c1c51ek116351,ca,45.0,1331.0,gray,black,financial services remarketing (lease),31900.0,30000.0,Thu Jan 15 2015 04:30:00 GMT-0800 (PST)
3,2015,Volvo,S60,T5,Sedan,automatic,yv1612tb4f1310987,ca,41.0,14282.0,white,black,volvo na rep/world omni,27500.0,27750.0,Thu Jan 29 2015 04:30:00 GMT-0800 (PST)
4,2014,BMW,6 Series Gran Coupe,650i,Sedan,automatic,wba6b2c57ed129731,ca,43.0,2641.0,gray,black,financial services remarketing (lease),66000.0,67000.0,Thu Dec 18 2014 12:30:00 GMT-0800 (PST)


In [14]:
# Load the new-car price records from the Resources folder
new_cars_path = Path("Resources/new_cars_prices.csv")
new_cars_df = pd.read_csv(new_cars_path)

# Preview the first five rows
new_cars_df.head(5)

Unnamed: 0,web-scraper-order,Car Model,Old Price,Price Change,New Price,date_range
0,1680210890-1,Porsche Cayenne A/T / Coupe 2020,"2,262,876\nEGP","trending_down\n-82,876\nEGP","2,180,000\nEGP",18/11/2020
1,1680210890-2,Porsche Cayenne A/T / Coupe 2020,"2,078,727\nEGP","trending_up\n+184,149\nEGP","2,262,876\nEGP",14/10/2019
2,1680210890-3,Porsche Cayenne A/T / S 2020,"2,388,000\nEGP","trending_down\n-93,000\nEGP","2,295,000\nEGP",18/11/2020
3,1680210890-4,Porsche Cayenne A/T / S 2020,"2,262,876\nEGP","trending_up\n+125,124\nEGP","2,388,000\nEGP",15/10/2019
4,1680210890-5,Lexus LX 570 Automtic 2020,"5,600,000\nEGP","trending_down\n-350,000\nEGP","5,250,000\nEGP",18/05/2020


In [15]:
# Load the used-car price records from the Resources folder
used_cars_path = Path("Resources/used_car_prices.csv")
used_cars_df = pd.read_csv(used_cars_path)

# Preview the first five rows
used_cars_df.head(5)

Unnamed: 0,web-scraper-order,Car Model,Month/Year,Average price,Minimum price,Maximum price
0,1680204632-1,Skoda Octavia A8 2022,2023-03,"967,000 EGP","926,000 EGP","1,017,000 EGP"
1,1680204632-2,Skoda Octavia A8 2022,2023-02,"979,000 EGP","931,000 EGP","1,045,000 EGP"
2,1680204632-3,Skoda Octavia A8 2022,2023-01,"917,000 EGP","893,000 EGP","950,000 EGP"
3,1680204632-4,Skoda Octavia A8 2022,2022-12,"881,000 EGP","793,000 EGP","950,000 EGP"
4,1680204632-5,Skoda Octavia A8 2022,2022-11,"868,000 EGP","789,000 EGP","950,000 EGP"


In [16]:
# Count the missing entries in each column of car_prices_df
car_prices_df.isna().sum()

year                0
make            10301
model           10399
trim            10651
body            13195
transmission    65352
vin                 4
state               0
condition       11820
odometer           94
color             749
interior          749
seller              0
mmr                38
sellingprice       12
saledate           12
dtype: int64

In [17]:
# Disabled experiment: the two lines below would replace missing "make" values
# with the placeholder string "0" and then preview the frame.
# TODO(review): either delete this cell or replace it with a deliberate
# imputation step; a cell holding only commented-out code has no effect.
#car_prices_df["make"] = car_prices_df["make"].fillna("0")
#car_prices_df.head()

In [7]:
# Share of missing entries per column, expressed as a percentage
car_prices_df.isna().mean().mul(100)

year             0.000000
make             1.843292
model            1.860829
trim             1.905922
body             2.361154
transmission    11.694287
vin              0.000716
state            0.000000
condition        2.115107
odometer         0.016821
color            0.134028
interior         0.134028
seller           0.000000
mmr              0.006800
sellingprice     0.002147
saledate         0.002147
dtype: float64

In [8]:
# Count the missing entries in each column of new_cars_df
new_cars_df.isna().sum()

web-scraper-order    0
Car Model            0
Old Price            0
Price Change         0
New Price            0
date_range           0
dtype: int64

In [9]:
# Count the missing entries in each column of used_cars_df
used_cars_df.isna().sum()

web-scraper-order      0
Car Model              0
Month/Year           478
Average price        478
Minimum price        478
Maximum price        478
dtype: int64

### Step 2: Exploratory Data Analysis

In [20]:
# Build the modelling inputs from car_prices_df

# X holds the features: every column other than the selling price
X = car_prices_df.drop(columns=["sellingprice"])

# y holds the target: the selling price column
y = car_prices_df["sellingprice"]

In [None]:
# Scatter of odometer reading against selling price, drawn as one panel per
# "transmission" value in car_prices_df.
# This cell previously plotted a frame named `tips` with the columns "sex",
# "smoker", "total_bill" and "tip". No cell above loads such a frame, so the
# cell would raise NameError on a fresh kernel.
# NOTE(review): the columns chosen here are a suggestion; confirm which
# relationship this section is meant to explore.
g = sns.FacetGrid(car_prices_df, col="transmission")
g.map(sns.scatterplot, "odometer", "sellingprice", alpha=.7)
# Label the axes so the figure can be read on its own; the trailing semicolon
# suppresses the FacetGrid repr in the cell output.
g.set_axis_labels("Odometer reading", "Selling price");