In [51]:
import pandas as pd
import json
import numpy as np
import datetime


In [29]:
def handle_price(price):
    try:
        return float(price)
    except ValueError:
        return np.nan

In [44]:
# Pandas settings
# representing large numbers properly
pd.options.display.float_format = '{:.2f}'.format


In [25]:
FILE_PATH='/Users/rizwanqaiser/Play/pkwheels_scraper/datasets/output-grande.json'
df = pd.read_json(FILE_PATH)

In [38]:
# Handling price data
df['Price'] = df['Price'].apply(handle_price)
df['priceNotes'] = df['Price'].apply(lambda x: 'Call for price' if pd.isnull(x) else '')
cols = df.columns.tolist()
cols.insert(cols.index('Price') + 1, cols.pop(cols.index('priceNotes')))
df = df[cols]

In [39]:
# Converting data types
df['Ad No'] = df['Ad No'].astype(str)
df['Name'] = df['Name'].astype(str)
df['Price'] = df['Price'].astype(float)
df['Model Year'] = pd.to_datetime(df['Model Year'], format='%Y').dt.year
df['Mileage'] = df['Mileage'].astype(int)
df['Engine Type'] = df['Engine Type'].astype(str)
df['Engine Capacity'] = df['Engine Capacity'].astype(str)
df['Transmission'] = df['Transmission'].astype(str)
df['Color'] = df['Color'].astype(str)
df['Assembly'] = df['Assembly'].astype(str)
df['Body Type'] = df['Body Type'].astype(str)
df['Features'] = df['Features'].astype(str)
df['Last Updated'] = pd.to_datetime(df['Last Updated'])
df['URL'] = df['URL'].astype(str)

In [45]:
# Summary Statistics
df.describe()


Unnamed: 0,Price,Model Year,Mileage,Registered City,Last Updated
count,411.0,426.0,426.0,0.0,426
mean,5608442.82,2018.41,47691.58,,2023-05-26 04:30:25.352112640
min,1365000.0,1993.0,38.0,,2023-04-28 00:00:00
25%,4350000.0,2017.0,16000.0,,2023-05-18 00:00:00
50%,5800000.0,2019.5,40000.0,,2023-05-30 00:00:00
75%,6900000.0,2021.0,71000.0,,2023-06-06 00:00:00
max,8000000.0,2022.0,200000.0,,2023-06-11 00:00:00
std,1471532.22,4.29,38048.38,,


In [53]:
# Metric definitions
 """
    AgeScore:
    This score represents the relative 'newness' 
    of the car. A higher value means the car is newer relative to the maximum car age considered.
    """

MileageScore: "This score represents the relative amount of use the car has had. A higher score means the car has lower mileage relative to the maximum mileage considered."

PriceScore: "This score represents the relative affordability of the car. A higher score means the car is cheaper relative to the maximum price considered."

CarValueScore: "This score is an average of the Age, Mileage, and Price scores. It provides an overall value score for the car considering these three factors, with a higher score indicating better value."

# Add columns with metrics
current_year = datetime.datetime.now().year
MaxCarAge = 20  # adjust as per your definition of an 'old' car
MaxMileage = 200000  # adjust as per your definition of 'high-mileage'
MaxPrice = 30000  # adjust as per the maximum price you are willing to pay

df['AgeScore'] = 1 - ((current_year - df['Model Year']) / MaxCarAge)
df['MileageScore'] = 1 - (df['Mileage'] / MaxMileage)
df['PriceScore'] = 1 - (df['Price'] / MaxPrice)

df['CarValueScore'] = (df['AgeScore'] + df['MileageScore'] + df['PriceScore']) / 3

In [54]:
df

Unnamed: 0,Ad No,Name,Price,priceNotes,Model Year,Location,Mileage,Registered City,Engine Type,Engine Capacity,...,Color,Assembly,Body Type,Features,Last Updated,URL,AgeScore,MileageScore,PriceScore,CarValueScore
0,7548009,Toyota Corolla Altis Grande CVT-i 1.8 2020,5750000.00,,2020,\n,48300,,Petrol,1798 cc,...,Super white,Local,Sedan,"ABS, AM/FM Radio, Air Bags, Air Conditioning,...",2023-06-11,https://www.pakwheels.com/used-cars/toyota-cor...,0.85,0.76,-190.67,-63.02
1,7547540,Toyota Corolla Altis Grande X CVT-i 1.8 Beige ...,7500000.00,,2022,\n,600,,Petrol,1799 cc,...,Super White,Local,Sedan,"ABS, AM/FM Radio, Air Bags, Air Conditioning,...",2023-06-10,https://www.pakwheels.com/used-cars/toyota-cor...,0.95,1.00,-249.00,-82.35
2,7499742,Toyota Corolla Altis Grande X CVT-i 1.8 Black ...,5995000.00,,2021,\n,22000,,Petrol,1798 cc,...,Attitude Black,Local,Sedan,"ABS, AM/FM Radio, Air Bags, Air Conditioning,...",2023-06-11,https://www.pakwheels.com/used-cars/toyota-cor...,0.90,0.89,-198.83,-65.68
3,7547438,Toyota Corolla Altis Grande CVT-i 1.8 2016,4350000.00,,2016,\n,84000,,Petrol,1798 cc,...,Bronze Mica,Local,Sedan,"ABS, AM/FM Radio, Air Bags, Air Conditioning,...",2023-06-10,https://www.pakwheels.com/used-cars/toyota-cor...,0.65,0.58,-144.00,-47.59
4,7547261,Toyota Corolla Altis Grande X CVT-i 1.8 Beige ...,6990000.00,,2022,\n,7000,,Petrol,1800 cc,...,White,Local,Sedan,"ABS, AM/FM Radio, Air Bags, Air Conditioning,...",2023-06-10,https://www.pakwheels.com/used-cars/toyota-cor...,0.95,0.96,-232.00,-76.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
421,7385331,Toyota Corolla Altis Grande CVT-i 1.8 2014,3900000.00,,2014,\n,100000,,Petrol,1798 cc,...,Super white,Local,Sedan,"ABS, AM/FM Radio, Air Bags, Air Conditioning,...",2023-04-28,https://www.pakwheels.com/used-cars/toyota-cor...,0.55,0.50,-129.00,-42.65
422,7388250,Toyota Corolla Altis Grande CVT-i 1.8 2019,5540000.00,,2019,\n,37000,,Petrol,1798 cc,...,Super white,Local,Sedan,"ABS, AM/FM Radio, Air Bags, Air Conditioning,...",2023-04-28,https://www.pakwheels.com/used-cars/toyota-cor...,0.80,0.81,-183.67,-60.68
423,7405787,Toyota Corolla Altis Grande CVT-i 1.8 2019,5600000.00,,2019,\n,45800,,Petrol,1798 cc,...,Super white,Local,Sedan,"ABS, AM/FM Radio, Air Bags, Air Conditioning,...",2023-05-02,https://www.pakwheels.com/used-cars/toyota-cor...,0.80,0.77,-185.67,-61.37
424,7404645,Toyota Corolla Altis Grande CVT-i 1.8 2016,4400000.00,,2016,\n,70000,,Petrol,1798 cc,...,Super white,Local,Sedan,"ABS, AM/FM Radio, Air Bags, Air Conditioning,...",2023-05-03,https://www.pakwheels.com/used-cars/toyota-cor...,0.65,0.65,-145.67,-48.12
