In [1]:
from ultralytics import YOLO
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import joblib


# ------------------ Part 1: YOLO Model for Damage Detection ------------------

# Load a pre-trained YOLO model
model = YOLO("yolov8n.pt")

# Train the model on your custom dataset
results = model.train(data=r"F:\ABDUL\ABDUL 2024\CAR_PRICE_PRE_ BASED_CAR_DAMAGE_DEC\car_price\COMBINE\Dataset\data.yaml", epochs=10)

# Save the best weights
model.save("best_damage_detection_model.pt")

def detect_damages(image_path):
    """
    Detect damages from the car image using a trained YOLO model.
    
    Args:
    - image_path (str): Path to the image file.

    Returns:
    - damage_count (int): Number of detected damages.
    """
    infer = YOLO("best_damage_detection_model.pt")
    results = infer.predict(image_path)
    # Extract damage information from results (e.g., count of damages)
    damage_count = len(results[0].boxes)  # Assuming boxes contain detected damages
    return damage_count


# ------------------ Part 2: Car Price Prediction with Random Forest ------------------

# Load dataset
df = pd.read_csv('Car_price.csv')

# Drop non-predictive columns and preprocess data
df.drop(columns=['name'], inplace=True)
categorical_cols = ['fuel', 'seller_type', 'transmission', 'owner']
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# Convert features to numerical values and handle missing values
df['mileage'] = df['mileage'].str.extract('(\d+\.\d+|\d+)').astype(float)
df['engine'] = df['engine'].str.extract('(\d+)').astype(float)
df['max_power'] = df['max_power'].str.extract('(\d+\.\d+|\d+)').astype(float)
df['torque'] = df['torque'].str.extract('(\d+\.\d+|\d+)').astype(float)
df.fillna(df.median(), inplace=True)

# Define features and target variable
X = df.drop(columns=['selling_price'])
y = df['selling_price']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest model
model_rf = RandomForestRegressor(random_state=42)
model_rf.fit(X_train, y_train)

# Save the model to a file
joblib.dump(model_rf, 'car_price_model.pkl')


# ------------------ Part 3: Preprocess and Predict Car Price ------------------

def preprocess_and_predict(input_data):
    """
    Preprocess the input data and predict the car price using the trained Random Forest model.
    
    Args:
    - input_data (dict): Input data for prediction.

    Returns:
    - predicted_price (float): Predicted car price.
    """
    # Load the pre-trained Random Forest model
    model_rf = joblib.load('car_price_model.pkl')

    # Process input data similar to training data preprocessing
    df_input = pd.DataFrame([input_data])
    categorical_cols = ['fuel', 'seller_type', 'transmission', 'owner']
    df_input = pd.get_dummies(df_input, columns=categorical_cols, drop_first=True)

    # Convert features to numerical values and handle missing values as done in training
    df_input['mileage'] = df_input['mileage'].str.extract('(\d+\.\d+|\d+)').astype(float)
    df_input['engine'] = df_input['engine'].str.extract('(\d+)').astype(float)
    df_input['max_power'] = df_input['max_power'].str.extract('(\d+\.\d+|\d+)').astype(float)
    df_input['torque'] = df_input['torque'].str.extract('(\d+\.\d+|\d+)').astype(float)
    df_input.fillna(df_input.median(), inplace=True)

    # Ensure all columns used during training are present in the input data
    missing_cols = set(X.columns) - set(df_input.columns)
    for col in missing_cols:
        df_input[col] = 0

    # Reorder columns to match the model's input structure
    df_input = df_input[X.columns]

    # Make predictions for car price based on input data
    predicted_price = model_rf.predict(df_input)[0]
    
    return predicted_price


New https://pypi.org/project/ultralytics/8.3.28 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.21  Python-3.8.0 torch-2.4.1+cpu CPU (Intel Core(TM) i9-14900K)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=F:\ABDUL\ABDUL 2024\CAR_PRICE_PRE_ BASED_CAR_DAMAGE_DEC\car_price\COMBINE\Dataset\data.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train2, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=F

[34m[1mtrain: [0mScanning F:\ABDUL\ABDUL 2024\CAR_PRICE_PRE_ BASED_CAR_DAMAGE_DEC\car_price\COMBINE\Dataset\train\labels.cache...[0m
[34m[1mval: [0mScanning F:\ABDUL\ABDUL 2024\CAR_PRICE_PRE_ BASED_CAR_DAMAGE_DEC\car_price\COMBINE\Dataset\valid\labels.cache... 6[0m


Plotting labels to runs\detect\train2\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000476, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns\detect\train2[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G      1.952      4.707       2.21          8        640: 100%|██████████| 145/145 [06:37<00:00,  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:35

                   all        679       1054      0.688     0.0802     0.0521     0.0199






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10         0G      1.865      3.889      2.072         14        640: 100%|██████████| 145/145 [06:29<00:00,  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:34

                   all        679       1054      0.452      0.156      0.121     0.0538






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10         0G      1.836      3.519       2.02         10        640: 100%|██████████| 145/145 [04:55<00:00,  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:23


                   all        679       1054      0.464      0.197      0.127     0.0571

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10         0G       1.81      3.245      1.985         15        640: 100%|██████████| 145/145 [04:12<00:00,  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:22

                   all        679       1054      0.465      0.215      0.164     0.0701






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10         0G      1.774       3.01      1.957         10        640: 100%|██████████| 145/145 [04:08<00:00,  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:22

                   all        679       1054      0.477      0.265      0.236      0.102






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10         0G       1.76      2.806      1.911         13        640: 100%|██████████| 145/145 [04:17<00:00,  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:23

                   all        679       1054      0.459      0.279      0.241      0.107






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10         0G      1.711      2.635      1.878         12        640: 100%|██████████| 145/145 [04:23<00:00,  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:22

                   all        679       1054      0.392      0.291      0.278      0.126






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10         0G       1.68      2.494      1.835          8        640: 100%|██████████| 145/145 [04:23<00:00,  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:22

                   all        679       1054      0.406      0.307      0.298      0.132






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10         0G      1.637       2.37      1.811         12        640: 100%|██████████| 145/145 [04:30<00:00,  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:22

                   all        679       1054      0.371       0.36      0.327      0.148






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10         0G      1.593      2.251      1.756          9        640: 100%|██████████| 145/145 [04:36<00:00,  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:23

                   all        679       1054       0.37      0.384      0.349      0.158






10 epochs completed in 0.882 hours.
Optimizer stripped from runs\detect\train2\weights\last.pt, 6.2MB
Optimizer stripped from runs\detect\train2\weights\best.pt, 6.2MB

Validating runs\detect\train2\weights\best.pt...
Ultralytics 8.3.21  Python-3.8.0 torch-2.4.1+cpu CPU (Intel Core(TM) i9-14900K)
Model summary (fused): 168 layers, 3,008,963 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:18


                   all        679       1054      0.371      0.384      0.351      0.158
Front-Windscreen-Damage         32         33      0.387      0.424      0.402      0.184
      Headlight-Damage         64         68      0.299      0.441       0.37      0.175
Rear-windscreen-Damage         50         51      0.667      0.706      0.718      0.368
     RunningBoard-Dent         25         30      0.228      0.167      0.151     0.0429
     Sidemirror-Damage         26         26        0.7      0.538      0.625      0.347
      Signlight-Damage          5          5          0          0    0.00637    0.00285
      Taillight-Damage         31         31      0.642      0.548       0.62      0.312
           bonnet-dent        148        152      0.582       0.73      0.689       0.31
             boot-dent         20         20      0.458      0.129      0.115     0.0367
        doorouter-dent        113        142       0.35      0.671      0.503      0.221
           fender-de

In [2]:
# ------------------ Part 4: Combine Damage Detection and Car Price Prediction ------------------

# Example raw input data including image path for damage detection
raw_input_data = {
    'year': 2014,
    'km_driven': 145500,
    'fuel': 'Diesel',
    'seller_type': 'Individual',
    'transmission': 'Manual',
    'owner': 'First Owner',
    'mileage': '23.4 kmpl',
    'engine': '1248 CC',
    'max_power': '74 bhp',
    'torque': '190Nm',
    'seats': 5,
}

# Detect damages from an image and predict price based on CSV data and damages detected.
image_path = "https://c8.alamy.com/comp/C05MX1/accident-damage-on-a-car-after-an-accident-C05MX1.jpg"  # Path to the car image for damage detection

# Detect the number of damages in the image
damage_count = detect_damages(image_path)

# Predict the car price using the preprocessed data
predicted_price = preprocess_and_predict(raw_input_data)

# Adjust price based on the number of damages detected (assuming each damage reduces price by 1000)
final_price_estimate = predicted_price - (damage_count * 1000)

print(f"Predicted Selling Price after considering damages: {final_price_estimate}")

#in this code write after train only load models then make prediction


Downloading https://c8.alamy.com/comp/C05MX1/accident-damage-on-a-car-after-an-accident-C05MX1.jpg to 'accident-damage-on-a-car-after-an-accident-C05MX1.jpg'...


183kB [00:00, 830kB/s]

image 1/1 F:\ABDUL\ABDUL 2024\CAR_PRICE_PRE_ BASED_CAR_DAMAGE_DEC\car_price\COMBINE\accident-damage-on-a-car-after-an-accident-C05MX1.jpg: 480x640 3 doorouter-dents, 45.0ms
Speed: 3.0ms preprocess, 45.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)
Predicted Selling Price after considering damages: 450449.87



