In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [2]:

# Load the car-price dataset from the project's Resources folder.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# row index that was written out with the CSV — confirm whether it is needed.
df = pd.read_csv(filepath_or_buffer='Resources/model_car_prices_no_mmr.csv')

# Show the first five rows as a quick sanity check of columns and values.
df.head(5)


Unnamed: 0,year,make,body,color,interior,sellingprice,odometer,automatic
0,2015,18,SUV,17,1,21500.0,16639.0,True
1,2015,18,SUV,17,0,21500.0,9393.0,True
2,2014,2,Sedan,8,1,30000.0,1331.0,True
3,2015,40,Sedan,17,1,27750.0,14282.0,True
4,2014,2,Sedan,8,1,67000.0,2641.0,True


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

# Fit one Lasso regression per vehicle body type and report hold-out metrics.
# `df` is the DataFrame loaded from the CSV in an earlier cell.

# Predictors shared by every per-body model. 'body' is left out because it
# defines the subsets rather than acting as a feature.
# NOTE(review): 'make', 'interior' and 'color' appear to be integer label
# codes; a linear model treats them as ordinal magnitudes — confirm intended.
# NOTE(review): the features are not standardized before fitting, and the L1
# penalty is sensitive to feature scale — consider scaling; verify impact.
features = ['year', 'odometer', 'make', 'interior', 'color', 'automatic']

# Distinct body types. Missing values are dropped first: `unique()` would
# otherwise return NaN, and `df['body'] == NaN` matches no rows, so the
# train/test split below would raise on an empty subset.
unique_bodies = df['body'].dropna().unique()

# One record per evaluated body type, so the metrics can be compared as a
# table afterwards instead of only being printed.
metric_rows = []

# Process each body type separately
for body_type in unique_bodies:
    print(f"Results for body type: {body_type}")

    # Rows belonging to the current body type only
    df_body = df[df['body'] == body_type]

    # A subset with fewer than two rows cannot fill both a train and a test
    # partition; train_test_split would raise, so skip it explicitly.
    if len(df_body) < 2:
        print(f'Skipping {body_type}: only {len(df_body)} row(s), cannot split into train and test sets\n')
        continue

    # Feature matrix and target ('body' itself is not a model input)
    X = df_body[features]
    y = df_body['sellingprice']

    # Hold out 20% of the rows for evaluation; fixed seed for reproducibility
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

    # alpha is the L1 regularization strength. random_state only takes effect
    # when selection='random'; it is kept here for parity with the original.
    lasso_model = Lasso(alpha=1.0, random_state=1)

    # Train the model
    lasso_model.fit(X_train, y_train)

    # Predict on the held-out rows
    y_pred = lasso_model.predict(X_test)

    # Hold-out performance metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Print the performance metrics
    print(f'Mean Squared Error (MSE) for {body_type}: {mse}')
    print(f'R-squared for {body_type}: {r2}\n')

    metric_rows.append({
        'body': body_type,
        'n_train': len(X_train),
        'n_test': len(X_test),
        'mse': mse,
        'r2': r2,
    })

# Summary table of the per-body results (rendered as the cell's output)
lasso_metrics = pd.DataFrame(metric_rows)
lasso_metrics



Results for body type: SUV
Mean Squared Error (MSE) for SUV: 50863980.477113165
R-squared for SUV: 0.5164161899006916

Results for body type: Sedan
Mean Squared Error (MSE) for Sedan: 33244201.501077134
R-squared for Sedan: 0.43271484081062994

Results for body type: Convertible
Mean Squared Error (MSE) for Convertible: 119231386.65767275
R-squared for Convertible: 0.4056136041046525

Results for body type: Coupe
Mean Squared Error (MSE) for Coupe: 103759368.52537708
R-squared for Coupe: 0.41010328858456746

Results for body type: Wagon
Mean Squared Error (MSE) for Wagon: 19201411.05309631
R-squared for Wagon: 0.6540820979380113

Results for body type: Hatchback
Mean Squared Error (MSE) for Hatchback: 15590918.311416002
R-squared for Hatchback: 0.4372247957185049

Results for body type: Crew_Truck
Mean Squared Error (MSE) for Crew_Truck: 34329715.70630622
R-squared for Crew_Truck: 0.6670588694023014

Results for body type: Minivan
Mean Squared Error (MSE) for Minivan: 11011174.42328856