In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.preprocessing import OneHotEncoder
import joblib


# 1. Load the dataset
df = pd.read_csv("car_price_prediction.csv")

# 2. Data preprocessing
# Rename the columns whose original names contain spaces or punctuation.
new_columns = {
    'Prod. year': 'Produced_year',
    'Gear box type': 'Gear_type',
    'Fuel type': 'Fuel_type'
}
# Drop every row that has a missing value in any column (change the strategy
# if needed). Chained so that no frame is mutated in place.
df = df.rename(columns=new_columns).dropna()

# Columns consumed by the model, grouped by how they are preprocessed.
numeric_features = ['Produced_year']
categorical_features = ['Manufacturer', 'Model', 'Category', 'Fuel_type', 'Gear_type']

# Define features and target.
# Only the columns the pipeline actually uses are kept, so the fitted model
# expects the same six columns that car_price_streamlit.py passes to predict().
X = df[numeric_features + categorical_features]
y = df['Price']

# Create transformers
numeric_transformer = StandardScaler()
# No drop='first' here: with handle_unknown='ignore' a category that was not
# seen during fit is encoded as all zeros, which would be indistinguishable
# from the dropped first category. Keeping every category makes "unknown" a
# distinct encoding; the L1-regularised regressor copes with the redundancy.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# 3. Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Define and train the model
# ColumnTransformer routes each group of columns to its transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Preprocessing and regression live in one pipeline so the saved artefact can
# be fed raw feature columns at prediction time.
lasso_model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', Lasso(alpha=1.0))
])

# Train the model
lasso_model.fit(X_train, y_train)

# Report performance on the held-out split (Pipeline.score delegates to the
# regressor's score, i.e. R^2) so the split is actually used.
held_out_r2 = lasso_model.score(X_test, y_test)
print(f"Held-out R^2: {held_out_r2:.3f}")

# 5. Save the model
joblib.dump(lasso_model, 'lasso_model.pkl')
# Save the processed dataframe; the Streamlit app reads it to populate its widgets.
df.to_csv("car_price_prediction_edit.csv", index=False)

## Streamlit

In [2]:
%%writefile car_price_streamlit.py
import streamlit as st
import pandas as pd
import joblib


# Processed dataset saved by the training cell; it supplies the choices
# offered by the sidebar widgets.
df = pd.read_csv('car_price_prediction_edit.csv')

# Trained prediction pipeline.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
predicted_model = joblib.load('lasso_model.pkl')


def _unique_values_by(key_column, value_column):
    """Map each distinct `key_column` value in `df` to the list of distinct
    `value_column` values found in the same rows."""
    uniques_per_key = df.groupby(key_column)[value_column].unique()
    # Plain lists instead of numpy arrays for better compatibility.
    return {key: list(values) for key, values in uniques_per_key.items()}


# Lookup tables that drive the dependent dropdowns:
# manufacturer -> models, model -> categories, category -> fuel / gear types.
model_dict = _unique_values_by('Manufacturer', 'Model')
category_dict = _unique_values_by('Model', 'Category')
fuel_type_dict = _unique_values_by('Category', 'Fuel_type')
gear_type_dict = _unique_values_by('Category', 'Gear_type')


# Streamlit UI
def main():
    """Render the input form in the sidebar and display the predicted price.

    The sidebar collects manufacturer, model, category, fuel type, gear type
    and production year. Model choices depend on the selected manufacturer,
    category choices on the selected model, and fuel/gear choices on the
    selected category, using the module-level lookup dictionaries. The
    selections are echoed back as a one-row table and passed to
    `predicted_model.predict` as a single-row DataFrame.
    """
    st.title("Car Details Input")

    # Sidebar with feature input
    st.sidebar.header("Input Features")

    # Manufacturer Selection
    manufacturer = st.sidebar.selectbox(" Manufacturer", df['Manufacturer'].unique())

    # Based on Manufacturer, display the Models
    model = st.sidebar.selectbox("Model", model_dict[manufacturer])

    # Based on Model, display the Categories
    category = st.sidebar.selectbox("Select Category", category_dict[model])

    # Based on Category, display the Fuel Types and Gear Types
    fuel_type = st.sidebar.selectbox("Fuel Type", fuel_type_dict[category])
    gear_type = st.sidebar.selectbox("Gear Type", gear_type_dict[category])

    produced_year = st.sidebar.slider("Produced Year", min_value=2000, max_value=2023, value=2010, step=1)

    # Echo the selections back to the user with human-friendly labels.
    display_data = {
        'Manufacturer': manufacturer,
        'Model': model,
        'Produced Year': f"{produced_year}",  # As text, so no thousands separator is shown
        'Category': category,
        'Fuel Type': fuel_type,
        'Gear Type': gear_type
    }
    st.subheader("User Input Features")
    st.write(pd.DataFrame([display_data]))

    # Single-row frame keyed by the column names the model was trained on.
    data_for_prediction = {
        'Manufacturer': [manufacturer],
        'Model': [model],
        'Produced_year': [produced_year],
        'Category': [category],
        'Fuel_type': [fuel_type],
        'Gear_type': [gear_type]
    }
    predicted_price = predicted_model.predict(pd.DataFrame(data_for_prediction))[0]

    # Display the prediction in the Streamlit app
    st.subheader('Predicted Price')
    if predicted_price < 0:
        # Nothing constrains the regressor's output to be non-negative, and a
        # negative dollar amount is not a usable estimate, so say so instead
        # of presenting it as a successful result.
        st.warning("The model could not produce a meaningful price estimate for this combination of features.")
    else:
        st.success("The estimated price of your car is ${}. ".format(int(predicted_price)))


if __name__ == '__main__':
    main()


Writing car_price_streamlit.py


In [None]:
# Launch the Streamlit app saved to car_price_streamlit.py by the %%writefile cell above.
!streamlit run car_price_streamlit.py