# In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
dataset = pd.read_csv('cardataset.csv')

# Perform EDA
# Histogram of selling_price with a kernel density estimate drawn on top.
sns.histplot(dataset['selling_price'], kde=True)
plt.show()

# selling_price plotted against year.
sns.scatterplot(data=dataset, x='year', y='selling_price')
plt.show()

# One box plot of selling_price per categorical column, each in its own figure.
for category in ('fuel', 'transmission', 'seller_type'):
    sns.boxplot(data=dataset, x=category, y='selling_price')
    plt.show()

# Preprocess the dataset: everything except 'name' and the target becomes a
# feature; 'selling_price' is the regression target.
X = dataset.drop(['name', 'selling_price'], axis=1)
y = dataset['selling_price']

# Label encode categorical features.
# Each column gets its own fitted encoder, stored in `encoders`, so the
# category -> integer mapping of every column stays available afterwards.
# A single shared encoder only remembers the last column it was fitted on.
encoders = {}
for column in ('fuel', 'seller_type', 'transmission'):
    encoders[column] = LabelEncoder()
    X[column] = encoders[column].fit_transform(X[column])

# Backward compatibility: `le` previously ended up fitted on 'transmission'.
le = encoders['transmission']

# Split the dataset into train and test sets (80/20, fixed seed so the split
# is reproducible between runs).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Choose different algorithms and try to get the best performing model.
# Every candidate is fitted on the same training split and scored on the same
# test split, so the printed metrics can be compared directly.
models = [LinearRegression(), DecisionTreeRegressor(), RandomForestRegressor()]
for model in models:
    # scikit-learn's fit() takes the feature matrix first, then the target.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # RMSE is the square root of the mean squared error on the test split.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(model.__class__.__name__)
    print('RMSE:', rmse)
    print('R2 Score:', r2)

# Build a predictive system
def predict_price(model, Year, Present_Price, Kms_Driven, Fuel_Type, Seller_Type, Transmission, Owner):
    """Return the model's prediction for a single car.

    The categorical arguments are converted to 0/1 indicator flags and a
    single feature row is passed to ``model.predict``.

    Args:
        model: Object exposing ``predict(rows)``, where ``rows`` is a list
            of feature rows.
        Year: Passed through unchanged as the first feature.
        Present_Price: Passed through unchanged as the second feature.
        Kms_Driven: Passed through unchanged as the third feature.
        Fuel_Type: ``'Petrol'`` or ``'Diesel'`` sets the matching flag; any
            other value leaves both fuel flags at 0.
        Seller_Type: ``'Individual'`` sets its flag; anything else gives 0.
        Transmission: ``'Manual'`` sets its flag; anything else gives 0.
        Owner: Passed through unchanged as the last feature.

    Returns:
        The first element of ``model.predict`` for the one-row input.
    """
    # NOTE(review): the row below has eight values in a fixed order and uses
    # 0/1 indicator flags. It presumably has to match the columns the model
    # was fitted on - confirm against the training features, which are label
    # encoded elsewhere in this file.
    if Fuel_Type == 'Petrol':
        diesel_flag, petrol_flag = 0, 1
    elif Fuel_Type == 'Diesel':
        diesel_flag, petrol_flag = 1, 0
    else:
        diesel_flag, petrol_flag = 0, 0

    individual_flag = 1 if Seller_Type == 'Individual' else 0
    manual_flag = 1 if Transmission == 'Manual' else 0

    feature_row = [
        Year,
        Present_Price,
        Kms_Driven,
        diesel_flag,
        petrol_flag,
        individual_flag,
        manual_flag,
        Owner,
    ]
    return model.predict([feature_row])[0]

# Fit the random forest used by the predictive system; fit() returns the
# fitted estimator itself, so it can be bound directly.
model = RandomForestRegressor().fit(X_train, y_train)

# Example query for a single car.
print(predict_price(
    model,
    Year=2014,
    Present_Price=5.59,
    Kms_Driven=27000,
    Fuel_Type='Petrol',
    Seller_Type='Dealer',
    Transmission='Manual',
    Owner=0,
))


# New Section