# **1. Import required libraries**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.preprocessing import PolynomialFeatures
import numpy as np

# **2. Data Processing**

In [None]:
# Load the car listings and discard the columns that are not used as features.
df = pd.read_csv('.../Car-Evaluation/Dataset/Final/DataVersion3.csv')
df = df.drop(columns=['ad_id', 'car_name'])

# Drop incomplete rows BEFORE encoding. Once a categorical column has been
# label-encoded it holds only integer codes, so a later dropna() can no longer
# detect (and remove) rows whose categorical value was missing.
df = df.dropna()

# Save 'price_in_billion' as target
price_column = df['price_in_billion']
df = df.drop(columns=['price_in_billion'])

# Prepare for encoding
columns_for_encoding = ['origin', 'car_model', 'exterior_color', 'interior_color', 'engine', 'transmission', 'drive_type']

# Label Encoding: one encoder per column, kept in `label_encoders` so the
# integer codes can be mapped back to the original categories later.
label_encoders = {}
for column in columns_for_encoding:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le
# Re-attach the target; assignment aligns on the row index shared with `df`.
df['price_in_billion'] = price_column

# Divide into features and target
features = df.drop(columns=['price_in_billion'])
target = df['price_in_billion']

# **3. Build Multiple Polynomial Regression Model**

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Degree-2 polynomial expansion, fitted on the training split only and then
# applied unchanged to both splits.
# NOTE(review): include_bias=True adds a constant column; LinearRegression fits
# its own intercept by default, so that column looks redundant -- confirm.
poly_features = PolynomialFeatures(degree=2, include_bias=True).fit(X_train)
X_train_poly = poly_features.transform(X_train)
X_test_poly = poly_features.transform(X_test)

# Train Regression model on Polynomial features (fit() returns the estimator itself).
model = LinearRegression().fit(X_train_poly, y_train)

# Regularised alternative, currently disabled (Ridge is not imported in this file):
#ridge_model = Ridge(alpha=1.0)
#ridge_model.fit(X_train_poly, y_train)

In [None]:
# Show the training matrix shape before and after the polynomial expansion,
# one shape per line.
print(X_train.shape, X_train_poly.shape, sep="\n")

# **4. Evaluate Model**

In [None]:
# Score the fitted model on the held-out test split.
y_pred = model.predict(X_test_poly)

# RMSE is the square root of the mean squared error between truth and prediction.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)

# Coefficient of determination on the same predictions.
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print("Root Mean Squared Error (RMSE):", rmse)
print("R2 Score:", r2)