# **1. Import required libraries**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.ensemble import StackingRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder

# **2. Data Preprocessing**

In [None]:
# Load the prepared dataset; the 'ad_id' column is discarded right away.
df = pd.read_csv('/Car-Evaluation/Dataset/Final/Remove-null-car_name-and-fill-null.csv').drop(columns='ad_id')

# Detach the target: pop() returns the 'price_in_billion' Series and removes
# that column from df in the same step.
price_column = df.pop('price_in_billion')

# Columns to expand into one-hot indicator columns
columns_for_encoding = [
    'origin', 'car_model', 'exterior_color', 'interior_color',
    'engine', 'transmission', 'drive_type', 'car_name',
]

# Fit the encoder and transform the selected columns in one pass.
# The encoder is left at its default (sparse) output and densified with
# .toarray(): the `sparse=False` keyword was renamed to `sparse_output` in
# scikit-learn 1.2 and removed in 1.4, so avoiding the keyword keeps this cell
# working on both older and newer releases.
encoder = OneHotEncoder()
encoded_values = encoder.fit_transform(df[columns_for_encoding]).toarray()

# Wrap the dense matrix in a DataFrame, reusing df's index so the rows stay
# aligned with price_column when it is attached below.
# NOTE(review): only the one-hot columns are carried over; every other column
# of df is left out of df_encoded -- confirm this is intended.
df_encoded = pd.DataFrame(
    encoded_values,
    columns=encoder.get_feature_names_out(columns_for_encoding),
    index=df.index,
)

# Add the 'price_in_billion' target back next to the encoded features
df_encoded['price_in_billion'] = price_column

# Discard every row that contains a missing value
df = df_encoded.dropna(axis=0, how='any')

# Split the cleaned frame into the target Series and the feature columns
target = df['price_in_billion']
features = df.drop('price_in_billion', axis=1)

# **3. Split train and test**

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    random_state=42,
    test_size=0.2,
)

# **4. Train Stacking Model**

In [None]:
# Base estimators whose predictions are fed to the final estimator
base_models = [
    # Base model 1
    ('ridge', Ridge(alpha=1.0)),
    # Base model 2
    ('svr', SVR(C=1.0, kernel='linear')),
    # Base model 3
    (
        'gbr',
        GradientBoostingRegressor(
            n_estimators=100,
            learning_rate=0.1,
            max_depth=3,
            random_state=42,
        ),
    ),
]

# Stacked ensemble: the base models above are combined by a gradient-boosting
# final model (meta-learner), using 5-fold cross-validation (cv=5).
stacking_model = StackingRegressor(
    estimators=base_models,
    final_estimator=GradientBoostingRegressor(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=3,
        random_state=42,
    ),
    cv=5,
)

# **5. Model Evaluation**

In [None]:
# Fit the stacked ensemble on the training split, then score it on the test split
stacking_model.fit(X_train, y_train)
y_pred = stacking_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse:.2f}')

# Print up to five predictions next to their actual values.
# Slicing (rather than indexing positions 0..4) avoids an IndexError when the
# test split contains fewer than five rows.
for predicted, actual in zip(y_pred[:5], y_test.iloc[:5]):
    print(f'Predicted: {predicted:.2f}, Actual: {actual:.2f}')