In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load dataset
df = pd.read_csv("macbook_features_dataset.csv")

# Merge the five feature columns into one text field per row.
# Missing cells are skipped and every remaining value is cast to str,
# because a plain " ".join(row) raises TypeError on NaN / non-string cells.
feature_columns = ["Feature_1", "Feature_2", "Feature_3", "Feature_4", "Feature_5"]
df["Features"] = df[feature_columns].apply(
    lambda row: " ".join(str(value) for value in row if pd.notna(value)),
    axis=1,
)

# Encode descriptions.
# LabelEncoder assigns each distinct description an integer id. The ids are
# class labels only: their numeric order carries no meaning, so the target
# must be handled as a classification target, not a regression target.
label_encoder = LabelEncoder()
df["Description_Encoded"] = label_encoder.fit_transform(df["Description"])

# Vectorize text
tfidf = TfidfVectorizer()
X = tfidf.fit_transform(df["Features"])
y = df["Description_Encoded"]

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model.
# A classifier is used because the target is a set of unordered class ids;
# a regressor would average ids of unrelated descriptions into a number that
# corresponds to no class at all.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate model.
# predict() already returns class ids, so no integer truncation is needed.
# Accuracy is the share of exactly matching labels, scaled to a percentage
# (always within 0-100, unlike "100 - MSE", which is not an accuracy).
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred) * 100

print(f"Model Accuracy: {accuracy:.2f}%")


Model Accuracy: 100.00%
