In [None]:
# 📦 Imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

# 🔄 Load the Dataset
# The CSV is read relative to the current working directory.
df = pd.read_csv("dataset.csv")  # Change path if needed

# 📌 Basic Info
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called bare; wrapping it in print() would add a stray "None" line.
df.info()
print(df.head())

# 🎯 Target and Features
# "price_range" is the label column; every other column is used as a feature.
X = df.drop(columns="price_range")
y = df["price_range"]

# 🔧 Preprocessing: standardize every feature column
# The scaler is fit on all rows of X, which is fine for the exploratory steps
# that use X_scaled directly. NOTE(review): any held-out evaluation should fit
# its own scaler on the training rows only.
# The scaled array is re-wrapped with X's column names; its row index is a
# fresh default index, not X's index.
scaler = StandardScaler().fit(X)
X_scaled = pd.DataFrame(scaler.transform(X), columns=X.columns)

# 🔍 1. Class balance: number of rows per price_range value
plt.figure(figsize=(6, 4))
# countplot draws on the figure created above and returns its Axes, so the
# title and axis labels are applied in a single set() call.
sns.countplot(x=y, palette="viridis").set(
    title="Distribution of Price Range Classes",
    xlabel="Price Range",
    ylabel="Count",
)
plt.show()

# 🔍 2. Correlation Heatmap
# Pairwise correlations between the columns of df (features and target alike).
# Only numeric/bool columns are passed to corr(): since pandas 2.0,
# DataFrame.corr() raises on non-numeric columns instead of silently dropping
# them, so an explicit selection keeps this cell working across pandas versions.
plt.figure(figsize=(14, 10))
sns.heatmap(
    df.select_dtypes(include=["number", "bool"]).corr(),
    cmap="coolwarm",
    annot=False,
    linewidths=0.5,
)
plt.title("Feature Correlation Heatmap")
plt.show()

# 🌲 3. Feature importance from a Random Forest fit on all scaled rows
# fit() returns the estimator itself, so construction and fitting are chained.
# random_state is pinned so the importances are reproducible.
rf = RandomForestClassifier(random_state=42).fit(X_scaled, y)

# One row per feature, ordered from most to least important.
importances = rf.feature_importances_
feat_imp_df = pd.DataFrame(
    {"Feature": X.columns, "Importance": importances}
).sort_values("Importance", ascending=False)

# 📊 Feature importance as horizontal bars, using feat_imp_df's row order
plt.figure(figsize=(10, 6))
# Axis labels are left at seaborn's defaults (the column names); only the
# title is set explicitly.
sns.barplot(data=feat_imp_df, x="Importance", y="Feature", palette="viridis").set(
    title="Feature Importance (Random Forest)"
)
plt.show()

# 📊 4. Additional Chart: distribution of the "ram" column within each price class
plt.figure(figsize=(8, 5))
# One box per distinct value of y; the y-axis shows the raw (unscaled) "ram"
# values taken from df.
sns.boxplot(x=y, y=df["ram"], palette="magma").set(
    title="RAM vs. Price Range",
    xlabel="Price Range",
    ylabel="RAM (MB)",
)
plt.show()


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt

# 🧪 Step 1: Train-test split
# Split the *unscaled* features and fit the scaler on the training rows only.
# Splitting the already-scaled X_scaled (whose scaler saw every row) would leak
# test-set means/variances into training and bias the reported scores.
# stratify=y keeps the class proportions equal in both splits.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Use train_scaler (not the full-data `scaler`) for any new data that is fed
# to rf_model / lr_model.
train_scaler = StandardScaler().fit(X_train_raw)
X_train = pd.DataFrame(
    train_scaler.transform(X_train_raw),
    columns=X.columns,
    index=X_train_raw.index,
)
X_test = pd.DataFrame(
    train_scaler.transform(X_test_raw),
    columns=X.columns,
    index=X_test_raw.index,
)

# 🌲 Step 2: Train Random Forest
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
rf_preds = rf_model.predict(X_test)

# 📈 Step 3: Train Logistic Regression
# `multi_class` is deprecated since scikit-learn 1.5; the default solver
# already fits a multinomial model when the target has more than two classes,
# so the argument is dropped.
# NOTE(review): assumes price_range has more than two classes - confirm.
# max_iter is raised above the default to give the solver room to converge.
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train, y_train)
lr_preds = lr_model.predict(X_test)

# 🧮 Step 4: Score both models on the held-out split
rf_acc, lr_acc = (
    accuracy_score(y_test, preds) for preds in (rf_preds, lr_preds)
)

print("Random Forest Accuracy:", rf_acc)
print("Logistic Regression Accuracy:", lr_acc)

# Each report is printed under its own heading; sep="\n" puts the heading and
# the report table on separate lines.
print(
    "\nRandom Forest Classification Report:",
    classification_report(y_test, rf_preds),
    sep="\n",
)
print(
    "\nLogistic Regression Classification Report:",
    classification_report(y_test, lr_preds),
    sep="\n",
)

# 📊 Step 5: Side-by-side confusion matrices (rows = actual, columns = predicted)
rf_cm = confusion_matrix(y_true=y_test, y_pred=rf_preds)
lr_cm = confusion_matrix(y_true=y_test, y_pred=lr_preds)

fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))

# Left panel: Random Forest. fmt="d" renders the cell counts as integers.
sns.heatmap(rf_cm, annot=True, fmt="d", cmap="Blues", ax=axs[0])
axs[0].set(
    title="Random Forest Confusion Matrix",
    xlabel="Predicted",
    ylabel="Actual",
)

# Right panel: Logistic Regression, same layout with a different colormap.
sns.heatmap(lr_cm, annot=True, fmt="d", cmap="Greens", ax=axs[1])
axs[1].set(
    title="Logistic Regression Confusion Matrix",
    xlabel="Predicted",
    ylabel="Actual",
)

fig.tight_layout()
plt.show()
