In [93]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression   # ‡¶ö‡¶æ‡¶á‡¶≤‡ßá RandomForest, XGB ‡¶á‡¶§‡ßç‡¶Ø‡¶æ‡¶¶‡¶ø ‡¶¶‡¶ø‡¶§‡ßá ‡¶™‡¶æ‡¶∞‡ßã
import joblib

# ==========================
# 1. Dataset Load
# ==========================
df = pd.read_csv("smartphones.csv")   # ‡¶§‡ßã‡¶Æ‡¶æ‡¶∞ dataset ‡¶è‡¶ñ‡¶æ‡¶®‡ßá

# Features & Target
X = df.drop("price", axis=1)   # price ‡¶¨‡¶æ‡¶¶ ‡¶¶‡¶ø‡ßü‡ßá ‡¶∏‡¶¨ feature
y = df["price"]                # target column
# Numeric fill
for col in X.select_dtypes(include=['number']).columns:
    X[col] = X[col].fillna(X[col].mean())

# Categorical fill
for col in X.select_dtypes(include=['object']).columns:
    X[col] = X[col].fillna(X[col].mode()[0])

# ==========================
# 2. Categorical & Numeric Columns
# ==========================
categorical_cols = ['brand_name','model','processor_brand','os']
numeric_cols = [col for col in X.columns if col not in categorical_cols]

# ==========================
# 3. ColumnTransformer ‡¶¨‡¶æ‡¶®‡¶æ‡¶®‡ßã
# ==========================
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
        ("num", StandardScaler(), numeric_cols)
    ]
)

# ==========================
# 4. Pipeline ‡¶¨‡¶æ‡¶®‡¶æ‡¶®‡ßã
# ==========================
pipeline = Pipeline(steps=[
    ("preprocess", preprocessor),
    ("model", LinearRegression())
])

# ==========================
# 5. Train/Test Split
# ==========================
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ==========================
# 6. Train Model
# ==========================
pipeline.fit(X_train, y_train)

# ==========================
# 7. Save Pipeline
# ==========================
joblib.dump(pipeline, "phone_pipeline.pkl")
print("‚úÖ Pipeline saved successfully!")


‚úÖ Pipeline saved successfully!


In [94]:
import joblib
import pandas as pd

# Load trained pipeline
pipeline = joblib.load("phone_pipeline.pkl")

# Example User Input
user_input = pd.DataFrame([{
    'brand_name': 'Samsung',
    'model': 'Galaxy S21',
    'avg_rating': 4.5,
    '5G_or_not': 1,
    'processor_brand': 'snapdragon',
    'num_cores': 8,
    'processor_speed': 2.9,
    'battery_capacity': 4000,
    'fast_charging_available': 1,
    'fast_charging': 25,
    'ram_capacity': 8,
    'internal_memory': 128,
    'screen_size': 6.2,
    'refresh_rate': 120,
    'num_rear_cameras': 3,
    'os': 'Android',
    'primary_camera_rear': 64,
    'primary_camera_front': 32,
    'extended_memory_available': 1,
    'resolution_height': 2400,
    'resolution_width': 1080
}])

# Directly predict (encoding + scaling automatically ‡¶π‡¶¨‡ßá)
predicted_price = pipeline.predict(user_input)
print("üí∞ Predicted Price:", predicted_price[0])


üí∞ Predicted Price: 63602.49110171822
