In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Fetch the 'iabhishekofficial/mobile-price-classification' dataset through kagglehub.
# NOTE(review): the returned value is presumably the local directory holding the
# downloaded files - confirm against the kagglehub docs. None of the visible cells
# read this variable; the later cells use hard-coded '/kaggle/input/...' paths.
iabhishekofficial_mobile_price_classification_path = kagglehub.dataset_download('iabhishekofficial/mobile-price-classification')

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Collect the full path of every file below the Kaggle input directory
# (in os.walk order), then print one path per line.
input_file_paths = [
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
]
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/mobile-price-classification/train.csv
/kaggle/input/mobile-price-classification/test.csv


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import lightgbm as lgb
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import warnings
# Suppress every warning for the rest of the kernel session.
# NOTE(review): no category or module is given, so this also hides deprecation
# and other diagnostic warnings from the libraries imported above - consider
# narrowing the filter to the specific warnings that are known to be noise.
warnings.filterwarnings('ignore')

class MobilePriceClassifier:
    """Pipeline that prepares the mobile price data, trains three tree models and predicts.

    The intended call order is ``load_and_prepare_data()``, then
    ``train_models()``, then ``predict()``.  Every stage prints progress
    information, and on failure prints a short message before re-raising the
    original exception.

    Class attributes:
        ENSEMBLE_WEIGHTS: Per-model weight applied to ``predict_proba`` output
            when the probabilities are summed into the ensemble.
        DEFAULT_ENSEMBLE_WEIGHT: Weight used for models not listed in
            ``ENSEMBLE_WEIGHTS``.
    """

    ENSEMBLE_WEIGHTS = {'Random Forest': 0.4}
    DEFAULT_ENSEMBLE_WEIGHT = 0.3

    def __init__(self, train_path, test_path):
        """Remember the CSV locations and initialise empty pipeline state.

        Args:
            train_path: Path of the training CSV (must contain ``price_range``).
            test_path: Path of the test CSV (an ``id`` column is optional).
        """
        self.train_path = train_path
        self.test_path = test_path
        self.train_data = None
        self.test_data = None
        self.X_train = None
        self.X_test = None
        self.y_train = None
        # Declared here so that calling predict()/train_models() too early can
        # be detected explicitly instead of failing with an AttributeError.
        self.X_train_scaled = None
        self.X_test_scaled = None
        self.test_ids = None
        self.scaler = RobustScaler()
        self.models = {}
        self.predictions = {}
        self.n_classes = None

    def load_and_prepare_data(self):
        """Read both CSV files, engineer features and scale them.

        Populates ``train_data``, ``test_data``, ``X_train``, ``X_test``,
        ``y_train``, ``X_train_scaled``, ``X_test_scaled``, ``n_classes`` and,
        when the test file has an ``id`` column, ``test_ids``.

        Raises:
            Exception: Any error from reading or transforming the data is
                printed and re-raised unchanged.
        """
        try:
            # Load data
            self.train_data = pd.read_csv(self.train_path)
            self.test_data = pd.read_csv(self.test_path)

            print("Training data columns:", self.train_data.columns.tolist())
            print("Test data columns:", self.test_data.columns.tolist())

            # Keep the test ids aside so they are not used as a feature
            if 'id' in self.test_data.columns:
                self.test_ids = self.test_data['id']
                self.test_data = self.test_data.drop(['id'], axis=1)

            # Number of distinct target labels
            self.n_classes = self.train_data['price_range'].nunique()

            # Engineered columns are added to both frames in place
            self._create_features(self.train_data)
            self._create_features(self.test_data)

            # Split features and target
            self.y_train = self.train_data['price_range']
            self.X_train = self.train_data.drop(['price_range'], axis=1)
            self.X_test = self.test_data.copy()

            # Align features: columns missing from the test set are filled
            # with 0, then the test columns follow the training column order.
            for col in self.X_train.columns:
                if col not in self.X_test.columns:
                    self.X_test[col] = 0
            self.X_test = self.X_test[self.X_train.columns]

            # The scaler is fitted on the training features only
            self.X_train_scaled = self.scaler.fit_transform(self.X_train)
            self.X_test_scaled = self.scaler.transform(self.X_test)

            print("\nData preparation completed successfully:")
            print(f"Training features shape: {self.X_train.shape}")
            print(f"Test features shape: {self.X_test.shape}")
            print(f"Training target shape: {self.y_train.shape}")
            print(f"Number of classes: {self.n_classes}")

        except Exception as e:
            print(f"Error in data preparation: {str(e)}")
            raise

    def _create_features(self, data):
        """Add engineered columns to ``data`` in place.

        Each feature is only created when all of its source columns exist.

        Args:
            data: DataFrame to extend; it is modified, nothing is returned.

        Raises:
            Exception: Any error is printed and re-raised unchanged.
        """
        try:
            # Screen features
            if all(col in data.columns for col in ['px_width', 'px_height']):
                # Length of the pixel-resolution diagonal, sqrt(w^2 + h^2)
                data['pixel_density'] = np.sqrt(data['px_width']**2 + data['px_height']**2)

            if all(col in data.columns for col in ['sc_w', 'sc_h']):
                data['screen_area'] = data['sc_w'] * data['sc_h']

            # Memory and performance features
            if all(col in data.columns for col in ['ram', 'clock_speed']):
                data['performance_score'] = data['ram'] * data['clock_speed']

            if all(col in data.columns for col in ['battery_power', 'mobile_wt']):
                # NOTE(review): a zero 'mobile_wt' would produce inf here.
                data['power_weight_ratio'] = data['battery_power'] / data['mobile_wt']

            # log1p-transformed copies of selected columns
            for col in ['ram', 'battery_power', 'int_memory']:
                if col in data.columns:
                    data[f'{col}_log'] = np.log1p(data[col])

        except Exception as e:
            print(f"Error in feature creation: {str(e)}")
            raise

    def train_models(self):
        """Build and fit the Random Forest, XGBoost and LightGBM classifiers.

        The fitted estimators are stored in ``self.models`` keyed by display
        name.  The accuracy printed for each model is measured on the same
        data it was fitted on, so it is not an estimate of held-out
        performance.

        Raises:
            Exception: Any error is printed and re-raised unchanged.
        """
        try:
            self.models = {
                'Random Forest': RandomForestClassifier(
                    n_estimators=200,
                    max_depth=10,
                    min_samples_split=5,
                    min_samples_leaf=2,
                    random_state=42
                ),
                'XGBoost': xgb.XGBClassifier(
                    n_estimators=200,
                    max_depth=5,
                    learning_rate=0.1,
                    min_child_weight=1,
                    gamma=0,
                    subsample=0.8,
                    colsample_bytree=0.8,
                    random_state=42
                ),
                'LightGBM': lgb.LGBMClassifier(
                    n_estimators=200,
                    num_leaves=31,
                    max_depth=5,
                    learning_rate=0.1,
                    min_child_samples=20,
                    min_child_weight=0.001,
                    subsample=0.8,
                    colsample_bytree=0.8,
                    random_state=42,
                    verbose=-1
                )
            }

            for name, model in self.models.items():
                print(f"\nTraining {name}...")
                model.fit(self.X_train_scaled, self.y_train)
                train_pred = model.predict(self.X_train_scaled)
                train_accuracy = accuracy_score(self.y_train, train_pred)
                print(f"{name} Training Accuracy: {train_accuracy:.4f}")

        except Exception as e:
            print(f"Error in model training: {str(e)}")
            raise

    def predict(self, output_path='predictions.csv'):
        """Predict with every model, add a weighted ensemble and save the result.

        For each model that offers ``predict_proba`` the class probabilities
        are multiplied by its ensemble weight and summed.  The ensemble label
        is the class with the largest summed score, mapped through the
        models' ``classes_`` when that attribute is available so that the
        column holds real labels rather than column positions.

        Args:
            output_path: CSV file the results are written to.  Pass ``None``
                to skip writing.  Defaults to ``'predictions.csv'``.

        Returns:
            DataFrame with an optional ``id`` column, one column per model and
            an ``Ensemble`` column.

        Raises:
            RuntimeError: If the data has not been prepared or no model has
                been trained yet.
            Exception: Any other error is printed and re-raised unchanged.
        """
        try:
            if self.X_test_scaled is None:
                raise RuntimeError(
                    "Test data is not prepared; call load_and_prepare_data() first."
                )
            if not self.models:
                # Without this guard the ensemble would be all zeros and every
                # row would silently be labelled with the first class.
                raise RuntimeError(
                    "No trained models available; call train_models() first."
                )

            # Initialize results DataFrame
            if self.test_ids is not None:
                results = pd.DataFrame({'id': self.test_ids})
            else:
                results = pd.DataFrame()

            # One row per test sample, one column per class
            ensemble_pred = np.zeros((len(self.X_test_scaled), self.n_classes))
            class_labels = None

            # Generate predictions
            for name, model in self.models.items():
                print(f"\nGenerating predictions for {name}...")
                results[name] = model.predict(self.X_test_scaled)

                # Only probabilistic models contribute to the ensemble
                if hasattr(model, 'predict_proba'):
                    weight = self.ENSEMBLE_WEIGHTS.get(name, self.DEFAULT_ENSEMBLE_WEIGHT)
                    ensemble_pred += weight * model.predict_proba(self.X_test_scaled)
                    if class_labels is None and hasattr(model, 'classes_'):
                        class_labels = np.asarray(model.classes_)

            # Translate the winning probability column into a class label
            best_columns = np.argmax(ensemble_pred, axis=1)
            if class_labels is not None:
                results['Ensemble'] = class_labels[best_columns]
            else:
                results['Ensemble'] = best_columns

            # Save predictions
            if output_path is not None:
                results.to_csv(output_path, index=False)
                print(f"\nPredictions saved to '{output_path}'")

            return results

        except Exception as e:
            print(f"Error in prediction: {str(e)}")
            raise

def main(train_path='/kaggle/input/mobile-price-classification/train.csv',
         test_path='/kaggle/input/mobile-price-classification/test.csv'):
    """Run the whole pipeline: prepare data, train the models and predict.

    Args:
        train_path: Location of the training CSV.  Defaults to the Kaggle
            input path used by this notebook.
        test_path: Location of the test CSV.  Defaults to the Kaggle input
            path used by this notebook.

    Returns:
        The DataFrame returned by ``MobilePriceClassifier.predict()``.

    Raises:
        Exception: Any failure in a pipeline stage is printed and re-raised
            unchanged.
    """
    try:
        # Initialize classifier
        classifier = MobilePriceClassifier(
            train_path=train_path,
            test_path=test_path
        )

        # Execute pipeline
        print("Loading and preparing data...")
        classifier.load_and_prepare_data()

        print("\nTraining models...")
        classifier.train_models()

        print("\nMaking predictions...")
        # Hand the predictions back so callers can inspect them
        return classifier.predict()

    except Exception as e:
        print(f"Error in main execution: {str(e)}")
        raise

# Run the pipeline only when this code is executed directly (as a script or a
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Loading and preparing data...
Training data columns: ['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g', 'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height', 'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g', 'touch_screen', 'wifi', 'price_range']
Test data columns: ['id', 'battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g', 'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height', 'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g', 'touch_screen', 'wifi']

Data preparation completed successfully:
Training features shape: (2000, 27)
Test features shape: (1000, 27)
Training target shape: (2000,)
Number of classes: 4

Training models...

Training Random Forest...
Random Forest Training Accuracy: 0.9970

Training XGBoost...
XGBoost Training Accuracy: 1.0000

Training LightGBM...
LightGBM Training Accuracy: 1.0000

Making predictions...

Generating predictions for Random Forest...

Generating predictions for XGBoost...

Ge