# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
df = pd.read_csv('mobile_price_dataset.csv')

# Basic data exploration
print("\n=== Dataset Information ===")
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the summary.
df.info()
print("\n=== Descriptive Statistics ===")
print(df.describe())

# Target distribution: number of rows per distinct price_range value
print("\n=== Price Range Distribution ===")
print(df['price_range'].value_counts())

# Feature correlation analysis: pairwise correlation between the columns of
# df, drawn as an annotated heatmap
correlation_matrix = df.corr()
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Feature Correlation Heatmap')
plt.show()

# Prepare features and target: every column except price_range is a feature
X = df.drop('price_range', axis=1)
y = df['price_range']

# Split the data (80% train / 20% test, fixed seed for reproducibility).
# stratify=y keeps the proportion of each price_range class the same in both
# partitions, so the test metrics are not skewed by an unlucky random draw.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Feature scaling: the scaler is fitted on the training partition only and
# then applied to the test partition, so no test statistics leak into training
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model selection and hyperparameter tuning
rf = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# The grid has 3 * 4 * 3 = 36 combinations, each cross-validated on 5 folds
# (180 fits). The fits are independent, so n_jobs=-1 spreads them over all
# available CPU cores; the seeded estimator keeps the outcome unchanged.
grid_search = GridSearchCV(
    rf, param_grid, cv=5, scoring='accuracy', n_jobs=-1
)
grid_search.fit(X_train_scaled, y_train)

# Best model: the estimator carrying the highest-scoring parameter combination
best_rf = grid_search.best_estimator_
print("\n=== Best Model Parameters ===")
print(grid_search.best_params_)

# Model evaluation: score the tuned forest on the scaled test partition
y_pred = best_rf.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred)

print("\n=== Model Performance ===")
print(f"Accuracy: {test_accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Feature importance: pair each feature column with the importance reported
# by the fitted forest, then order the rows from most to least important
feature_importance = pd.DataFrame(
    {'feature': X.columns, 'importance': best_rf.feature_importances_}
)
feature_importance = feature_importance.sort_values(
    'importance', ascending=False
)

# Horizontal bar chart: one bar per feature, drawn in the sorted row order
plt.figure(figsize=(10, 6))
sns.barplot(data=feature_importance, x='importance', y='feature')
plt.title('Feature Importance')
plt.show()

# Function to predict price range for new phones
def predict_price_range(phone_data):
    """
    Predict the price range of a mobile phone based on its features.

    The input is validated against the training feature columns and
    re-ordered to match them before scaling, so the result does not depend
    on the insertion order of the caller's dictionary.

    Parameters:
    phone_data (dict): Dictionary mapping feature name to value. It must
        contain every feature column the model was trained on (the columns
        of ``X``); any additional keys are ignored.

    Returns:
    str: Price range description ('Low', 'Medium', 'High' or 'Very High')

    Raises:
    ValueError: If one or more training features are missing from phone_data.
    """
    # Columns the scaler and model were fitted on, in training order
    expected_features = list(X.columns)

    # Fail early with a message naming the missing features instead of an
    # opaque error from deep inside pandas or scikit-learn
    missing = [name for name in expected_features if name not in phone_data]
    if missing:
        raise ValueError(
            "phone_data is missing required feature(s): "
            + ", ".join(map(str, missing))
        )

    # Create a single-row DataFrame and select the columns in training order;
    # without this the column order would follow the dict's insertion order
    # and the values could be scaled and classified against the wrong feature
    input_df = pd.DataFrame([phone_data])[expected_features]

    # Scale the input data with the scaler fitted on the training partition
    input_scaled = scaler.transform(input_df)

    # Predict price range (first and only row of the prediction array)
    prediction = best_rf.predict(input_scaled)[0]

    # Map the numeric class label to its human-readable description; int()
    # turns the NumPy scalar into a plain Python key for the lookup
    price_ranges = {0: 'Low', 1: 'Medium', 2: 'High', 3: 'Very High'}
    return price_ranges[int(prediction)]

# Example usage
if __name__ == "__main__":
    # Example phone features. The keyword order below is kept deliberately:
    # predict_price_range builds a DataFrame from this mapping, so the
    # insertion order becomes the column order of the model input.
    example_phone = dict(
        battery_power=3000,
        blue=1,
        clock_speed=2.0,
        dual_sim=1,
        fc=5,
        four_g=1,
        int_memory=32,
        m_dep=0.8,
        mobile_wt=150,
        n_cores=8,
        pc=12,
        px_height=1080,
        px_width=1920,
        ram=4000,
        sc_h=15,
        sc_w=7,
        talk_time=10,
        three_g=1,
        touch_screen=1,
        wifi=1,
    )

    # Run the trained model on the example and report the predicted label
    print("\n=== Example Prediction ===")
    predicted_price = predict_price_range(example_phone)
    print(f"Predicted Price Range: {predicted_price}")