In [None]:
# Install necessary libraries
pip install seaborn
pip install scikit-learn
pip install ipywidgets

In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.impute import SimpleImputer
import warnings
warnings.filterwarnings('ignore')

In [None]:
# uploading file
import ipywidgets as widgets
from IPython.display import display

# Build the file picker: limited to .csv/.xlsx files, several files may be chosen at once.
upload_button = widgets.FileUpload(accept='.csv,.xlsx', multiple=True)

# Render the picker in the cell output.
display(upload_button)

# Access the uploaded file(s)
def handle_upload(change):
    """Print a confirmation line for every file in the widget's new value.

    Intended to be registered as an observer of the upload widget's ``value``
    trait, so ``change["new"]`` holds the updated value.

    The layout of that value differs between ipywidgets releases: 7.x exposes
    a dict keyed by filename, 8.x exposes a tuple of per-file dicts that carry
    the filename under ``"name"``. Both layouts are accepted.

    Args:
        change: Change notification; only its ``"new"`` entry is read.
    """
    uploaded = change["new"]

    if isinstance(uploaded, dict):
        # ipywidgets 7.x: {filename: {...}}
        filenames = list(uploaded)
    else:
        # ipywidgets 8.x: ({"name": filename, ...}, ...)
        filenames = [entry["name"] for entry in uploaded]

    for filename in filenames:
        print(f"File {filename} uploaded successfully!")

# Call handle_upload whenever the widget's `value` trait changes.
upload_button.observe(handle_upload, names='value')

In [None]:
def load_and_explore_data(filepath):
    """Read a CSV file, print a short overview and draw two exploratory plots.

    Prints the frame's shape and the per-column count of missing values, then
    shows a histogram (with KDE) of the ``Price`` column and an annotated
    heatmap of the correlations between all numeric columns.

    Args:
        filepath: Path (or anything ``pd.read_csv`` accepts) of the CSV file.

    Returns:
        The loaded DataFrame, unmodified.
    """
    laptops = pd.read_csv(filepath)

    # Textual overview
    missing_per_column = laptops.isnull().sum()
    print(f"Dataset Shape: {laptops.shape}")
    print("\nMissing Values:")
    print(missing_per_column)

    # Target distribution
    plt.figure(figsize=(10, 6))
    prices = laptops['Price']
    sns.histplot(prices, kde=True)
    plt.title('Distribution of Laptop Prices')
    plt.show()

    # Pairwise correlation between the numeric columns only
    numeric_columns = laptops.select_dtypes(include=[np.number]).columns
    correlations = laptops[numeric_columns].corr()
    plt.figure(figsize=(12, 8))
    sns.heatmap(correlations, annot=True, cmap='coolwarm')
    plt.title('Correlation Matrix')
    plt.show()

    return laptops

# NOTE(review): hard-coded, machine-specific placeholder path ("YourUsername");
# it has to be edited to point at the local copy of the dataset before this cell can run.
filepath = r'C:\Users\YourUsername\Documents\laptop.csv'
# Load the CSV and show the exploratory output; `df` is the raw frame passed to preprocessing below.
df = load_and_explore_data(filepath)

In [None]:
# Data Preprocessing and Feature Engineering
def _first_number(series, pattern):
    """Return the first regex capture group of each value as a float (NaN if no match)."""
    # astype(str) lets columns that were already parsed as numbers go through
    # the same path as text such as "8GB" or "1.37kg".
    return series.astype(str).str.extract(pattern, expand=False).astype(float)


def _contains_flag(series, token):
    """Return 1 where the text form of the value contains `token`, else 0."""
    return series.astype(str).str.contains(token, regex=False).astype(int)


def _cpu_brand(cpu):
    """Map a raw CPU description to 'Intel', 'AMD' or 'Other' (case-sensitive)."""
    text = str(cpu)
    if 'Intel' in text:
        return 'Intel'
    if 'AMD' in text:
        return 'AMD'
    return 'Other'


def _gpu_brand(gpu):
    """Map a raw GPU description to 'NVIDIA', 'AMD', 'Intel' or 'Other' (case-insensitive)."""
    text = str(gpu).upper()
    if any(tag in text for tag in ('NVIDIA', 'GTX', 'RTX')):
        return 'NVIDIA'
    if any(tag in text for tag in ('AMD', 'RADEON')):
        return 'AMD'
    if 'INTEL' in text:
        return 'Intel'
    return 'Other'


def _os_family(opsys):
    """Map a raw operating-system label to its family name (case-sensitive, first match wins)."""
    text = str(opsys)
    for family in ('Windows', 'Mac', 'Linux', 'Chrome', 'No OS'):
        if family in text:
            return family
    return 'Other'


def preprocess_laptop_data(df):
    """Clean the raw laptop frame and derive the model features.

    Steps, in order:
      1. Drop columns whose label contains "Unnamed" and every row that has a
         missing value in any column.
      2. Parse ``Ram`` and ``Weight`` to floats and extract ``CPU_Speed`` (the
         number in front of "GHz") from ``Cpu``.
      3. Derive ``CPU_Brand``, ``GPU_Brand``, ``TouchScreen``, ``SSD`` and ``HDD``.
      4. Collapse ``Company`` to a fixed list of major brands (else 'Other') and
         ``OpSys`` to an OS family; Apple rows are always labelled 'Mac'.
      5. Fill values that could not be parsed in ``Ram``, ``Weight`` and
         ``CPU_Speed`` with the column median.

    NOTE(review): the medians in step 5 are computed over the whole frame that
    is passed in, i.e. before any train/test split performed by the caller.

    Args:
        df: Raw DataFrame containing at least the columns ``Company``, ``Ram``,
            ``Weight``, ``Cpu``, ``Gpu``, ``ScreenResolution``, ``Memory`` and
            ``OpSys``. It is not modified.

    Returns:
        A new DataFrame with the cleaned and derived columns.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # str(col) keeps the check safe for non-string column labels.
    unnamed_columns = [col for col in df.columns if 'Unnamed' in str(col)]
    # drop/dropna return new frames; the final copy guarantees the assignments
    # below never write through to the caller's data.
    df = df.drop(columns=unnamed_columns).dropna().copy()

    # Numeric fields embedded in text ("8GB", "1.37kg", "... 2.3GHz")
    df['Ram'] = _first_number(df['Ram'], r'(\d+)')
    df['Weight'] = _first_number(df['Weight'], r'(\d+\.?\d*)')
    df['CPU_Speed'] = _first_number(df['Cpu'], r'(\d+\.?\d*)GHz')

    # Brand features
    df['CPU_Brand'] = df['Cpu'].map(_cpu_brand)
    df['GPU_Brand'] = df['Gpu'].map(_gpu_brand)

    # Binary flags
    df['TouchScreen'] = _contains_flag(df['ScreenResolution'], 'Touch')
    df['SSD'] = _contains_flag(df['Memory'], 'SSD')
    df['HDD'] = _contains_flag(df['Memory'], 'HDD')

    # Process Company: keep the major brands, bucket everything else
    major_companies = ['Apple', 'HP', 'Acer', 'Asus', 'Dell', 'Lenovo', 'MSI', 'Microsoft', 'Toshiba']
    df['Company'] = df['Company'].where(df['Company'].isin(major_companies), 'Other')

    # Process OS; the Apple override also catches labels such as "macOS",
    # which the case-sensitive 'Mac' test above does not match.
    df['OpSys'] = df['OpSys'].map(_os_family)
    df.loc[df['Company'] == 'Apple', 'OpSys'] = 'Mac'

    # Handle missing values in numerical features (values the regexes could not
    # parse). A column with no parsable value at all has a NaN median and is
    # left untouched instead of raising.
    numerical_features = ['Ram', 'Weight', 'CPU_Speed']
    df[numerical_features] = df[numerical_features].fillna(df[numerical_features].median())

    return df

# Apply preprocessing
try:
    df_processed = preprocess_laptop_data(df)
    print("Processed dataframe shape:", df_processed.shape)
except Exception as e:
    print(f"Error during preprocessing: {str(e)}")
    # Re-raise instead of swallowing: the following cells read `df_processed`,
    # so continuing would fail later with a NameError or silently reuse a
    # stale frame left in the kernel by an earlier run.
    raise

In [None]:
# Preview the first rows of the preprocessed frame.
df_processed.head()

In [None]:
# Prepare Training Data
def prepare_training_data(df):
    """Split the preprocessed frame and build an (unfitted) feature transformer.

    The target is the ``Price`` column. Categorical features are imputed with
    the constant 'Other' and one-hot encoded; numerical features are imputed
    with the median. The returned ColumnTransformer is not fitted here.

    Args:
        df: Preprocessed DataFrame containing the ten feature columns and ``Price``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, preprocessor)`` from an 80/20
        split with ``random_state=42``.
    """
    categorical_features = ['Company', 'CPU_Brand', 'GPU_Brand', 'OpSys']
    numerical_features = ['Ram', 'Weight', 'CPU_Speed', 'TouchScreen', 'SSD', 'HDD']
    features = ['Company', 'Ram', 'Weight', 'CPU_Brand', 'CPU_Speed',
                'GPU_Brand', 'TouchScreen', 'OpSys', 'SSD', 'HDD']

    # Categorical branch: fill gaps with 'Other', then one-hot encode
    fill_category = SimpleImputer(strategy='constant', fill_value='Other')
    one_hot = OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore')
    categorical_transformer = Pipeline(steps=[('imputer', fill_category), ('onehot', one_hot)])

    # Numerical branch: median imputation only
    fill_number = SimpleImputer(strategy='median')
    numerical_transformer = Pipeline(steps=[('imputer', fill_number)])

    preprocessor = ColumnTransformer(transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features),
    ])

    # 80/20 hold-out split with a fixed seed
    predictors = df[features]
    target = df['Price']
    X_train, X_test, y_train, y_test = train_test_split(
        predictors, target, test_size=0.2, random_state=42
    )

    return X_train, X_test, y_train, y_test, preprocessor

# Prepare the training data
try:
    X_train, X_test, y_train, y_test, preprocessor = prepare_training_data(df_processed)

    # Fit the transformer on the training split only, then apply it to both splits
    X_train_scaled = preprocessor.fit_transform(X_train)
    X_test_scaled = preprocessor.transform(X_test)

    print("X_train_scaled shape:", X_train_scaled.shape)
    print("X_test_scaled shape:", X_test_scaled.shape)

except Exception as e:
    print(f"Error during data preparation: {str(e)}")
    # Re-raise instead of swallowing: the training and tuning cells read
    # `X_train_scaled` / `X_test_scaled`, so continuing would fail later with a
    # NameError or silently train on stale arrays from an earlier run.
    raise

In [None]:
# Training Models
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from typing import Dict, Any, Tuple
from time import time

def train_model_with_metrics(
    model: Any,
    model_name: str,
    X_train: np.ndarray,
    X_test: np.ndarray,
    y_train: np.ndarray,
    y_test: np.ndarray
) -> Dict[str, Any]:
    """Fit `model`, score it on the test split and print a short report.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        model_name: Label used in the printed report and error message.
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        Dict with keys ``model``, ``r2``, ``mae``, ``rmse``, ``mape``,
        ``training_time`` (seconds spent in ``fit`` only) and ``predictions``.
        ``None`` is returned if any step raises; the error is printed.
    """
    try:
        # Time the fit alone; the clock is stopped before prediction and metric
        # computation so that "training time" is not inflated by them.
        fit_started = time()
        model.fit(X_train, y_train)
        training_time = time() - fit_started

        # Make predictions
        y_pred = model.predict(X_test)

        # Calculate metrics
        r2 = r2_score(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        # Mean absolute percentage error; not finite if y_test contains zeros.
        mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100

        # Store results
        results = {
            'model': model,
            'r2': r2,
            'mae': mae,
            'rmse': rmse,
            'mape': mape,
            'training_time': training_time,
            'predictions': y_pred
        }

        # Print results
        print(f"\n{model_name} Results:")
        print(f"R² Score: {r2:.4f}")
        print(f"MAE: ${mae:.2f}")
        print(f"RMSE: ${rmse:.2f}")
        print(f"MAPE: {mape:.2f}%")
        print(f"Training Time: {training_time:.2f} seconds")

        return results

    except Exception as e:
        # Best effort: report the failure and let the caller skip this model.
        print(f"Error training {model_name}: {str(e)}")
        return None

def train_linear_regression(
    X_train: np.ndarray,
    X_test: np.ndarray,
    y_train: np.ndarray,
    y_test: np.ndarray
) -> Dict[str, Any]:
    """Train and score a default LinearRegression with warnings suppressed.

    Returns:
        Whatever ``train_model_with_metrics`` returns for this model.
    """
    # Silence warnings only for the duration of this fit/evaluation.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        return train_model_with_metrics(
            LinearRegression(), "Linear Regression",
            X_train, X_test, y_train, y_test,
        )

def train_random_forest(
    X_train: np.ndarray,
    X_test: np.ndarray,
    y_train: np.ndarray,
    y_test: np.ndarray
) -> Dict[str, Any]:
    """Train and score a RandomForestRegressor with fixed hyperparameters.

    Returns:
        Whatever ``train_model_with_metrics`` returns for this model.
    """
    forest_settings = {
        'n_estimators': 100,
        'max_depth': None,
        'min_samples_split': 2,
        'min_samples_leaf': 1,
        'max_features': 'sqrt',
        'n_jobs': -1,
        'random_state': 42,  # fixed seed for repeatable results
    }
    forest = RandomForestRegressor(**forest_settings)
    return train_model_with_metrics(
        forest, "Random Forest", X_train, X_test, y_train, y_test
    )

def train_gradient_boosting(
    X_train: np.ndarray,
    X_test: np.ndarray,
    y_train: np.ndarray,
    y_test: np.ndarray
) -> Dict[str, Any]:
    """Train and score a GradientBoostingRegressor with fixed hyperparameters.

    Returns:
        Whatever ``train_model_with_metrics`` returns for this model.
    """
    booster_settings = {
        'n_estimators': 100,
        'learning_rate': 0.1,
        'max_depth': 3,
        'min_samples_split': 2,
        'min_samples_leaf': 1,
        'subsample': 1.0,
        'max_features': None,
        'random_state': 42,  # fixed seed for repeatable results
    }
    booster = GradientBoostingRegressor(**booster_settings)
    return train_model_with_metrics(
        booster, "Gradient Boosting", X_train, X_test, y_train, y_test
    )

def train_all_models(
    X_train_scaled: np.ndarray,
    X_test_scaled: np.ndarray,
    y_train: np.ndarray,
    y_test: np.ndarray
) -> Dict[str, Dict[str, Any]]:
    """Train the three baseline models and report the one with the highest R².

    Args:
        X_train_scaled: Transformed training features.
        X_test_scaled: Transformed test features.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        Mapping of model name to its result dict. Models whose trainer returned
        ``None`` are left out, so the mapping is empty if every model failed.

    Raises:
        Exception: Any unexpected error is printed and then re-raised.
    """
    try:

        # Verify input shapes
        print("\nInput Data Shapes:")
        print(f"X_train: {X_train_scaled.shape}")
        print(f"X_test: {X_test_scaled.shape}")
        print(f"y_train: {y_train.shape}")
        print(f"y_test: {y_test.shape}")

        model_results = {}

        # Train each model
        models = {
            'Linear Regression': train_linear_regression,
            'Random Forest': train_random_forest,
            'Gradient Boosting': train_gradient_boosting
        }

        for name, train_func in models.items():
            results = train_func(X_train_scaled, X_test_scaled, y_train, y_test)
            if results is not None:
                model_results[name] = results

        # max() raises a cryptic ValueError on an empty mapping; the individual
        # failures were already reported, so just say that nothing succeeded.
        if not model_results:
            print("\nNo model could be trained successfully.")
            return model_results

        # Find best model
        best_model = max(model_results.items(), key=lambda x: x[1]['r2'])
        print("\nBest Performing Model:")
        print(f"Model: {best_model[0]}")
        print(f"R² Score: {best_model[1]['r2']:.4f}")
        print(f"MAE: ${best_model[1]['mae']:.2f}")

        return model_results

    except Exception as e:
        print(f"Error in train_all_models: {str(e)}")
        raise

# Usage example
try:
    # Train all models on the transformed splits; `model_results` maps model name -> result dict
    model_results = train_all_models(X_train_scaled, X_test_scaled, y_train, y_test)

except Exception as e:
    # train_all_models prints and re-raises; here the error is only reported so the cell finishes.
    print(f"Error in model training process: {str(e)}")

In [None]:
# Tuning Hyperparameters
def tune_linear_regression(X_train_scaled, y_train):
    """Fit a default LinearRegression; there is nothing to search over.

    Returns:
        The fitted LinearRegression estimator.
    """
    fitted = LinearRegression().fit(X_train_scaled, y_train)
    print("\nLinear Regression (No hyperparameters to tune)")
    return fitted

def tune_random_forest(X_train_scaled, y_train):
    """Grid-search a small RandomForestRegressor grid with 3-fold CV on R².

    Prints the best parameters and the best cross-validated score.

    Returns:
        The best estimator found by the search.
    """
    print("\nTuning Random Forest Parameters:")

    # Deliberately small grid: only max_depth has more than one candidate
    search_space = {
        'n_estimators': [100],
        'max_depth': [10, 15],
        'min_samples_split': [5],
        'min_samples_leaf': [2]
    }

    searcher = GridSearchCV(
        RandomForestRegressor(random_state=42),
        search_space,
        cv=3,
        scoring='r2',
        n_jobs=-1,
        verbose=1,
    )
    searcher.fit(X_train_scaled, y_train)

    print("Best Random Forest Parameters:")
    print(searcher.best_params_)
    print(f"Best R2 score: {searcher.best_score_:.4f}")

    return searcher.best_estimator_

def tune_gradient_boosting(X_train_scaled, y_train):
    """Grid-search a small GradientBoostingRegressor grid with 3-fold CV on R².

    Prints the best parameters and the best cross-validated score.

    Returns:
        The best estimator found by the search.
    """
    print("\nTuning Gradient Boosting Parameters:")

    # Deliberately small grid: only max_depth has more than one candidate
    search_space = {
        'n_estimators': [100],
        'learning_rate': [0.1],
        'max_depth': [3, 5],
        'min_samples_split': [5],
        'subsample': [0.8]
    }

    searcher = GridSearchCV(
        GradientBoostingRegressor(random_state=42),
        search_space,
        cv=3,
        scoring='r2',
        n_jobs=-1,
        verbose=1,
    )
    searcher.fit(X_train_scaled, y_train)

    print("Best Gradient Boosting Parameters:")
    print(searcher.best_params_)
    print(f"Best R2 score: {searcher.best_score_:.4f}")

    return searcher.best_estimator_

def tune_all_models(X_train_scaled, y_train):
    """Tune the three models in turn and collect the resulting estimators.

    A KeyboardInterrupt stops the sequence early; the estimators finished up
    to that point are still returned.

    Returns:
        Dict mapping model name to its tuned (fitted) estimator.
    """
    best_models = {}

    try:
        tuners = (
            ('Linear Regression', tune_linear_regression),
            ('Random Forest', tune_random_forest),
            ('Gradient Boosting', tune_gradient_boosting),
        )
        for model_name, tune in tuners:
            best_models[model_name] = tune(X_train_scaled, y_train)

    except KeyboardInterrupt:
        print("\nTuning interrupted by user. Returning best models found so far")

    return best_models

def evaluate_tuned_models(best_models, X_train_scaled, X_test_scaled, y_train, y_test):
    """Score every tuned model on the test split and print R² and MAE.

    Args:
        best_models: Dict mapping model name to a fitted estimator.
        X_train_scaled: Not used by this function.
        X_test_scaled: Transformed test features.
        y_train: Not used by this function.
        y_test: Test targets.

    Returns:
        Dict mapping model name to ``{'model', 'r2', 'mae'}``.
    """
    print("\nEvaluating Tuned Models:")

    results = {}
    for name in best_models:
        model = best_models[name]
        y_pred = model.predict(X_test_scaled)

        r2 = r2_score(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        results[name] = {'model': model, 'r2': r2, 'mae': mae}

        print(f"\n{name} Results:")
        print(f"R2 Score: {r2:.4f}")
        print(f"MAE: ${mae:.2f}")

    return results


# Tune and evaluate models
try:
    best_models = tune_all_models(X_train_scaled, y_train)
    tuned_results = evaluate_tuned_models(best_models, X_train_scaled, X_test_scaled, y_train, y_test)

    # tune_all_models returns whatever finished before an interrupt, which can
    # be nothing at all; max() on an empty mapping would raise a ValueError.
    if tuned_results:
        # Find the best performing model
        best_model_name = max(tuned_results.items(), key=lambda x: x[1]['r2'])[0]
        print(f"\nBest performing model: {best_model_name}")
        print(f"R2 Score: {tuned_results[best_model_name]['r2']:.4f}")
        print(f"MAE: ${tuned_results[best_model_name]['mae']:.2f}")
    else:
        print("\nNo tuned models are available to compare.")

except KeyboardInterrupt:
    print("\nProcess interrupted by user. Showing partial results if available.")

In [None]:
# Analyzing Feature Importance
def analyze_feature_importance(models, preprocessor, original_features):
    """Rank, print and plot the feature importances of each fitted model.

    Importances are taken from ``feature_importances_`` when the model has it
    and otherwise from the absolute value of ``coef_``. Models exposing neither
    attribute are skipped with a message instead of reusing the values of the
    previously processed model.

    Args:
        models: Dict mapping model name to a fitted estimator.
        preprocessor: Fitted ColumnTransformer whose 'num' and 'cat'
            transformers define the transformed feature order.
        original_features: Accepted for interface compatibility; the feature
            names are read from ``preprocessor`` and this argument is not used.

    Returns:
        Dict mapping model name to a DataFrame with columns ``Feature`` and
        ``Importance``, sorted by descending importance. If an error occurs the
        entries collected so far are returned and the error is printed.
    """

    def get_feature_names(preprocessor):
        """List the output feature names in the transformer's column order."""
        feature_names = []

        for name, transformer, features in preprocessor.transformers_:
            if name == 'num':
                # Numerical columns pass through under their own names
                feature_names.extend(features)
            elif name == 'cat':
                # One-hot encoding expands each categorical column
                encoded_features = transformer.named_steps['onehot'].get_feature_names_out(features)
                feature_names.extend(encoded_features)

        return feature_names

    # Get transformed feature names
    feature_names = get_feature_names(preprocessor)
    importance_dict = {}

    try:
        for name, model in models.items():
            if hasattr(model, 'feature_importances_'):
                importance = model.feature_importances_
            elif hasattr(model, 'coef_'):
                importance = np.abs(model.coef_)
            else:
                print(f"\n{name}: model exposes no feature importances, skipping.")
                continue

            # Create DataFrame with feature importance
            importance_df = pd.DataFrame({
                'Feature': feature_names,
                'Importance': importance
            })

            # Sort by importance and reset index
            importance_df = importance_df.sort_values('Importance', ascending=False).reset_index(drop=True)
            importance_dict[name] = importance_df

            # Print feature importance values
            print(f"\n{name} Feature Importance Values:")
            print(importance_df.to_string(index=True))

            # Visualize the ten most important features
            plt.figure(figsize=(10, 6))
            sns.barplot(data=importance_df.head(10), x='Importance', y='Feature')
            plt.title(f'{name} - Feature Importance')
            plt.xlabel('Importance')
            plt.ylabel('Features')
            plt.tight_layout()
            plt.show()

    except Exception as e:
        print(f"Error in feature importance analysis: {str(e)}")

    return importance_dict

# Usage
try:
    # Store original feature names
    original_features = ['Company', 'Ram', 'Weight', 'CPU_Brand', 'CPU_Speed',
                        'GPU_Brand', 'TouchScreen', 'OpSys', 'SSD', 'HDD']

    # Analyze feature importance of the tuned models (prints and plots one chart per model)
    importance_dict = analyze_feature_importance(best_models, preprocessor, original_features)

    # Compare feature importance across models in a single wide figure
    plt.figure(figsize=(15, 6))

    # Plot feature importance comparison for top 5 features, one panel per model
    model_names = list(importance_dict.keys())
    for i, model_name in enumerate(model_names):
        # The frames are already sorted by descending importance, so head(5) is the top 5
        top_features = importance_dict[model_name].head(5)

        # Subplot indices are 1-based
        plt.subplot(1, len(model_names), i+1)
        sns.barplot(data=top_features, x='Importance', y='Feature')
        plt.title(f'{model_name}\nTop 5 Features')
        plt.xlabel('Importance')
        # Only the left-most panel carries the y-axis label
        plt.ylabel('Features' if i == 0 else '')

    plt.tight_layout()
    plt.show()

except Exception as e:
    # Plotting is best effort: report the problem and carry on
    print(f"Error in visualization: {str(e)}")

In [None]:
# Real Time Prediction
import numpy as np
import pandas as pd

def _ask_choice(title, options, prompt, suffix=""):
    """Print a numbered menu and re-prompt until one of its numbers is entered."""
    print(title)
    for key, label in options.items():
        print(f"{key}: {label}{suffix}")

    while True:
        raw = input(prompt)
        try:
            return options[int(raw)]
        except (ValueError, KeyError):
            valid = ', '.join(str(key) for key in options)
            print(f"Invalid choice {raw!r}. Please enter one of: {valid}")


def _ask_positive_float(prompt):
    """Re-prompt until a finite number greater than zero is entered."""
    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            value = float('nan')
        # NaN fails both comparisons, so it is rejected together with inf.
        if 0 < value < float('inf'):
            return value
        print(f"Invalid value {raw!r}. Please enter a number greater than 0.")


def _ask_flag(prompt):
    """Re-prompt until 0 or 1 is entered and return it as an int."""
    while True:
        raw = input(prompt)
        try:
            value = int(raw)
        except ValueError:
            value = None
        if value in (0, 1):
            return value
        print(f"Invalid value {raw!r}. Please enter 0 or 1.")


def create_laptop_input():
    """Interactively collect one laptop specification from the user.

    Every answer is validated and asked again until it is acceptable, so a
    typo no longer aborts the whole questionnaire. After the summary the user
    must confirm with "yes"; any other answer restarts the questionnaire
    (in a loop, not by recursion).

    Returns:
        A one-row DataFrame with the columns ``Company``, ``Ram``, ``Weight``,
        ``CPU_Brand``, ``CPU_Speed``, ``GPU_Brand``, ``TouchScreen``, ``OpSys``,
        ``SSD`` and ``HDD``.
    """
    company_options = {
        0: 'Acer',
        1: 'Apple',
        2: 'Asus',
        3: 'Dell',
        4: 'HP',
        5: 'Lenovo',
        6: 'MSI',
        7: 'Microsoft',
        8: 'Toshiba',
        9: 'Other'
    }
    # Numbered 0..4 in list order
    ram_options = dict(enumerate([4, 8, 16, 32, 64]))
    cpu_options = {
        0: 'Intel',
        1: 'AMD',
        2: 'Other'
    }
    gpu_options = {
        0: 'NVIDIA',
        1: 'AMD',
        2: 'Intel',
        3: 'Other'
    }
    os_options = {
        0: 'Windows',
        1: 'Mac',
        2: 'Linux',
        3: 'Chrome',
        4: 'No OS',
        5: 'Other'
    }

    while True:
        print("\nEnter Laptop Specifications:")

        company = _ask_choice("\nCompany Options:", company_options, "Enter company number: ")
        ram = _ask_choice("\nRAM Options (GB):", ram_options, "Enter RAM choice: ", suffix="GB")
        weight = _ask_positive_float("\nEnter Weight (kg): ")
        cpu_brand = _ask_choice("\nCPU Brand Options:", cpu_options, "Enter CPU brand number: ")

        # CPU Speed
        print("\nTypical CPU Speed ranges:")
        print("1.8 GHz - 2.2 GHz (Low Power)")
        print("2.3 GHz - 2.8 GHz (Mid Range)")
        print("2.9 GHz - 3.6 GHz (High Performance)")
        cpu_speed = _ask_positive_float("\nEnter CPU Speed (GHz): ")

        gpu_brand = _ask_choice("\nGPU Brand Options:", gpu_options, "Enter GPU brand number: ")
        touchscreen = _ask_flag("\nTouchscreen (0: No, 1: Yes): ")
        # Named `opsys` so the stdlib module name `os` is not shadowed.
        opsys = _ask_choice("\nOperating System Options:", os_options, "Enter OS number: ")

        # Storage options
        print("\nStorage Options:")
        ssd = _ask_flag("SSD (0: No, 1: Yes): ")
        hdd = _ask_flag("HDD (0: No, 1: Yes): ")

        # Display summary of selections
        print("\nSelected Specifications:")
        print(f"Company: {company}")
        print(f"RAM: {ram}GB")
        print(f"Weight: {weight}kg")
        print(f"CPU Brand: {cpu_brand}")
        print(f"CPU Speed: {cpu_speed}GHz")
        print(f"GPU Brand: {gpu_brand}")
        print(f"TouchScreen: {'Yes' if touchscreen else 'No'}")
        print(f"Operating System: {opsys}")
        print(f"SSD: {'Yes' if ssd else 'No'}")
        print(f"HDD: {'Yes' if hdd else 'No'}")

        confirm = input("\nConfirm specifications? (yes/no): ")
        if confirm.strip().lower() == 'yes':
            break

    # Column order matches the feature list the models were trained on
    return pd.DataFrame([[
        company,
        ram,
        weight,
        cpu_brand,
        cpu_speed,
        gpu_brand,
        touchscreen,
        opsys,
        ssd,
        hdd
    ]], columns=['Company', 'Ram', 'Weight', 'CPU_Brand', 'CPU_Speed',
                'GPU_Brand', 'TouchScreen', 'OpSys', 'SSD', 'HDD'])

def predict_price(models, preprocessor, new_laptop):
    """Predict a price for one laptop with every model.

    Args:
        models: Dict mapping model name to an object with ``predict``.
        preprocessor: Object whose ``transform`` converts ``new_laptop`` into
            model input.
        new_laptop: The single-row input to transform and score.

    Returns:
        Dict mapping model name to the first element of that model's
        prediction, or ``None`` (after printing the error) if the transform or
        any prediction raises.
    """
    try:
        # Transform once, reuse for every model
        model_input = preprocessor.transform(new_laptop)
        return {
            model_name: estimator.predict(model_input)[0]
            for model_name, estimator in models.items()
        }
    except Exception as e:
        print(f"Error in prediction: {str(e)}")
        return None

def display_prediction_results(predictions):
    """Print each model's predicted price plus the average and the range.

    Args:
        predictions: Dict mapping model name to predicted price. ``None`` or an
            empty dict produces a single "unable to predict" message.
    """
    # Guard clause: nothing to report
    if not predictions:
        print("Unable to make predictions. Please check the input values.")
        return

    print("\nPredicted Prices:")

    prices = list(predictions.values())
    avg_price, min_price, max_price = np.mean(prices), np.min(prices), np.max(prices)

    for model_name in predictions:
        price = predictions[model_name]
        print(f"{model_name}: ${price:,.2f}")

    print("\nSummary:")
    print(f"Average Price: ${avg_price:,.2f}")
    print(f"Price Range: ${min_price:,.2f} - ${max_price:,.2f}")

# Main execution
try:
    # Get laptop specifications from user (interactive: blocks on input())
    new_laptop = create_laptop_input()

    # Make predictions using all tuned models; predict_price returns None if it fails
    predictions = predict_price(best_models, preprocessor, new_laptop)

    # Display results (prints a fallback message when predictions is None or empty)
    display_prediction_results(predictions)

except Exception as e:
    # Anything raised while collecting input or displaying results ends up here
    print(f"An error occurred: {str(e)}")
    print("Please try again.")