# Laptop Type Prediction

In [208]:
import numpy as np 
import pandas as pd

In [209]:
import warnings
warnings.filterwarnings('ignore')

In [210]:
dataset = pd.read_csv('laptop_price_processed.csv')

In [211]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 23 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   laptop_ID         1303 non-null   int64  
 1   Company           1303 non-null   object 
 2   TypeName          1303 non-null   object 
 3   Inches            1303 non-null   float64
 4   Ram               1303 non-null   int64  
 5   OpSys             1303 non-null   object 
 6   Weight            1303 non-null   float64
 7   Price_euros       1303 non-null   float64
 8   IPS_Panel         1303 non-null   int64  
 9   Retina_Display    1303 non-null   int64  
 10  Touchscreen       1303 non-null   int64  
 11  Resolution_X      1303 non-null   int64  
 12  Resolution_Y      1303 non-null   int64  
 13  Total_Pixels      1303 non-null   int64  
 14  High_Resolution   1303 non-null   int64  
 15  Product_Series    1303 non-null   object 
 16  Cpu_Brand         1303 non-null   object 


In [212]:
features = ['Ram', 'Weight', 'Inches', 'Company', 'Cpu_Brand', 'Cpu_Series', 'Gpu_Series', 'OpSys', 'Price_euros']
target = 'TypeName'

### encoding categorical values

In [213]:
from sklearn.preprocessing import LabelEncoder

categorical_col = ['Company', 'Cpu_Brand', 'Cpu_Series', 'Gpu_Series', 'OpSys']

encoders = {}
for col in categorical_col:
    encoders[col] = LabelEncoder()
    dataset[col] = encoders[col].fit_transform(dataset[col])

# Encode target variable
target_encoder = LabelEncoder()
dataset[target] = target_encoder.fit_transform(dataset[target])

In [214]:
X = dataset[features]
y = dataset[target]

### spliting data to training and testing

In [215]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

### SVC

In [216]:
from sklearn.svm import SVC
svc_model = SVC(kernel='rbf', random_state=21)
svc_model.fit(X_train, y_train)

In [217]:
y_pred = svc_model.predict(X_test)

In [218]:
from sklearn.metrics import accuracy_score
score = accuracy_score(y_test, y_pred)
print(f'Accuracy Score {score}')

Accuracy Score 0.6377551020408163


### Gaussian Naive Bayes

In [219]:
from sklearn.naive_bayes import GaussianNB
gb = GaussianNB()
gb.fit(X_train, y_train)

In [220]:
y_pred2 = gb.predict(X_test)

In [221]:
score2 = accuracy_score(y_test, y_pred2)
print(f'Accuracy Score {score2}')

Accuracy Score 0.6836734693877551


### LightGBM

In [222]:
import lightgbm as lgb
lgbm = lgb.LGBMClassifier(n_estimators=100, learning_rate=0.1, max_depth=6, objective='multiclass', num_class=3, random_state=21)
lgbm.fit(X_train, y_train)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000110 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 447
[LightGBM] [Info] Number of data points in the train set: 1107, number of used features: 9
[LightGBM] [Info] Start training from score -2.424441
[LightGBM] [Info] Start training from score -1.850354
[LightGBM] [Info] Start training from score -3.831355
[LightGBM] [Info] Start training from score -0.587787
[LightGBM] [Info] Start training from score -1.873610
[LightGBM] [Info] Start training from score -3.751312


In [223]:
y_pred3 = lgbm.predict(X_test)

In [224]:
score3 = accuracy_score(y_test, y_pred3)
print(f'Accuracy Score {score3}')

Accuracy Score 0.8367346938775511


In [225]:
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

In [226]:
params = {
  'objective': 'regression',       # Task type: regression
  'metric': 'rmse',                # Metric: RMSE
  'num_leaves': 31,                # Maximum number of leaves in a tree
  'learning_rate': 0.05,           # Learning rate
  'feature_fraction': 0.9          # Fraction of features used at each iteration
}

In [227]:
num_round = 100
model = lgb.train(params, train_data, num_round, valid_sets=[test_data])

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000155 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 447
[LightGBM] [Info] Number of data points in the train set: 1107, number of used features: 9
[LightGBM] [Info] Start training from score 2.598916


In [228]:
y_pred4 = model.predict(X_test, num_iteration=model.best_iteration)

In [229]:
from sklearn.metrics import r2_score
score4 = r2_score(y_test, y_pred4)
print(f'r2 Score {score4}')

r2 Score 0.5363883373112674


### Logistic Regression

In [230]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(random_state=21)
lr.fit(X_train, y_train)

In [231]:
y_pred5 = lr.predict(X_test)

In [232]:
score5 = accuracy_score(y_test, y_pred5)
print(f'Accuracy Score {score5}')

Accuracy Score 0.6938775510204082


### AdaBoost Classifier

In [233]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
abc = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=2), n_estimators=50, random_state=21)
abc.fit(X_train, y_train)

In [234]:
y_pred5 = abc.predict(X_test)

In [235]:
score5 = accuracy_score(y_test, y_pred5)
print(f'Accuracy Score {score5}')

Accuracy Score 0.576530612244898


### funtion for the prediction for Type of laptop

In [236]:
def encode_with_fallback(encoder, value):
    """Encodes a categorical value. If unseen, assigns -1."""
    if value in encoder.classes_:
        return encoder.transform([value])[0]
    else:
        return -1


def predict_typename(model, user_input):
  input_df = pd.DataFrame([user_input])
  categorical_cols = ['Company', 'Cpu_Brand', 'Cpu_Series', 'Gpu_Series', 'OpSys']
  for col in categorical_cols:
    input_df[col] = input_df[col].apply(lambda x: encode_with_fallback(encoders[col], x))
    
  typename_prediction = model.predict(input_df)
  decoded_prediction = target_encoder.inverse_transform(typename_prediction)[0]
  return f"Predicted TypeName: {decoded_prediction}"

In [237]:
user_input = {
    "Ram": 16,
    "Weight": 1.8,
    "Inches": 15.6,
    "Company": "Asus",
    "Cpu_Brand": "Intel",
    "Cpu_Series": "Core i7",
    "Gpu_Series": "GeForce",
    "OpSys": "Windows 10",
    "Price_euros": 1983
}

In [238]:
print(predict_typename(lgbm, user_input))

Predicted TypeName: Ultrabook
