# Operating System prediction

In [1]:
import numpy as np 
import pandas as pd 

In [2]:
dataset = pd.read_csv('laptop_price_processed.csv')

In [3]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 23 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   laptop_ID         1303 non-null   int64  
 1   Company           1303 non-null   object 
 2   TypeName          1303 non-null   object 
 3   Inches            1303 non-null   float64
 4   Ram               1303 non-null   int64  
 5   OpSys             1303 non-null   object 
 6   Weight            1303 non-null   float64
 7   Price_euros       1303 non-null   float64
 8   IPS_Panel         1303 non-null   int64  
 9   Retina_Display    1303 non-null   int64  
 10  Touchscreen       1303 non-null   int64  
 11  Resolution_X      1303 non-null   int64  
 12  Resolution_Y      1303 non-null   int64  
 13  Total_Pixels      1303 non-null   int64  
 14  High_Resolution   1303 non-null   int64  
 15  Product_Series    1303 non-null   object 
 16  Cpu_Brand         1303 non-null   object 


### data preprocessing

In [12]:
features = ['Ram', 'Memory_Size(GB)', 'Price_euros', 'Company', 'Cpu_Brand', 'Gpu_Manufacturer', 'Memory_Type']
target = 'OpSys'

In [13]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, LabelEncoder

numeric_col = ['Ram', 'Memory_Size(GB)', 'Price_euros']
categorical_col = ['Company', 'Cpu_Brand', 'Gpu_Manufacturer', 'Memory_Type', 'OpSys']

preprocessor = ColumnTransformer([
  ('num', StandardScaler(), numeric_col),
  ('cat', LabelEncoder(), categorical_col)
])

In [14]:
X = dataset[features]
y = dataset[target]

### spliting to training and testing

In [15]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=21)

### AdaBoost Classifier

In [38]:
from sklearn.pipeline import Pipeline
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
boost_pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("model", AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=7), n_estimators=100, random_state=21))
])
boost_pipeline.fit(X_train, y_train)



In [39]:
y_pred2 = boost_pipeline.predict(X_test)

In [40]:
from sklearn.metrics import accuracy_score
score2 = accuracy_score(y_test, y_pred2)
print(f'accuracy score {score2}')

accuracy score 0.8697318007662835


### function for the prediction of OS

In [48]:
def OpSys_Prediction(model, user_input):
  input_df = pd.DataFrame([user_input])
  predict_os = model.predict(input_df)[0]
  return f'Operating system predicted is {predict_os}'

In [51]:
user_input = {
    "Company": "Dell",
    "Price_euros": 1290,
    "Ram": 8,
    "Cpu_Brand": "Intel",
    "Gpu_Manufacturer": "Intel",
    "Memory_Type": "SSD",
    "Memory_Size(GB)": 512
}

In [52]:
print(OpSys_Prediction(boost_pipeline, user_input))

Operating system predicted is Windows 10
