Test Script for Classification Model

---



In [1]:
import pickle
import numpy as np
import pandas as pd
import missingno as msno
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [2]:
# Load the test set into a DataFrame. The sheet must contain a 'rating' column
# (read further down as the ground-truth labels) in addition to the feature columns.
# NOTE(review): the path is absolute and hard-coded — presumably a Colab working
# directory; confirm or make it configurable before running elsewhere.
Xnew = pd.read_excel('/content/test.xlsx')

# Load Pickle Files

In [4]:
def load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the object has been read, instead of being left open for the kernel's lifetime.

    NOTE(review): ``pickle.load`` can execute arbitrary code while unpickling;
    only point this at artifact files you trust.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Load pickled ordinal encoder
ordinal_encoder = load_pickle('ordinalEncoding.pkl')

# Load pickled label encoder
label_encoder = load_pickle('labelEncoding.pkl')

# Load pickled const values
preproc_values = load_pickle('preprocValues.pkl')

# Load pickled normalization functions
normalization_functions = load_pickle('normalizationFns.pkl')

# Load pickled distinct values
distinct_values = load_pickle('distinctValues.pkl')


In [5]:
# Replace every value that is not in the stored list of distinct values for its
# column with NaN (presumably the values seen at training time — confirm), so
# the missing-value handling below can impute it.
# The loop variable is deliberately NOT called `distinct_values`: reusing that
# name would rebind the dict while iterating it, leaving the name pointing at
# the last column's values and breaking any re-run of this cell.
for column, allowed_values in distinct_values.items():
    Xnew[column] = Xnew[column].apply(
        lambda x, allowed=allowed_values: x if x in allowed else np.nan
    )

# **1. Preprocessing**

In [6]:
# Keep the 'rating' column aside as the ground-truth labels; it is compared
# position by position with the mapped model predictions when accuracy is computed.
rating_list = Xnew['rating']

In [7]:
# Keep only the feature columns used by the preprocessing pipeline
# (this drops 'rating' and any other extra column from the sheet).
# .copy() makes Xnew an independent DataFrame, so the column assignments in the
# following cells modify Xnew itself rather than a slice of the original frame
# (avoids SettingWithCopyWarning and silently lost writes).
Xnew = Xnew[[
    'brand', 'processor_brand', 'processor_name', 'processor_gnrtn',
    'ram_gb', 'ram_type', 'ssd', 'hdd', 'os', 'graphic_card_gb',
    'weight', 'warranty', 'Touchscreen', 'msoffice', 'Price',
    'Number of Ratings', 'Number of Reviews',
]].copy()

In [8]:
# Split the raw 'os' text once at '-bit': the part before it becomes the numeric
# 'os-bits' column, the part after it (whitespace-trimmed) replaces 'os'.
os_parts = Xnew['os'].str.split('-bit', n=1, expand=True)

Xnew['os-bits'] = pd.to_numeric(os_parts[0])

Xnew['os'] = os_parts[1].str.strip()

Xnew.head(6)

Unnamed: 0,brand,processor_brand,processor_name,processor_gnrtn,ram_gb,ram_type,ssd,hdd,os,graphic_card_gb,weight,warranty,Touchscreen,msoffice,Price,Number of Ratings,Number of Reviews,os-bits
0,HP,Intel,Core i5,11th,16 GB,DDR4,512 GB,0 GB,Windows,2 GB,Casual,1 year,No,No,78000,512,85,64
1,MSI,Intel,Core i5,10th,8 GB,DDR4,512 GB,0 GB,Windows,4 GB,Casual,2 years,No,No,59990,1597,228,64
2,Lenovo,Intel,Core i5,10th,8 GB,DDR4,256 GB,1024 GB,Windows,4 GB,Gaming,No warranty,No,No,74999,268,25,64
3,HP,Intel,Core i5,11th,16 GB,DDR4,512 GB,0 GB,Windows,0 GB,ThinNlight,1 year,No,Yes,66999,17,1,64
4,ASUS,AMD,Ryzen 7,Not Available,4 GB,DDR5,1024 GB,0 GB,Windows,4 GB,Casual,1 year,No,No,114990,0,0,32
5,HP,AMD,Ryzen 5,Not Available,8 GB,DDR4,512 GB,0 GB,Windows,4 GB,Casual,1 year,No,No,58990,1049,134,64


In [9]:
# Some columns are stored as text with a 'GB' unit (e.g. '16 GB') although they
# are really numeric. Strip the unit and parse the remainder as a number.
for gb_column in ('ram_gb', 'ssd', 'hdd', 'graphic_card_gb'):
    without_unit = Xnew[gb_column].astype(str).str.replace('GB','')
    Xnew[gb_column] = pd.to_numeric(without_unit)

## A. Missing Values Handling

In [10]:
# Split the frame by dtype: text-like columns vs. 64-bit numeric columns.
# These two frames are only used for their column names in the imputation loops.
text_dtypes = ['object', 'category']
number_dtypes = ['float64', 'int64']

categorical_Xnew = Xnew.select_dtypes(include=text_dtypes)
numerical_Xnew = Xnew.select_dtypes(include=number_dtypes)

In [11]:
# Per-column means loaded from the pickled preprocessing values.
numeric_mean_values = preproc_values['column_means']

# Replace missing values with the stored mean for numeric columns.
# Assign the result back instead of calling fillna(inplace=True) on Xnew[column]:
# the in-place form is chained assignment on a temporary Series, which emits a
# FutureWarning on recent pandas and leaves Xnew unchanged under Copy-on-Write.
for column in numerical_Xnew:
    Xnew[column] = Xnew[column].fillna(numeric_mean_values[column])

In [12]:
# Keep only the categorical columns that have a stored mode to impute with.
# The previous positional trim (iloc[:, :-1]) dropped whatever object column was
# last; since 'rating' is no longer in Xnew at this point, that was 'msoffice',
# whose missing values were then never filled. Selecting by name is also
# idempotent, so re-running the cell does not drop one more column each time.
categorical_Xnew = categorical_Xnew[
    [column for column in categorical_Xnew.columns
     if column in preproc_values['column_modes']]
]

In [13]:
# Per-column modes loaded from the pickled preprocessing values.
categoric_mode_values = preproc_values['column_modes']

# Replace missing values with the stored mode for categorical columns.
# Assign the result back instead of calling fillna(inplace=True) on Xnew[column]:
# the in-place form is chained assignment on a temporary Series, which emits a
# FutureWarning on recent pandas and leaves Xnew unchanged under Copy-on-Write.
for column in categorical_Xnew:
    Xnew[column] = Xnew[column].fillna(categoric_mode_values[column])

## **B. Ordinal Encoder**

In [14]:
# Pull the three fitted ordinal encoders out of the loaded dictionary.
OE_processor_gnrtn, OE_warranty, OE_processor_name = (
    ordinal_encoder[encoder_key]
    for encoder_key in ('OE_processor_gnrtn', 'OE_warranty', 'OE_processor_name')
)

In [15]:
# Encode each ordinal column with its own fitted encoder. The encoder is given
# a one-column DataFrame (double brackets), not a Series.
for ordinal_column, fitted_encoder in (
    ('processor_gnrtn', OE_processor_gnrtn),
    ('warranty', OE_warranty),
    ('processor_name', OE_processor_name),
):
    Xnew[ordinal_column] = fitted_encoder.transform(Xnew[[ordinal_column]])

Xnew.head()

Unnamed: 0,brand,processor_brand,processor_name,processor_gnrtn,ram_gb,ram_type,ssd,hdd,os,graphic_card_gb,weight,warranty,Touchscreen,msoffice,Price,Number of Ratings,Number of Reviews,os-bits
0,HP,Intel,6.0,6.0,16,DDR4,512,0,Windows,2,Casual,1.0,No,No,78000,512,85,64
1,MSI,Intel,6.0,5.0,8,DDR4,512,0,Windows,4,Casual,3.0,No,No,59990,1597,228,64
2,Lenovo,Intel,6.0,5.0,8,DDR4,256,1024,Windows,4,Gaming,0.0,No,No,74999,268,25,64
3,HP,Intel,6.0,6.0,16,DDR4,512,0,Windows,0,ThinNlight,1.0,No,Yes,66999,17,1,64
4,ASUS,AMD,5.0,0.0,4,DDR5,1024,0,Windows,4,Casual,1.0,No,No,114990,0,0,32


## **C. Label Encoder**

In [16]:
# Pull the seven fitted label encoders out of the loaded dictionary.
(
    LE_brand,
    LE_processor_brand,
    LE_ram_type,
    LE_os,
    LE_weight,
    LE_touchscreen,
    LE_msoffice,
) = (
    label_encoder[encoder_key]
    for encoder_key in (
        'LE_brand',
        'LE_processor_brand',
        'LE_ram_type',
        'LE_os',
        'LE_weight',
        'LE_touchscreen',
        'LE_msoffice',
    )
)

In [17]:
# Encode each nominal column with its own fitted label encoder.
# The encoders take the column as a Series.
for nominal_column, fitted_encoder in (
    ('brand', LE_brand),
    ('processor_brand', LE_processor_brand),
    ('ram_type', LE_ram_type),
    ('os', LE_os),
    ('weight', LE_weight),
    ('msoffice', LE_msoffice),
    ('Touchscreen', LE_touchscreen),
):
    Xnew[nominal_column] = fitted_encoder.transform(Xnew[nominal_column])

## **D. Feature Scaling**

In [18]:
# Normalization: fetch the three fitted scalers, then rescale their columns.
PriceScaler = normalization_functions['normalize_price']
NRatingScaler = normalization_functions['normalize_num_rating']
NReviewsScaler = normalization_functions['normalize_num_reviews']

# Each scaler is given a one-column DataFrame (double brackets).
for scaled_column, fitted_scaler in (
    ('Price', PriceScaler),
    ('Number of Ratings', NRatingScaler),
    ('Number of Reviews', NReviewsScaler),
):
    Xnew[scaled_column] = fitted_scaler.transform(Xnew[[scaled_column]])


In [19]:
# Display the column index of the preprocessed frame before picking the model features.
Xnew.columns

Index(['brand', 'processor_brand', 'processor_name', 'processor_gnrtn',
       'ram_gb', 'ram_type', 'ssd', 'hdd', 'os', 'graphic_card_gb', 'weight',
       'warranty', 'Touchscreen', 'msoffice', 'Price', 'Number of Ratings',
       'Number of Reviews', 'os-bits'],
      dtype='object')

## **E. Feature Selection**

In [20]:
# Columns passed to the models' predict() calls, in this order.
selected_features = [
    'ram_gb',
    'ram_type',
    'ssd',
    'warranty',
    'msoffice',
    'Price',
    'Number of Ratings',
    'Number of Reviews',
    'os-bits',
]
selected_X = Xnew[selected_features]

In [21]:
# Preview the first rows of the model input.
selected_X.head()

Unnamed: 0,ram_gb,ram_type,ssd,warranty,msoffice,Price,Number of Ratings,Number of Reviews,os-bits
0,16,1,512,1.0,0,0.143553,0.070224,0.094972,64
1,8,1,512,3.0,0,0.101176,0.219037,0.254749,64
2,8,1,256,0.0,0,0.136492,0.036758,0.027933,64
3,16,1,512,1.0,1,0.117668,0.002332,0.001117,64
4,4,2,1024,1.0,0,0.230588,0.0,0.0,32


# **2. Trained Model**

In [22]:
# Load pickled model
# Class encoding: good = 0, bad = 1
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
# The `with` block closes the file handle once the model has been read.
with open('randomForest.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

Ypred = model.predict(selected_X)

# Turn the numeric predictions into the string labels compared with rating_list:
# 0 becomes 'Good Rating', anything else becomes 'Bad Rating'.
test_predictions_rounded = [round(pred) for pred in Ypred]
Y_pred_mapped = ['Good Rating' if rating == 0 else 'Bad Rating' for rating in test_predictions_rounded]

# Share of positions (in percent) where the mapped prediction equals the stored label.
accuracy = sum(1 for pred, actual in zip(Y_pred_mapped, rating_list) if pred == actual) / len(Y_pred_mapped) * 100
print("Accuracy: {:.2f}%".format(accuracy))

print(Y_pred_mapped)

Accuracy: 89.58%
['Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad 

In [23]:
# Load the pickled decision-tree model.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
# The `with` block closes the file handle once the model has been read.
with open('decisionTree.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

Ypred = model.predict(selected_X)

# Turn the numeric predictions into the string labels compared with rating_list:
# 0 becomes 'Good Rating', anything else becomes 'Bad Rating'.
test_predictions_rounded = [round(pred) for pred in Ypred]
Y_pred_mapped = ['Good Rating' if rating == 0 else 'Bad Rating' for rating in test_predictions_rounded]

# Share of positions (in percent) where the mapped prediction equals the stored label.
accuracy = sum(1 for pred, actual in zip(Y_pred_mapped, rating_list) if pred == actual) / len(Y_pred_mapped) * 100
print("Accuracy: {:.2f}%".format(accuracy))

print(Y_pred_mapped)

Accuracy: 89.58%
['Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad 

In [24]:
# Load the pickled XGBoost model.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
# The `with` block closes the file handle once the model has been read.
with open('XGBoost.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

Ypred = model.predict(selected_X)

# Turn the numeric predictions into the string labels compared with rating_list:
# 0 becomes 'Good Rating', anything else becomes 'Bad Rating'.
test_predictions_rounded = [round(pred) for pred in Ypred]
Y_pred_mapped = ['Good Rating' if rating == 0 else 'Bad Rating' for rating in test_predictions_rounded]

# Share of positions (in percent) where the mapped prediction equals the stored label.
accuracy = sum(1 for pred, actual in zip(Y_pred_mapped, rating_list) if pred == actual) / len(Y_pred_mapped) * 100
print("Accuracy: {:.2f}%".format(accuracy))

print(Y_pred_mapped)

Accuracy: 87.50%
['Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad R

In [25]:
# Load the pickled AdaBoost model.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
# The `with` block closes the file handle once the model has been read.
with open('AdaBoost.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

Ypred = model.predict(selected_X)

# Turn the numeric predictions into the string labels compared with rating_list:
# 0 becomes 'Good Rating', anything else becomes 'Bad Rating'.
test_predictions_rounded = [round(pred) for pred in Ypred]
Y_pred_mapped = ['Good Rating' if rating == 0 else 'Bad Rating' for rating in test_predictions_rounded]

# Share of positions (in percent) where the mapped prediction equals the stored label.
accuracy = sum(1 for pred, actual in zip(Y_pred_mapped, rating_list) if pred == actual) / len(Y_pred_mapped) * 100
print("Accuracy: {:.2f}%".format(accuracy))

print(Y_pred_mapped)

Accuracy: 88.89%
['Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Bad Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Bad Rating', 'Bad Rating', 'Good Rating', 'Good Rating', 'Good Rating', 'Bad Rating', 'Good Rating', 'Bad Rat