In [75]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVR

In [87]:
# Read the CSV file
computers = pd.read_csv('ml_df.csv')

# Label encode categorical columns
le = LabelEncoder()
computers['manufacturer'] = le.fit_transform(computers['manufacturer'])
computers['wireless_network'] = le.fit_transform(computers['wireless_network'])
computers['graphics_card'] = le.fit_transform(computers['graphics_card'])

# Drop irrelevant columns
df_model = computers.drop(['model', 'series', 'comp_model_url'], axis=1)

# Check for missing values
print(df_model.isna().sum())

# Fill missing values with mode
df_model_filled = df_model.fillna(df_model.mode().iloc[0])

# Print the updated DataFrame information
print(df_model_filled.info())

# Define the features and target variable
features = df_model_filled[['manufacturer', 'operating_system', 'weight', 'RAM_memory_volume',
                            'CPU_speed', 'processor_generation', 'low_price', 'graphics_card',
                            'wireless_network']]

# Split the target variable into 10 bins
target_bins = pd.cut(df_model_filled['average_price'], bins=10, labels=False)

# Split the data into training and test sets
x_train, x_test, y_train, y_test = train_test_split(features, target_bins, test_size=0.2, random_state=42)

# Train a linear regression model
lr = LinearRegression()
reg_model = lr.fit(x_train, y_train)
y_pred_lr = reg_model.predict(x_test)

# Train a random forest regression model
rf = RandomForestRegressor()
rf_model = rf.fit(x_train, y_train)
y_pred_rf = rf_model.predict(x_test)

# Train a support vector regression model
svr = SVR()
reg_model_svr = svr.fit(x_train, y_train)
y_pred_svr = reg_model_svr.predict(x_test)

# Evaluate the models
r2_lr = r2_score(y_test, y_pred_lr)
r2_rf = r2_score(y_test, y_pred_rf)
r2_svr = r2_score(y_test, y_pred_svr)

manufacturer               0
gaming_adapt               0
operating_system           0
weight                    15
RAM_memory_volume          5
CPU_speed                123
processor_generation     128
memory_type              140
storage_capacity          28
hard_drive_type           21
screen_size               29
screen_resolution         28
screen_type               42
display_refresh_rate      79
touch_screen               0
webcam                     0
security_measures          0
low_price                 10
high_price                10
average_price              1
Data_acquisition_date      0
computer_entry_date        0
graphics_card              0
processor_type           783
wireless_network           0
dtype: int64
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 903 entries, 0 to 902
Data columns (total 25 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   manufacturer           903 non-null    int64  
 

In [88]:
print("Linear Regression R-squared:", r2_lr)
resDF1 = pd.DataFrame({"Actual":y_test,"Predicted":y_pred_lr})
resDF1

Linear Regression R-squared: 0.765834742041941


Unnamed: 0,Actual,Predicted
70,0,0.174202
457,1,0.839026
218,1,1.165338
250,1,1.032071
39,1,0.467960
...,...,...
863,0,-0.155522
442,0,0.003538
858,0,0.196262
25,0,-0.133358


In [89]:
print("Random Forest Regression R-squared:", r2_rf)
resDF2 = pd.DataFrame({"Actual":y_test,"Predicted":y_pred_rf})
resDF2

Random Forest Regression R-squared: 0.9360164263286429


Unnamed: 0,Actual,Predicted
70,0,0.0
457,1,1.0
218,1,1.0
250,1,1.0
39,1,0.5
...,...,...
863,0,0.0
442,0,0.0
858,0,0.0
25,0,0.0


In [90]:
print("Support Vector Regression R-squared:", r2_svr)
resDF3 = pd.DataFrame({"Actual":y_test,"Predicted":y_pred_svr})
resDF3

Support Vector Regression R-squared: 0.8033243743525186


Unnamed: 0,Actual,Predicted
70,0,0.055733
457,1,0.778728
218,1,1.241595
250,1,1.172334
39,1,0.452063
...,...,...
863,0,-0.086567
442,0,-0.099183
858,0,0.006264
25,0,-0.079877
