In [1]:
import pandas as pd

# Read the prostate-cancer table from disk into a DataFrame, relying on
# read_csv's default parsing options.
data = pd.read_csv(filepath_or_buffer="datasets/prostate_cancer.txt")

# Leave the frame as the cell's final expression so the notebook renders it.
data

Unnamed: 0,id,lcavol,lweight,age,lbph,svi,lcp,gleason,pgg45,lpsa,train
0,1,-0.579818,2.769459,50,-1.386294,0,-1.386294,6,0,-0.430783,T
1,2,-0.994252,3.319626,58,-1.386294,0,-1.386294,6,0,-0.162519,T
2,3,-0.510826,2.691243,74,-1.386294,0,-1.386294,7,20,-0.162519,T
3,4,-1.203973,3.282789,58,-1.386294,0,-1.386294,6,0,-0.162519,T
4,5,0.751416,3.432373,62,-1.386294,0,-1.386294,6,0,0.371564,T
...,...,...,...,...,...,...,...,...,...,...,...
92,93,2.830268,3.876396,68,-1.386294,1,1.321756,7,60,4.385147,T
93,94,3.821004,3.896909,44,-1.386294,1,2.169054,7,40,4.684443,T
94,95,2.907447,3.396185,52,-1.386294,1,2.463853,7,10,5.143124,F
95,96,2.882564,3.773910,68,1.558145,1,1.558145,7,80,5.477509,T


### Data preprocessing - normalization 

x_scaled = (x-x_min)/(x_max-x_min)

In [6]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [32]:
# Target vector: the `lpsa` column.
y = data['lpsa']

# Feature matrix: everything left after removing the `id` column, the target
# (`lpsa`) and the `train` column.
X = data.drop(columns=['id', 'lpsa', 'train'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Min-max scaling: the scaler is fitted on the training rows only and then
# applied unchanged to both partitions.
scaler = MinMaxScaler().fit(X_train)
x_train_scaled = scaler.transform(X_train)
x_test_scaled = scaler.transform(X_test)

# Plain linear regression trained on the scaled training features.
model = LinearRegression().fit(x_train_scaled, y_train)
y_pred = model.predict(x_test_scaled)

# Compute the evaluation metrics on the held-out test rows.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print("Mean squared error: ", mse)
print("Mean absolute error: ", mae)
print("R2 Score: ", r2)




Mean squared error:  0.3469917891761397
Mean absolute error:  0.4278470015625614
R2 Score:  0.7575175130204983


In [33]:
# Ridge regression (alpha=1) trained on the scaled training features that were
# prepared earlier in the notebook (`x_train_scaled` / `x_test_scaled`).
ridge_model = Ridge(alpha=1).fit(x_train_scaled, y_train)

# Predictions for the held-out test rows.
y_pred = ridge_model.predict(x_test_scaled)

# Compute the evaluation metrics on the held-out test rows.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print("Mean squared error: ", mse)
print("Mean absolute error: ", mae)
print("R2 Score: ", r2)

Mean squared error:  0.3721252423374114
Mean absolute error:  0.4446973890548588
R2 Score:  0.7399539209729806


In [34]:
# Lasso regression (alpha=0.01) trained on the scaled training features that
# were prepared earlier in the notebook (`x_train_scaled` / `x_test_scaled`).
lasso_model = Lasso(alpha=0.01).fit(x_train_scaled, y_train)

# Predictions for the held-out test rows.
y_pred = lasso_model.predict(x_test_scaled)

# Compute the evaluation metrics on the held-out test rows.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print("Mean squared error: ", mse)
print("Mean absolute error: ", mae)
print("R2 Score: ", r2)

Mean squared error:  0.3758926016160169
Mean absolute error:  0.45293101483850184
R2 Score:  0.7373212400977631


### Data preprocessing - standardization

x_scaled = (x-mean)/std

In [35]:
# StandardScaler is imported together with the other scikit-learn names in the
# notebook's import cell, so this cell carries no import of its own.

# NOTE: `scaler` and `ridge_model` rebind the names used by the min-max section
# above; after this cell runs they refer to the standardized variants.

# Standardize the features; the scaler is fitted on the training rows only and
# then applied unchanged to both partitions.
scaler = StandardScaler()

scaler.fit(X_train)

x_standard_train_scaled = scaler.transform(X_train)
x_standard_test_scaled = scaler.transform(X_test)

# Ridge regression (alpha=1) trained on the standardized training features.
ridge_model = Ridge(alpha=1)

ridge_model.fit(x_standard_train_scaled, y_train)

# Predictions for the held-out test rows.
y_pred = ridge_model.predict(x_standard_test_scaled)

# Compute the evaluation metrics on the held-out test rows.

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean squared error: ", mse)
print("Mean absolute error: ", mae)
print("R2 Score: ", r2)

Mean squared error:  0.3463315404543104
Mean absolute error:  0.42571366815147427
R2 Score:  0.7579789036271013
